summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrishnan Parthasarathi <kparthas@redhat.com>2013-05-21 21:40:31 +0530
committerAnand Avati <avati@redhat.com>2013-05-23 22:27:41 -0700
commit37f707023cb3777dcabd8b4be0d07878f9422025 (patch)
treeee78c2a6ba7cdb3ed10e4963e85bcdcba7ae06f0
parenta387a545b44a553b9aedf1e2f4683c84dee0e6be (diff)
glusterd: Give up biglock during rpc conn cleanup
glusterd could deadlock after a peer-detach command as follows: 1) The glusterd_friend_cleanup function 'flushes' out messages in the rpc layer's queue that haven't received a response. At this point, glusterd has already acquired the big lock. 2) The side effect of flushing out the messages is that the corresponding callbacks are called. The callbacks themselves are executed after acquiring the big lock. This results in the big lock being acquired in a nested manner (in the same thread), which causes a deadlock. This can also happen during brick/NFS/SHD disconnect in volume-stop. The fix is to release the big lock before calling rpc_clnt_connection_cleanup and to reacquire it once the cleanup returns. Change-Id: Iab3aad143cd8ebbab53ea0b69687f0e7627dc8a9 BUG: 965533 Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-on: http://review.gluster.org/5061 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c9
1 files changed, 9 insertions, 0 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index ad19484a16d..27b974796ad 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1115,12 +1115,15 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)
GF_ASSERT (peerinfo);
glusterd_peerctx_t *peerctx = NULL;
gf_boolean_t quorum_action = _gf_false;
+ glusterd_conf_t *priv = THIS->private;
if (peerinfo->quorum_contrib != QUORUM_NONE)
quorum_action = _gf_true;
if (peerinfo->rpc) {
/* cleanup the saved-frames before last unref */
+ synclock_unlock (&priv->big_lock);
rpc_clnt_connection_cleanup (&peerinfo->rpc->conn);
+ synclock_lock (&priv->big_lock);
peerctx = peerinfo->rpc->mydata;
peerinfo->rpc->mydata = NULL;
@@ -1484,10 +1487,13 @@ int32_t
glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
{
GF_ASSERT (brickinfo);
+ glusterd_conf_t *priv = THIS->private;
if (brickinfo->rpc) {
/* cleanup the saved-frames before last unref */
+ synclock_unlock (&priv->big_lock);
rpc_clnt_connection_cleanup (&brickinfo->rpc->conn);
+ synclock_lock (&priv->big_lock);
rpc_clnt_unref (brickinfo->rpc);
brickinfo->rpc = NULL;
@@ -3457,11 +3463,14 @@ int32_t
glusterd_nodesvc_disconnect (char *server)
{
struct rpc_clnt *rpc = NULL;
+ glusterd_conf_t *priv = THIS->private;
rpc = glusterd_nodesvc_get_rpc (server);
if (rpc) {
+ synclock_unlock (&priv->big_lock);
rpc_clnt_connection_cleanup (&rpc->conn);
+ synclock_lock (&priv->big_lock);
rpc_clnt_unref (rpc);
(void)glusterd_nodesvc_set_rpc (server, NULL);
}