summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Krishnan Parthasarathi <kparthas@redhat.com> 2013-05-21 21:40:31 +0530
committer: Vijay Bellur <vbellur@redhat.com> 2013-05-28 20:49:19 -0700
commit: 0a4c0eac0aeeec88cb9bfeb08c2eac8b98be4622 (patch)
tree: d5dba83db399fdf53e0f1d9b23d91f7405b1ade8
parent: e617990a87dbfa78bb625e490d6eac8e967419fd (diff)
glusterd: Give up biglock during rpc conn cleanup
glusterd could deadlock after a peer-detach command, as follows:

1) The glusterd_friend_cleanup function flushes out the messages in the RPC layer's queue that have not yet received a response. At this point, glusterd has already acquired the big lock.
2) As a side effect of flushing out the messages, the corresponding callbacks are invoked. The callbacks themselves are executed only after acquiring the big lock.

This results in the big lock being acquired in a nested manner (in the same thread), which causes a deadlock. The same deadlock can also happen during brick/NFS/SHD disconnect in volume-stop. To avoid it, the big lock is released around the RPC connection cleanup and re-acquired afterwards.

Change-Id: Iab3aad143cd8ebbab53ea0b69687f0e7627dc8a9
BUG: 965533
Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-on: http://review.gluster.org/5084
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c 9
1 files changed, 9 insertions, 0 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 6ff3c80924d..29dec5669d5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1011,12 +1011,15 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)
GF_ASSERT (peerinfo);
glusterd_peerctx_t *peerctx = NULL;
gf_boolean_t quorum_action = _gf_false;
+ glusterd_conf_t *priv = THIS->private;
if (peerinfo->quorum_contrib != QUORUM_NONE)
quorum_action = _gf_true;
if (peerinfo->rpc) {
/* cleanup the saved-frames before last unref */
+ synclock_unlock (&priv->big_lock);
rpc_clnt_connection_cleanup (&peerinfo->rpc->conn);
+ synclock_lock (&priv->big_lock);
peerctx = peerinfo->rpc->mydata;
peerinfo->rpc->mydata = NULL;
@@ -1462,10 +1465,13 @@ int32_t
glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
{
GF_ASSERT (brickinfo);
+ glusterd_conf_t *priv = THIS->private;
if (brickinfo->rpc) {
/* cleanup the saved-frames before last unref */
+ synclock_unlock (&priv->big_lock);
rpc_clnt_connection_cleanup (&brickinfo->rpc->conn);
+ synclock_lock (&priv->big_lock);
rpc_clnt_unref (brickinfo->rpc);
brickinfo->rpc = NULL;
@@ -3436,11 +3442,14 @@ int32_t
glusterd_nodesvc_disconnect (char *server)
{
struct rpc_clnt *rpc = NULL;
+ glusterd_conf_t *priv = THIS->private;
rpc = glusterd_nodesvc_get_rpc (server);
if (rpc) {
+ synclock_unlock (&priv->big_lock);
rpc_clnt_connection_cleanup (&rpc->conn);
+ synclock_lock (&priv->big_lock);
rpc_clnt_unref (rpc);
(void)glusterd_nodesvc_set_rpc (server, NULL);
}