diff options
author | Krishnan Parthasarathi <kparthas@redhat.com> | 2013-05-14 09:59:45 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-06-18 21:53:27 -0700 |
commit | 878bc03d7df8e18faca13fbf89a7ae55a29b0fdc (patch) | |
tree | 7596f67fe5c6ebdf07108c550d8eb6024debe75a | |
parent | b3f480a8e451ff1b11761c4cfca6b798c35bfb04 (diff) |
glusterd: Disable transport before cleaning up rpc object
Backport of http://review.gluster.org/5000
Problem:
rpc_transport object, which is part of rpc_clnt, is destroyed
prematurely. This is because, rpc_transport object is ref'd by socket
layer and rpc layer. These ref's, until the synctask'izing of
operations, were unref'd sequentially in the epoll thread.
With more threads at play, the sequential unref guarantee is off.
Fix:
Shutting down the transport before proceeding with cleaning up of
rpc_clnt object would serialize the unref's on the rpc_transport object
and thus eliminating the race.
Also, we don't store the address of brickinfo in brick's rpc notify
function, to avoid the possibility of referring a freed brickinfo.
Instead we use a string based id to 'reach' the corresponding brickinfo.
Change-Id: If2739e2eeaee1e8b071ab2b6754b7ea0f81cfceb
BUG: 962619
Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-on: http://review.gluster.org/5214
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 48 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 67 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 3 |
3 files changed, 99 insertions, 19 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index d4b7dc93f16..095f1966d87 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -3356,6 +3356,43 @@ glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) __glusterd_handle_cli_clearlocks_volume); } +static int +get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) +{ + glusterd_volinfo_t *volinfo = NULL; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + int ret = -1; + + brickid_dup = gf_strdup (brickid); + if (!brickid_dup) + goto out; + + volid_str = brickid_dup; + brick = strchr (brickid_dup, ':'); + *brick = '\0'; + brick++; + if (!volid_str || !brick) + goto out; + + uuid_parse (volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo); + if (ret) + goto out; + + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + brickinfo); + if (ret) + goto out; + + ret = 0; +out: + GF_FREE (brickid_dup); + return ret; +} + int __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, void *data) @@ -3363,10 +3400,15 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, xlator_t *this = NULL; glusterd_conf_t *conf = NULL; int ret = 0; + char *brickid = NULL; glusterd_brickinfo_t *brickinfo = NULL; - brickinfo = mydata; - if (!brickinfo) + brickid = mydata; + if (!brickid) + return 0; + + ret = get_brickinfo_from_brickid (brickid, &brickinfo); + if (ret) return 0; this = THIS; @@ -3385,6 +3427,8 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, case RPC_CLNT_DISCONNECT: gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); + if (rpc_clnt_is_disabled (rpc)) + GF_FREE (brickid); break; default: diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 6cb69615e3d..ab0b8fdb671 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1037,6 +1037,32 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) return 0; } +int +glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + + if (!volume_id) + return -1; + + this = THIS; + priv = this->private; + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + voliter->volname); + break; + } + return ret; +} + int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) { @@ -1209,6 +1235,8 @@ glusterd_brick_connect (glusterd_volinfo_t *volinfo, { int ret = 0; char socketpath[PATH_MAX] = {0}; + char volume_id_str[64]; + char *brickid = NULL; dict_t *options = NULL; struct rpc_clnt *rpc = NULL; glusterd_conf_t *priv = THIS->private; @@ -1230,10 +1258,17 @@ glusterd_brick_connect (glusterd_volinfo_t *volinfo, socketpath, 600); if (ret) goto out; + + uuid_utoa_r (volinfo->volume_id, volume_id_str); + ret = gf_asprintf (&brickid, "%s:%s:%s", volume_id_str, + brickinfo->hostname, brickinfo->path); + if (ret < 0) + goto out; + synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&rpc, options, glusterd_brick_rpc_notify, - brickinfo); + brickid); synclock_lock (&priv->big_lock); if (ret) goto out; @@ -1464,18 +1499,21 @@ glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo, int32_t glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) { - GF_ASSERT (brickinfo); - glusterd_conf_t *priv = THIS->private; + rpc_clnt_t *rpc = NULL; - if (brickinfo->rpc) { - /* cleanup the saved-frames before last unref */ - synclock_unlock (&priv->big_lock); - rpc_clnt_connection_cleanup (&brickinfo->rpc->conn); - synclock_lock (&priv->big_lock); + GF_ASSERT (brickinfo); - rpc_clnt_unref (brickinfo->rpc); - brickinfo->rpc = NULL; + if (!brickinfo) { + gf_log_callingfn ("glusterd", GF_LOG_WARNING, "!brickinfo"); + return -1; } + + rpc = brickinfo->rpc; + brickinfo->rpc = NULL; + + if (rpc) + rpc_clnt_unref (rpc); + return 0; } @@ -3444,17 +3482,12 @@ int32_t glusterd_nodesvc_disconnect (char *server) { struct rpc_clnt *rpc = NULL; - glusterd_conf_t *priv = THIS->private; rpc = glusterd_nodesvc_get_rpc (server); + (void)glusterd_nodesvc_set_rpc (server, NULL); - if (rpc) { - synclock_unlock (&priv->big_lock); - rpc_clnt_connection_cleanup (&rpc->conn); - synclock_lock (&priv->big_lock); + if (rpc) rpc_clnt_unref (rpc); - (void)glusterd_nodesvc_set_rpc (server, NULL); - } return 0; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 9724b5fc253..06f85e8d35d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -117,6 +117,9 @@ glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name); int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo); +int +glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo); + int32_t glusterd_service_stop(const char *service, char *pidfile, int sig, gf_boolean_t force_kill); |