summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrishnan Parthasarathi <kparthas@redhat.com>2013-05-14 09:59:45 +0530
committerVijay Bellur <vbellur@redhat.com>2013-06-18 21:53:27 -0700
commit878bc03d7df8e18faca13fbf89a7ae55a29b0fdc (patch)
tree7596f67fe5c6ebdf07108c550d8eb6024debe75a
parentb3f480a8e451ff1b11761c4cfca6b798c35bfb04 (diff)
glusterd: Disable transport before cleaning up rpc object
Backport of http://review.gluster.org/5000 Problem: rpc_transport object, which is part of rpc_clnt, is destroyed prematurely. This is because, rpc_transport object is ref'd by socket layer and rpc layer. These ref's, until the synctask'izing of operations, were unref'd sequentially in the epoll thread. With more threads at play, the sequential unref guarantee is off. Fix: Shutting down the transport before proceeding with cleaning up of rpc_clnt object would serialize the unref's on the rpc_transport object and thus eliminating the race. Also, we don't store the address of brickinfo in brick's rpc notify function, to avoid the possibility of referring a freed brickinfo. Instead we use a string based id to 'reach' the corresponding brickinfo. Change-Id: If2739e2eeaee1e8b071ab2b6754b7ea0f81cfceb BUG: 962619 Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-on: http://review.gluster.org/5214 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c48
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c67
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h3
3 files changed, 99 insertions, 19 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index d4b7dc93f..095f1966d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -3356,6 +3356,43 @@ glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req)
__glusterd_handle_cli_clearlocks_volume);
}
+static int
+get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volid_str = NULL;
+ char *brick = NULL;
+ char *brickid_dup = NULL;
+ uuid_t volid = {0};
+ int ret = -1;
+
+ brickid_dup = gf_strdup (brickid);
+ if (!brickid_dup)
+ goto out;
+
+ volid_str = brickid_dup;
+ brick = strchr (brickid_dup, ':');
+ *brick = '\0';
+ brick++;
+ if (!volid_str || !brick)
+ goto out;
+
+ uuid_parse (volid_str, volid);
+ ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
+ brickinfo);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ GF_FREE (brickid_dup);
+ return ret;
+}
+
int
__glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
rpc_clnt_event_t event, void *data)
@@ -3363,10 +3400,15 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
xlator_t *this = NULL;
glusterd_conf_t *conf = NULL;
int ret = 0;
+ char *brickid = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
- brickinfo = mydata;
- if (!brickinfo)
+ brickid = mydata;
+ if (!brickid)
+ return 0;
+
+ ret = get_brickinfo_from_brickid (brickid, &brickinfo);
+ if (ret)
return 0;
this = THIS;
@@ -3385,6 +3427,8 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
case RPC_CLNT_DISCONNECT:
gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT");
glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
+ if (rpc_clnt_is_disabled (rpc))
+ GF_FREE (brickid);
break;
default:
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 6cb69615e..ab0b8fdb6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -1037,6 +1037,32 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo)
return 0;
}
+int
+glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ if (!volume_id)
+ return -1;
+
+ this = THIS;
+ priv = this->private;
+
+ list_for_each_entry (voliter, &priv->volumes, vol_list) {
+ if (uuid_compare (volume_id, voliter->volume_id))
+ continue;
+ *volinfo = voliter;
+ ret = 0;
+ gf_log (this->name, GF_LOG_DEBUG, "Volume %s found",
+ voliter->volname);
+ break;
+ }
+ return ret;
+}
+
int32_t
glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo)
{
@@ -1209,6 +1235,8 @@ glusterd_brick_connect (glusterd_volinfo_t *volinfo,
{
int ret = 0;
char socketpath[PATH_MAX] = {0};
+ char volume_id_str[64];
+ char *brickid = NULL;
dict_t *options = NULL;
struct rpc_clnt *rpc = NULL;
glusterd_conf_t *priv = THIS->private;
@@ -1230,10 +1258,17 @@ glusterd_brick_connect (glusterd_volinfo_t *volinfo,
socketpath, 600);
if (ret)
goto out;
+
+ uuid_utoa_r (volinfo->volume_id, volume_id_str);
+ ret = gf_asprintf (&brickid, "%s:%s:%s", volume_id_str,
+ brickinfo->hostname, brickinfo->path);
+ if (ret < 0)
+ goto out;
+
synclock_unlock (&priv->big_lock);
ret = glusterd_rpc_create (&rpc, options,
glusterd_brick_rpc_notify,
- brickinfo);
+ brickid);
synclock_lock (&priv->big_lock);
if (ret)
goto out;
@@ -1464,18 +1499,21 @@ glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo,
int32_t
glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo)
{
- GF_ASSERT (brickinfo);
- glusterd_conf_t *priv = THIS->private;
+ rpc_clnt_t *rpc = NULL;
- if (brickinfo->rpc) {
- /* cleanup the saved-frames before last unref */
- synclock_unlock (&priv->big_lock);
- rpc_clnt_connection_cleanup (&brickinfo->rpc->conn);
- synclock_lock (&priv->big_lock);
+ GF_ASSERT (brickinfo);
- rpc_clnt_unref (brickinfo->rpc);
- brickinfo->rpc = NULL;
+ if (!brickinfo) {
+ gf_log_callingfn ("glusterd", GF_LOG_WARNING, "!brickinfo");
+ return -1;
}
+
+ rpc = brickinfo->rpc;
+ brickinfo->rpc = NULL;
+
+ if (rpc)
+ rpc_clnt_unref (rpc);
+
return 0;
}
@@ -3444,17 +3482,12 @@ int32_t
glusterd_nodesvc_disconnect (char *server)
{
struct rpc_clnt *rpc = NULL;
- glusterd_conf_t *priv = THIS->private;
rpc = glusterd_nodesvc_get_rpc (server);
+ (void)glusterd_nodesvc_set_rpc (server, NULL);
- if (rpc) {
- synclock_unlock (&priv->big_lock);
- rpc_clnt_connection_cleanup (&rpc->conn);
- synclock_lock (&priv->big_lock);
+ if (rpc)
rpc_clnt_unref (rpc);
- (void)glusterd_nodesvc_set_rpc (server, NULL);
- }
return 0;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 9724b5fc2..06f85e8d3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -117,6 +117,9 @@ glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name);
int32_t
glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo);
+int
+glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo);
+
int32_t
glusterd_service_stop(const char *service, char *pidfile, int sig,
gf_boolean_t force_kill);