author    Mohit Agrawal <moagrawa@redhat.com>  2017-05-25 21:43:42 +0530
committer Jeff Darcy <jeff@pl.atyp.us>         2017-05-31 20:43:53 +0000
commit    dba55ae364a2772904bb68a6bd0ea87289ee1470 (patch)
tree      e8a7cf51bd45464cd26f9c4270787ffc50228854 /glusterfsd/src
parent    de92c363c95d16966dbcc9d8763fd4448dd84d13 (diff)
glusterfs: unable to mount a running volume after enabling brick mux and stopping any volume
Problem: With brick multiplexing enabled, if any volume goes down and a mount of a still-running volume is then attempted, the mount command hangs.

Solution: With brick multiplexing enabled, the server shares one data structure (server_conf) across all associated subvolumes. When any subvolume goes down ungracefully (e.g. its brick directory is removed), the posix xlator sends a GF_EVENT_CHILD_DOWN event to its parent xlators, and the server's notify handler sets child_up to false in server_conf. When a client then tries to communicate with the server through a mount, the server checks conf->child_up, finds it FALSE, and reports "translator are not yet ready". This patch changes server_conf to record child_up status per xlator. Another important correction in this patch is that the server-side xlators' threads are cleaned up after a volume is stopped.

BUG: 1453977
Change-Id: Ic54da3f01881b7c9429ce92cc569236eb1d43e0d
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://review.gluster.org/17356
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
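To illustrate the core idea of the fix, here is a minimal standalone sketch, not GlusterFS code: with brick multiplexing, one server process shares its private data across subvolumes, so a single child_up flag conflates their states; tracking up-status per child keeps one dead brick from making every volume look "not yet ready". All types and names below (child_status, server_state, notify_child, child_is_up) are hypothetical.

/* sketch.c -- per-child up-status instead of one shared flag */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define MAX_CHILDREN 8

struct child_status {
        char name[64];
        bool up;
};

struct server_state {
        struct child_status children[MAX_CHILDREN];
        int nchildren;
};

/* Record a CHILD_UP/CHILD_DOWN event for one subvolume only. */
static void notify_child(struct server_state *s, const char *name, bool up)
{
        for (int i = 0; i < s->nchildren; i++) {
                if (strcmp(s->children[i].name, name) == 0) {
                        s->children[i].up = up;
                        return;
                }
        }
        if (s->nchildren < MAX_CHILDREN) {
                snprintf(s->children[s->nchildren].name,
                         sizeof(s->children[s->nchildren].name), "%s", name);
                s->children[s->nchildren].up = up;
                s->nchildren++;
        }
}

/* A mount of one volume consults only that volume's status. */
static bool child_is_up(const struct server_state *s, const char *name)
{
        for (int i = 0; i < s->nchildren; i++)
                if (strcmp(s->children[i].name, name) == 0)
                        return s->children[i].up;
        return false;
}

int main(void)
{
        struct server_state s = { .nchildren = 0 };

        notify_child(&s, "vol1-brick", true);
        notify_child(&s, "vol2-brick", true);
        /* vol2's brick dies ungracefully; vol1 must stay mountable. */
        notify_child(&s, "vol2-brick", false);

        printf("vol1-brick up: %d\n", child_is_up(&s, "vol1-brick")); /* 1 */
        printf("vol2-brick up: %d\n", child_is_up(&s, "vol2-brick")); /* 0 */
        return 0;
}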
Diffstat (limited to 'glusterfsd/src')
-rw-r--r--  glusterfsd/src/glusterfsd-mgmt.c | 113
1 file changed, 58 insertions(+), 55 deletions(-)
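A note on the terminate handler below: it walks top->children through a pointer-to-pointer (xlator_list_t **trav_p), which lets one loop both find the victim and, in the pre-patch detach path, unlink it with a single assignment (*trav_p = (*trav_p)->next), no separate "prev" pointer needed. A minimal standalone sketch of that idiom, with hypothetical types (node_t, find_link), not GlusterFS code:

/* trav.c -- pointer-to-pointer search and O(1) unlink */
#include <stdio.h>
#include <string.h>

typedef struct node {
        const char *name;
        struct node *next;
} node_t;

/* Return the link that points at `name`, or NULL if absent. */
static node_t **find_link(node_t **head, const char *name)
{
        node_t **trav_p;

        for (trav_p = head; *trav_p; trav_p = &(*trav_p)->next)
                if (strcmp((*trav_p)->name, name) == 0)
                        return trav_p;
        return NULL;
}

int main(void)
{
        node_t c = { "vol3-brick", NULL };
        node_t b = { "vol2-brick", &c };
        node_t a = { "vol1-brick", &b };
        node_t *head = &a;

        node_t **link = find_link(&head, "vol2-brick");
        if (link)
                *link = (*link)->next;  /* unlink the victim in O(1) */

        for (node_t *n = head; n; n = n->next)
                printf("%s\n", n->name);  /* vol1-brick, vol3-brick */
        return 0;
}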
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 8ede110121b..c17bf3bb6fc 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -198,10 +198,11 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
{
gd1_mgmt_brick_op_req xlator_req = {0,};
ssize_t ret;
- xlator_t *top = NULL;
- xlator_t *victim = NULL;
- glusterfs_ctx_t *ctx = NULL;
- xlator_list_t **trav_p;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ xlator_list_t **trav_p = NULL;
+ gf_boolean_t lockflag = _gf_false;
ret = xdr_to_generic (req->msg[0], &xlator_req,
(xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -214,57 +215,54 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
LOCK (&ctx->volfile_lock);
{
/* Find the xlator_list_t that points to our victim. */
- top = glusterfsd_ctx->active->first;
- for (trav_p = &top->children; *trav_p;
- trav_p = &(*trav_p)->next) {
- victim = (*trav_p)->xlator;
- if (strcmp (victim->name, xlator_req.name) == 0) {
- break;
+ if (glusterfsd_ctx->active) {
+ top = glusterfsd_ctx->active->first;
+ for (trav_p = &top->children; *trav_p;
+ trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (strcmp (victim->name, xlator_req.name) == 0) {
+ break;
+ }
}
}
-
- if (!*trav_p) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "can't terminate %s - not found",
- xlator_req.name);
- /*
- * Used to be -ENOENT. However, the caller asked us to
- * make sure it's down and if it's already down that's
- * good enough.
- */
- glusterfs_terminate_response_send (req, 0);
- goto err;
- }
-
+ }
+ if (!*trav_p) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "can't terminate %s - not found",
+ xlator_req.name);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
glusterfs_terminate_response_send (req, 0);
- if ((trav_p == &top->children) && !(*trav_p)->next) {
- gf_log (THIS->name, GF_LOG_INFO,
- "terminating after loss of last child %s",
- xlator_req.name);
- glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
- xlator_req.name);
- kill (getpid(), SIGTERM);
- } else {
- /*
- * This is terribly unsafe without quiescing or shutting
- * things down properly but it gets us to the point
- * where we can test other stuff.
- *
- * TBD: finish implementing this "detach" code properly
- */
- gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
- " child %s", xlator_req.name);
- top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
- glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
- xlator_req.name);
-
- *trav_p = (*trav_p)->next;
- glusterfs_autoscale_threads (THIS->ctx, -1);
- }
+ goto err;
+ }
+ glusterfs_terminate_response_send (req, 0);
+ if ((trav_p == &top->children) && !(*trav_p)->next) {
+ gf_log (THIS->name, GF_LOG_INFO,
+ "terminating after loss of last child %s",
+ xlator_req.name);
+ glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+ kill (getpid(), SIGTERM);
+ } else {
+ /*
+ * This is terribly unsafe without quiescing or shutting
+ * things down properly but it gets us to the point
+ * where we can test other stuff.
+ *
+ * TBD: finish implementing this "detach" code properly
+ */
+ UNLOCK (&ctx->volfile_lock);
+ lockflag = _gf_true;
+ gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+ " child %s", xlator_req.name);
+ top->notify (top, GF_EVENT_CLEANUP, victim);
}
err:
- UNLOCK (&ctx->volfile_lock);
+ if (!lockflag)
+ UNLOCK (&ctx->volfile_lock);
free (xlator_req.name);
xlator_req.name = NULL;
return 0;
@@ -838,6 +836,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
int32_t ret = -1;
gd1_mgmt_brick_op_req xlator_req = {0,};
xlator_t *this = NULL;
+ xlator_t *nextchild = NULL;
glusterfs_graph_t *newgraph = NULL;
glusterfs_ctx_t *ctx = NULL;
@@ -862,15 +861,19 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
gf_log (this->name, GF_LOG_INFO,
"got attach for %s", xlator_req.name);
ret = glusterfs_graph_attach (this->ctx->active,
- xlator_req.name,
- &newgraph);
- if (ret == 0) {
- ret = glusterfs_graph_parent_up (newgraph);
+ xlator_req.name, &newgraph);
+ if (!ret && (newgraph && newgraph->first)) {
+ nextchild = newgraph->first;
+ ret = xlator_notify (nextchild,
+ GF_EVENT_PARENT_UP,
+ nextchild);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
+ gf_msg (this->name, GF_LOG_ERROR,
+ 0,
LG_MSG_EVENT_NOTIFY_FAILED,
"Parent up notification "
- "failed");
+ "failed for %s ",
+ nextchild->name);
goto out;
}
glusterfs_autoscale_threads (this->ctx, 1);
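The lockflag dance in the terminate hunk above (release ctx->volfile_lock before sending GF_EVENT_CLEANUP, then skip the unlock on the shared err: path) serves two purposes: the potentially slow notify does not run under the lock, and the single exit path cannot unlock twice. A minimal pthread sketch of the same pattern, not GlusterFS code, with hypothetical names (volfile_lock, slow_cleanup_notify, handle_terminate):

/* lockflag.c -- early unlock on one branch, flagged shared exit path */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t volfile_lock = PTHREAD_MUTEX_INITIALIZER;

static void slow_cleanup_notify(void)
{
        /* Stands in for top->notify(top, GF_EVENT_CLEANUP, victim),
         * which may take a while and must not run under the lock. */
        printf("cleanup notify sent\n");
}

static int handle_terminate(bool detach)
{
        bool lockflag = false;

        pthread_mutex_lock(&volfile_lock);

        if (detach) {
                /* Drop the lock before the slow call, and remember it. */
                pthread_mutex_unlock(&volfile_lock);
                lockflag = true;
                slow_cleanup_notify();
        }

        /* err: -- shared exit path; unlock only if still held. */
        if (!lockflag)
                pthread_mutex_unlock(&volfile_lock);
        return 0;
}

int main(void)
{
        handle_terminate(false);  /* lock held across the whole handler */
        handle_terminate(true);   /* lock dropped before the notify */
        return 0;
}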