author | Mohit Agrawal <moagrawa@redhat.com> | 2017-05-25 21:43:42 +0530
---|---|---
committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-05-31 20:43:53 +0000
commit | dba55ae364a2772904bb68a6bd0ea87289ee1470 | (patch)
tree | e8a7cf51bd45464cd26f9c4270787ffc50228854 | /glusterfsd/src
parent | de92c363c95d16966dbcc9d8763fd4448dd84d13 | (diff)
glusterfs: Not able to mount a running volume after enabling brick mux and stopping any volume
Problem: With brick mux enabled, if any volume is down and a mount of a
running volume is then attempted, the mount command hangs.

Solution: With brick mux enabled, the server shares one data structure
(server_conf) across all associated subvolumes. When any subvolume goes
down in an ungraceful manner (e.g. its brick directory is removed), the
posix xlator sends a GF_EVENT_CHILD_DOWN event to its parent xlators,
and the server's notify handler sets child_up to false in server_conf.
When a client then tries to communicate with the server through a
mount, it checks conf->child_up, finds it FALSE, and fails with the
message "translator are not yet ready".
This patch updates the server_conf structure to track the child_up
status per xlator. Another important correction in this patch is to
clean up the server-side xlator threads after a volume is stopped.
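
As an illustration of the per-xlator status idea, here is a minimal C
sketch. It is NOT the patch's actual code (the server_conf change lives
in the protocol/server xlator, outside this diffstat), and
child_status_t, status_list, and subvol_is_up are assumed names:

```c
#include <stdbool.h>
#include <string.h>

/* One status record per attached brick (illustrative types only). */
typedef struct child_status {
        struct child_status *next;
        const char          *name;      /* subvolume (brick) name */
        bool                 child_up;  /* per-child, not global  */
} child_status_t;

typedef struct {
        child_status_t *status_list;    /* one entry per subvolume */
} server_conf_t;

/*
 * Before the fix, a single shared child_up flag meant one dead brick
 * made every mount believe the whole multiplexed server was down.
 * With per-child status, a mount only consults the subvolume it
 * actually targets.
 */
static bool
subvol_is_up (server_conf_t *conf, const char *name)
{
        child_status_t *cs;

        for (cs = conf->status_list; cs != NULL; cs = cs->next)
                if (strcmp (cs->name, name) == 0)
                        return cs->child_up;

        return false;   /* unknown subvolume: treat as down */
}
```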
BUG: 1453977
Change-Id: Ic54da3f01881b7c9429ce92cc569236eb1d43e0d
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://review.gluster.org/17356
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
Diffstat (limited to 'glusterfsd/src')
-rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 113
1 file changed, 58 insertions, 55 deletions
```diff
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 8ede110121b..c17bf3bb6fc 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -198,10 +198,11 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
 {
         gd1_mgmt_brick_op_req xlator_req = {0,};
         ssize_t ret;
-        xlator_t *top = NULL;
-        xlator_t *victim = NULL;
-        glusterfs_ctx_t *ctx = NULL;
-        xlator_list_t **trav_p;
+        glusterfs_ctx_t *ctx = NULL;
+        xlator_t *top = NULL;
+        xlator_t *victim = NULL;
+        xlator_list_t **trav_p = NULL;
+        gf_boolean_t lockflag = _gf_false;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
                               (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
@@ -214,57 +215,54 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
 
         LOCK (&ctx->volfile_lock);
         {
                 /* Find the xlator_list_t that points to our victim. */
-                top = glusterfsd_ctx->active->first;
-                for (trav_p = &top->children; *trav_p;
-                     trav_p = &(*trav_p)->next) {
-                        victim = (*trav_p)->xlator;
-                        if (strcmp (victim->name, xlator_req.name) == 0) {
-                                break;
+                if (glusterfsd_ctx->active) {
+                        top = glusterfsd_ctx->active->first;
+                        for (trav_p = &top->children; *trav_p;
+                             trav_p = &(*trav_p)->next) {
+                                victim = (*trav_p)->xlator;
+                                if (strcmp (victim->name, xlator_req.name) == 0) {
+                                        break;
+                                }
                         }
                 }
-
-                if (!*trav_p) {
-                        gf_log (THIS->name, GF_LOG_ERROR,
-                                "can't terminate %s - not found",
-                                xlator_req.name);
-                        /*
-                         * Used to be -ENOENT. However, the caller asked us to
-                         * make sure it's down and if it's already down that's
-                         * good enough.
-                         */
-                        glusterfs_terminate_response_send (req, 0);
-                        goto err;
-                }
-
+        }
+        if (!*trav_p) {
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "can't terminate %s - not found",
+                        xlator_req.name);
+                /*
+                 * Used to be -ENOENT. However, the caller asked us to
+                 * make sure it's down and if it's already down that's
+                 * good enough.
+                 */
                 glusterfs_terminate_response_send (req, 0);
-                if ((trav_p == &top->children) && !(*trav_p)->next) {
-                        gf_log (THIS->name, GF_LOG_INFO,
-                                "terminating after loss of last child %s",
-                                xlator_req.name);
-                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
-                                                     xlator_req.name);
-                        kill (getpid(), SIGTERM);
-                } else {
-                        /*
-                         * This is terribly unsafe without quiescing or shutting
-                         * things down properly but it gets us to the point
-                         * where we can test other stuff.
-                         *
-                         * TBD: finish implementing this "detach" code properly
-                         */
-                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
-                                " child %s", xlator_req.name);
-                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
-                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
-                                                     xlator_req.name);
-
-                        *trav_p = (*trav_p)->next;
-                        glusterfs_autoscale_threads (THIS->ctx, -1);
-                }
+                goto err;
+        }
+        glusterfs_terminate_response_send (req, 0);
+        if ((trav_p == &top->children) && !(*trav_p)->next) {
+                gf_log (THIS->name, GF_LOG_INFO,
+                        "terminating after loss of last child %s",
+                        xlator_req.name);
+                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+                kill (getpid(), SIGTERM);
+        } else {
+                /*
+                 * This is terribly unsafe without quiescing or shutting
+                 * things down properly but it gets us to the point
+                 * where we can test other stuff.
+                 *
+                 * TBD: finish implementing this "detach" code properly
+                 */
+                UNLOCK (&ctx->volfile_lock);
+                lockflag = _gf_true;
+                gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+                        " child %s", xlator_req.name);
+                top->notify (top, GF_EVENT_CLEANUP, victim);
         }
 err:
-        UNLOCK (&ctx->volfile_lock);
+        if (!lockflag)
+                UNLOCK (&ctx->volfile_lock);
         free (xlator_req.name);
         xlator_req.name = NULL;
         return 0;
@@ -838,6 +836,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
         int32_t ret = -1;
         gd1_mgmt_brick_op_req xlator_req = {0,};
         xlator_t *this = NULL;
+        xlator_t *nextchild = NULL;
         glusterfs_graph_t *newgraph = NULL;
         glusterfs_ctx_t *ctx = NULL;
 
@@ -862,15 +861,19 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
                         gf_log (this->name, GF_LOG_INFO,
                                 "got attach for %s", xlator_req.name);
                         ret = glusterfs_graph_attach (this->ctx->active,
-                                                      xlator_req.name,
-                                                      &newgraph);
-                        if (ret == 0) {
-                                ret = glusterfs_graph_parent_up (newgraph);
+                                                      xlator_req.name, &newgraph);
+                        if (!ret && (newgraph && newgraph->first)) {
+                                nextchild = newgraph->first;
+                                ret = xlator_notify (nextchild,
+                                                     GF_EVENT_PARENT_UP,
+                                                     nextchild);
                                 if (ret) {
-                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        gf_msg (this->name, GF_LOG_ERROR,
+                                                0,
                                                 LG_MSG_EVENT_NOTIFY_FAILED,
                                                 "Parent up notification "
-                                                "failed");
+                                                "failed for %s ",
+                                                nextchild->name);
                                         goto out;
                                 }
                                 glusterfs_autoscale_threads (this->ctx, 1);
```
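
One detail worth calling out in glusterfs_handle_terminate above: in
the detach branch, volfile_lock is released before top->notify (top,
GF_EVENT_CLEANUP, victim) runs, and lockflag records that so the shared
err: exit does not unlock twice. Below is a standalone sketch of that
conditional-unlock pattern, with a plain pthread mutex standing in for
gluster's LOCK()/UNLOCK() macros; handle_terminate and notify_cleanup
are hypothetical names, not the real functions:

```c
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t volfile_lock = PTHREAD_MUTEX_INITIALIZER;

static void
notify_cleanup (void)
{
        /* Stands in for top->notify (top, GF_EVENT_CLEANUP, victim);
         * it may block or re-enter locking code, so it must run
         * without volfile_lock held. */
}

static int
handle_terminate (bool detach_requested)
{
        bool lockflag = false;

        pthread_mutex_lock (&volfile_lock);
        /* ... look up the victim xlator under the lock ... */

        if (detach_requested) {
                /* Drop the lock before notifying, so the cleanup path
                 * cannot deadlock against threads waiting on it. */
                pthread_mutex_unlock (&volfile_lock);
                lockflag = true;
                notify_cleanup ();
        }

        /* Single exit: unlock only if this path still holds the lock. */
        if (!lockflag)
                pthread_mutex_unlock (&volfile_lock);
        return 0;
}
```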