| author | Atin Mukherjee <amukherj@redhat.com> | 2017-05-19 21:04:53 +0530 |
|---|---|---|
| committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-05-26 12:11:28 +0000 |
| commit | 3ca5ae2f3bff2371042b607b8e8a218bf316b48c | |
| tree | 99671a396708a555c86d16c7bc70d41dd491e603 | |
| parent | 23930326e0378edace9c8c41e8ae95931a2f68ba | |
glusterfsd: process attach and detach request inside lock
With brick multiplexing, attach and detach requests may well be
processed in parallel, so a mutex lock is required to avoid concurrent
updates to the same graph list.
Credits: Rafi (rkavunga@redhat.com) for the RCA of this issue
Change-Id: Ic8e6d1708655c8a143c5a3690968dfa572a32a9c
BUG: 1454865
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://review.gluster.org/17374
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
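The serialization pattern the patch applies is the same in all three handlers: take the per-context `volfile_lock` before walking or mutating the graph list, and release it on every exit path. Below is a minimal standalone sketch of that pattern; it uses plain pthreads rather than GlusterFS's `LOCK`/`UNLOCK` macros, and the names `graph_ctx_t`, `attach_handler`, and `detach_handler` are hypothetical stand-ins for illustration, not GlusterFS APIs.

```c
#include <pthread.h>
#include <string.h>

typedef struct graph_node {
        const char         *name;
        struct graph_node  *next;
} graph_node_t;

typedef struct {
        pthread_mutex_t  volfile_lock;  /* serializes graph-list updates */
        graph_node_t    *graphs;        /* shared list of attached graphs */
} graph_ctx_t;

/* Attach: prepend a graph to the shared list under the lock. */
static void
attach_handler (graph_ctx_t *ctx, graph_node_t *newgraph)
{
        pthread_mutex_lock (&ctx->volfile_lock);
        {
                newgraph->next = ctx->graphs;
                ctx->graphs = newgraph;
        }
        pthread_mutex_unlock (&ctx->volfile_lock);
}

/* Detach: unlink the named graph under the same lock, so a concurrent
 * attach can never observe (or corrupt) a half-updated list. */
static void
detach_handler (graph_ctx_t *ctx, const char *name)
{
        graph_node_t **trav_p;

        pthread_mutex_lock (&ctx->volfile_lock);
        {
                for (trav_p = &ctx->graphs; *trav_p;
                     trav_p = &(*trav_p)->next) {
                        if (strcmp ((*trav_p)->name, name) == 0) {
                                *trav_p = (*trav_p)->next;
                                break;
                        }
                }
        }
        pthread_mutex_unlock (&ctx->volfile_lock);
}

int
main (void)
{
        graph_ctx_t  ctx   = { PTHREAD_MUTEX_INITIALIZER, NULL };
        graph_node_t brick = { "brick-0", NULL };

        /* In glusterfsd these would run on concurrent RPC threads. */
        attach_handler (&ctx, &brick);
        detach_handler (&ctx, "brick-0");
        return 0;
}
```

One detail the sketch glosses over but the real patch must handle: an early `return` inside the critical section has to drop the lock first, which is why the diff below adds an `UNLOCK (&ctx->volfile_lock)` before the early return in `glusterfs_volfile_fetch`.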
| -rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 173 |
|---|---|---|
| -rw-r--r-- | xlators/protocol/server/src/server-handshake.c | 9 |

2 files changed, 105 insertions, 77 deletions
```diff
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 698cd708799..8ede110121b 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -198,8 +198,9 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
 {
         gd1_mgmt_brick_op_req   xlator_req      = {0,};
         ssize_t                 ret;
-        xlator_t                *top;
-        xlator_t                *victim;
+        xlator_t                *top = NULL;
+        xlator_t                *victim = NULL;
+        glusterfs_ctx_t         *ctx    = NULL;
         xlator_list_t           **trav_p;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
@@ -208,52 +209,62 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
                 req->rpc_err = GARBAGE_ARGS;
                 return -1;
         }
+        ctx = glusterfsd_ctx;
 
-        /* Find the xlator_list_t that points to our victim. */
-        top = glusterfsd_ctx->active->first;
-        for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
-                victim = (*trav_p)->xlator;
-                if (strcmp (victim->name, xlator_req.name) == 0) {
-                        break;
+        LOCK (&ctx->volfile_lock);
+        {
+                /* Find the xlator_list_t that points to our victim. */
+                top = glusterfsd_ctx->active->first;
+                for (trav_p = &top->children; *trav_p;
+                     trav_p = &(*trav_p)->next) {
+                        victim = (*trav_p)->xlator;
+                        if (strcmp (victim->name, xlator_req.name) == 0) {
+                                break;
+                        }
                 }
-        }
 
-        if (!*trav_p) {
-                gf_log (THIS->name, GF_LOG_ERROR,
-                        "can't terminate %s - not found", xlator_req.name);
-                /*
-                 * Used to be -ENOENT.  However, the caller asked us to make
-                 * sure it's down and if it's already down that's good enough.
-                 */
-                glusterfs_terminate_response_send (req, 0);
-                goto err;
-        }
+                if (!*trav_p) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "can't terminate %s - not found",
+                                xlator_req.name);
+                        /*
+                         * Used to be -ENOENT.  However, the caller asked us to
+                         * make sure it's down and if it's already down that's
+                         * good enough.
+                         */
+                        glusterfs_terminate_response_send (req, 0);
+                        goto err;
+                }
 
-        glusterfs_terminate_response_send (req, 0);
-        if ((trav_p == &top->children) && !(*trav_p)->next) {
-                gf_log (THIS->name, GF_LOG_INFO,
-                        "terminating after loss of last child %s",
-                        xlator_req.name);
-                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
-                cleanup_and_exit (SIGTERM);
-        } else {
-                /*
-                 * This is terribly unsafe without quiescing or shutting things
-                 * down properly (or even locking) but it gets us to the point
-                 * where we can test other stuff.
-                 *
-                 * TBD: finish implementing this "detach" code properly
-                 */
-                gf_log (THIS->name, GF_LOG_INFO, "detaching not-only child %s",
-                        xlator_req.name);
-                top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
-                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+                glusterfs_terminate_response_send (req, 0);
+                if ((trav_p == &top->children) && !(*trav_p)->next) {
+                        gf_log (THIS->name, GF_LOG_INFO,
+                                "terminating after loss of last child %s",
+                                xlator_req.name);
+                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+                                                     xlator_req.name);
+                        kill (getpid(), SIGTERM);
+                } else {
+                        /*
+                         * This is terribly unsafe without quiescing or shutting
+                         * things down properly but it gets us to the point
+                         * where we can test other stuff.
+                         *
+                         * TBD: finish implementing this "detach" code properly
+                         */
+                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+                                " child %s", xlator_req.name);
+                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
+                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+                                                     xlator_req.name);
+
+                        *trav_p = (*trav_p)->next;
+                        glusterfs_autoscale_threads (THIS->ctx, -1);
+                }
 
-                *trav_p = (*trav_p)->next;
-                glusterfs_autoscale_threads (THIS->ctx, -1);
         }
-
 err:
+        UNLOCK (&ctx->volfile_lock);
         free (xlator_req.name);
         xlator_req.name = NULL;
         return 0;
@@ -828,6 +839,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
         gd1_mgmt_brick_op_req   xlator_req      = {0,};
         xlator_t                *this           = NULL;
         glusterfs_graph_t       *newgraph       = NULL;
+        glusterfs_ctx_t         *ctx            = NULL;
 
         GF_ASSERT (req);
         this = THIS;
@@ -842,32 +854,38 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
                 return -1;
         }
         ret = 0;
+        ctx = this->ctx;
 
-        if (this->ctx->active) {
-                gf_log (this->name, GF_LOG_INFO,
-                        "got attach for %s", xlator_req.name);
-                ret = glusterfs_graph_attach (this->ctx->active,
-                                              xlator_req.name, &newgraph);
-                if (ret == 0) {
-                        ret = glusterfs_graph_parent_up (newgraph);
-                        if (ret) {
-                                gf_msg (this->name, GF_LOG_ERROR, 0,
-                                        LG_MSG_EVENT_NOTIFY_FAILED,
-                                        "Parent up notification "
-                                        "failed");
-                                goto out;
+        LOCK (&ctx->volfile_lock);
+        {
+                if (this->ctx->active) {
+                        gf_log (this->name, GF_LOG_INFO,
+                                "got attach for %s", xlator_req.name);
+                        ret = glusterfs_graph_attach (this->ctx->active,
+                                                      xlator_req.name,
+                                                      &newgraph);
+                        if (ret == 0) {
+                                ret = glusterfs_graph_parent_up (newgraph);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                LG_MSG_EVENT_NOTIFY_FAILED,
+                                                "Parent up notification "
+                                                "failed");
+                                        goto out;
+                                }
+                                glusterfs_autoscale_threads (this->ctx, 1);
                         }
-                        glusterfs_autoscale_threads (this->ctx, 1);
+                } else {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "got attach for %s but no active graph",
+                                xlator_req.name);
                 }
-        } else {
-                gf_log (this->name, GF_LOG_WARNING,
-                        "got attach for %s but no active graph",
-                        xlator_req.name);
-        }
 
-        glusterfs_translator_info_response_send (req, ret, NULL, NULL);
+                glusterfs_translator_info_response_send (req, ret, NULL, NULL);
 
 out:
+                UNLOCK (&ctx->volfile_lock);
+        }
         free (xlator_req.input.input_val);
         free (xlator_req.name);
 
@@ -1981,23 +1999,28 @@ glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
         xlator_list_t   *trav;
         int             ret;
 
-        if (ctx->active) {
-                server_xl = ctx->active->first;
-                if (strcmp (server_xl->type, "protocol/server") != 0) {
-                        server_xl = NULL;
+        LOCK (&ctx->volfile_lock);
+        {
+                if (ctx->active) {
+                        server_xl = ctx->active->first;
+                        if (strcmp (server_xl->type, "protocol/server") != 0) {
+                                server_xl = NULL;
+                        }
+                }
+                if (!server_xl) {
+                        /* Startup (ctx->active not set) or non-server. */
+                        UNLOCK (&ctx->volfile_lock);
+                        return glusterfs_volfile_fetch_one
+                                (ctx, ctx->cmd_args.volfile_id);
                 }
-        }
-        if (!server_xl) {
-                /* Startup (ctx->active not set) or non-server. */
-                return glusterfs_volfile_fetch_one (ctx,
-                                                    ctx->cmd_args.volfile_id);
-        }
 
-        ret = 0;
-        for (trav = server_xl->children; trav; trav = trav->next) {
-                ret |= glusterfs_volfile_fetch_one (ctx,
-                                                    trav->xlator->volfile_id);
+                ret = 0;
+                for (trav = server_xl->children; trav; trav = trav->next) {
+                        ret |= glusterfs_volfile_fetch_one
+                                (ctx, trav->xlator->volfile_id);
+                }
         }
+        UNLOCK (&ctx->volfile_lock);
 
         return ret;
 }
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 64267f2aef9..f00804a3d3a 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -412,7 +412,7 @@ server_setvolume (rpcsvc_request_t *req)
         rpc_transport_t     *xprt          = NULL;
         int32_t              fop_version   = 0;
         int32_t              mgmt_version  = 0;
-
+        glusterfs_ctx_t     *ctx           = NULL;
 
         params = dict_new ();
         reply  = dict_new ();
@@ -423,6 +423,7 @@ server_setvolume (rpcsvc_request_t *req)
                 req->rpc_err = GARBAGE_ARGS;
                 goto fail;
         }
+        ctx = THIS->ctx;
 
         this = req->svc->xl;
         /* this is to ensure config_params is populated with the first brick
@@ -468,7 +469,11 @@ server_setvolume (rpcsvc_request_t *req)
                 goto fail;
         }
 
-        xl = get_xlator_by_name (this, name);
+        LOCK (&ctx->volfile_lock);
+        {
+                xl = get_xlator_by_name (this, name);
+        }
+        UNLOCK (&ctx->volfile_lock);
         if (xl == NULL) {
                 ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
                                    name);
```
