diff options
| author | Kaushal M <kaushal@redhat.com> | 2013-05-16 16:03:52 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2013-07-02 19:18:26 -0700 | 
| commit | 7fd38981278c8a51587f1db5b59f8cfeed5c6e5a (patch) | |
| tree | 79515aff453c46a47aa01b00ff38641e78b1faed | |
| parent | 979a17d49a8dc9a19d9f3a466c137d5cf2c79a07 (diff) | |
glusterd: More checks before starting rebalance/remove-brick
Check if a previous remove-brick operation has been committed before
starting a new rebalance/remove-brick task.
Change-Id: I553e5ba64a6a352ca91032ab1a17997051a4494e
BUG: 963541
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/5019
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
| -rwxr-xr-x | tests/bugs/bug-963541.t | 33 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 17 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 38 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 13 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 4 | 
5 files changed, 93 insertions, 12 deletions
diff --git a/tests/bugs/bug-963541.t b/tests/bugs/bug-963541.t new file mode 100755 index 000000000..950c7db54 --- /dev/null +++ b/tests/bugs/bug-963541.t @@ -0,0 +1,33 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}; +TEST $CLI volume start $V0; + +# Start a remove-brick and try to start a rebalance/remove-brick without committing +TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start + +TEST ! $CLI volume rebalance $V0 start +TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start + +#Try to start rebalance/remove-brick again after commit +TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit + +gluster volume status + +TEST $CLI volume rebalance $V0 start +TEST $CLI volume rebalance $V0 stop + +TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start +TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop + +TEST $CLI volume stop $V0 + +cleanup; + diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 4e5204ad1..854d8c728 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1264,6 +1264,17 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)                          gf_log (this->name, GF_LOG_ERROR, "%s", errstr);                          goto out;                  } +                if (!gd_is_remove_brick_committed (volinfo)) { +                        snprintf (msg, sizeof (msg), "An earlier remove-brick " +                                  "task exists for volume %s. Either commit it" +                                  " or stop it before starting a new task.", +                                  volinfo->volname); +                        errstr = gf_strdup (msg); +                        gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick" +                                " task exists for volume %s.", +                                volinfo->volname); +                        goto out; +                }                  if (glusterd_is_defrag_on(volinfo)) {                          errstr = gf_strdup("Rebalance is in progress. Please "                                             "retry after completion"); @@ -1538,9 +1549,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)                  }          } -        /* Clear task-id on commmitting/stopping of remove-brick operation */ -        if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) +        /* Clear task-id & rebal.op on commmitting/stopping remove-brick */ +        if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) {                  uuid_clear (volinfo->rebal.rebalance_id); +                volinfo->rebal.op = GD_OP_NONE; +        }          ret = -1;          switch (cmd) { diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 4f77f0883..60dd9c19b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,                            int count, void *myframe);  int  glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, -                                size_t len) +                                size_t len, glusterd_op_t op)  { -        int     ret = -1; +        int      ret = -1; +        xlator_t *this = NULL; + +        this = THIS; +        GF_ASSERT (this); + +        /* Check only if operation is not remove-brick */ +        if ((GD_OP_REMOVE_BRICK != op) && +            !gd_is_remove_brick_committed (volinfo)) { +                gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " +                        "volume %s is not yet committed", volinfo->volname); +                snprintf (op_errstr, len, "A remove-brick task on volume %s is" +                          " not yet committed. Either commit or stop the " +                          "remove-brick task.", volinfo->volname); +                goto out; +        }          if (glusterd_is_defrag_on (volinfo)) { -                gf_log ("glusterd", GF_LOG_DEBUG, +                gf_log (this->name, GF_LOG_DEBUG,                          "rebalance on volume %s already started",                          volinfo->volname);                  snprintf (op_errstr, len, "Rebalance on %s is already started", @@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,          if (glusterd_is_rb_started (volinfo) ||              glusterd_is_rb_paused (volinfo)) { -                gf_log ("glusterd", GF_LOG_DEBUG, +                gf_log (this->name, GF_LOG_DEBUG,                          "Rebalance failed as replace brick is in progress on volume %s",                          volinfo->volname);                  snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " @@ -66,7 +81,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,          }          ret = 0;  out: -        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); +        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);          return ret;  } @@ -190,7 +205,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,          GF_ASSERT (volinfo);          GF_ASSERT (op_errstr); -        ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); +        ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);          if (ret)                  goto out;          if (!volinfo->rebal.defrag) @@ -544,8 +559,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)                                  ret = 0;                          }                  } -                ret = glusterd_defrag_start_validate (volinfo, -                                msg, sizeof (msg)); +                ret = glusterd_defrag_start_validate (volinfo, msg, +                                                      sizeof (msg), +                                                      GD_OP_REBALANCE);                  if (ret) {                          gf_log (this->name, GF_LOG_DEBUG,                                          "start validate failed"); @@ -654,10 +670,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)                                                      cmd, NULL, GD_OP_REBALANCE);                   break;          case GF_DEFRAG_CMD_STOP: -                /* Clear task-id only on explicitly stopping the -                 * rebalance process. +                /* Clear task-id only on explicitly stopping rebalance. +                 * Also clear the stored operation, so it doesn't cause trouble +                 * with future rebalance/remove-brick starts                   */                  uuid_clear (volinfo->rebal.rebalance_id); +                volinfo->rebal.op = GD_OP_NONE;                  /* Fall back to the old volume file in case of decommission*/                  list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 7e822594e..a772ad67a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -7630,3 +7630,16 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)          return;  } + +/* A task is committed/completed once the task-id for it is cleared */ +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo) +{ +        GF_ASSERT (volinfo); + +        if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) && +            !uuid_is_null (volinfo->rebal.rebalance_id)) +                        return _gf_false; + +        return _gf_true; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 4739fab21..a63ea6a66 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -528,6 +528,10 @@ glusterd_is_same_address (char *name1, char *name2);  void  gd_update_volume_op_versions (glusterd_volinfo_t *volinfo); +  char*  gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo); + +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo);  #endif  | 
