summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaushal M <kaushal@redhat.com>2013-05-16 16:03:52 +0530
committerVijay Bellur <vbellur@redhat.com>2013-07-02 19:18:26 -0700
commit7fd38981278c8a51587f1db5b59f8cfeed5c6e5a (patch)
tree79515aff453c46a47aa01b00ff38641e78b1faed
parent979a17d49a8dc9a19d9f3a466c137d5cf2c79a07 (diff)
glusterd: More checks before starting rebalance/remove-brick
Check if a previous remove-brick operation has been committed before starting a new rebalance/remove-brick task. Change-Id: I553e5ba64a6a352ca91032ab1a17997051a4494e BUG: 963541 Signed-off-by: Kaushal M <kaushal@redhat.com> Reviewed-on: http://review.gluster.org/5019 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rwxr-xr-xtests/bugs/bug-963541.t33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c38
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c13
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h4
5 files changed, 93 insertions, 12 deletions
diff --git a/tests/bugs/bug-963541.t b/tests/bugs/bug-963541.t
new file mode 100755
index 00000000000..950c7db548b
--- /dev/null
+++ b/tests/bugs/bug-963541.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0;
+
+# Start a remove-brick and try to start a rebalance/remove-brick without committing
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
+
+TEST ! $CLI volume rebalance $V0 start
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+
+#Try to start rebalance/remove-brick again after commit
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit
+
+gluster volume status
+
+TEST $CLI volume rebalance $V0 start
+TEST $CLI volume rebalance $V0 stop
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+TEST $CLI volume stop $V0
+
+cleanup;
+
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 4e5204ad19e..854d8c728f1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1264,6 +1264,17 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
+ if (!gd_is_remove_brick_committed (volinfo)) {
+ snprintf (msg, sizeof (msg), "An earlier remove-brick "
+ "task exists for volume %s. Either commit it"
+ " or stop it before starting a new task.",
+ volinfo->volname);
+ errstr = gf_strdup (msg);
+ gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick"
+ " task exists for volume %s.",
+ volinfo->volname);
+ goto out;
+ }
if (glusterd_is_defrag_on(volinfo)) {
errstr = gf_strdup("Rebalance is in progress. Please "
"retry after completion");
@@ -1538,9 +1549,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
}
}
- /* Clear task-id on commmitting/stopping of remove-brick operation */
- if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS))
+ /* Clear task-id & rebal.op on commmitting/stopping remove-brick */
+ if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) {
uuid_clear (volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+ }
ret = -1;
switch (cmd) {
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 4f77f088397..60dd9c19bde 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe);
int
glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
- size_t len)
+ size_t len, glusterd_op_t op)
{
- int ret = -1;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+ /* Check only if operation is not remove-brick */
+ if ((GD_OP_REMOVE_BRICK != op) &&
+ !gd_is_remove_brick_committed (volinfo)) {
+ gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on "
+ "volume %s is not yet committed", volinfo->volname);
+ snprintf (op_errstr, len, "A remove-brick task on volume %s is"
+ " not yet committed. Either commit or stop the "
+ "remove-brick task.", volinfo->volname);
+ goto out;
+ }
if (glusterd_is_defrag_on (volinfo)) {
- gf_log ("glusterd", GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"rebalance on volume %s already started",
volinfo->volname);
snprintf (op_errstr, len, "Rebalance on %s is already started",
@@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
if (glusterd_is_rb_started (volinfo) ||
glusterd_is_rb_paused (volinfo)) {
- gf_log ("glusterd", GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_DEBUG,
"Rebalance failed as replace brick is in progress on volume %s",
volinfo->volname);
snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
@@ -66,7 +81,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
}
ret = 0;
out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
@@ -190,7 +205,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
GF_ASSERT (volinfo);
GF_ASSERT (op_errstr);
- ret = glusterd_defrag_start_validate (volinfo, op_errstr, len);
+ ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op);
if (ret)
goto out;
if (!volinfo->rebal.defrag)
@@ -544,8 +559,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
ret = 0;
}
}
- ret = glusterd_defrag_start_validate (volinfo,
- msg, sizeof (msg));
+ ret = glusterd_defrag_start_validate (volinfo, msg,
+ sizeof (msg),
+ GD_OP_REBALANCE);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
"start validate failed");
@@ -654,10 +670,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
cmd, NULL, GD_OP_REBALANCE);
break;
case GF_DEFRAG_CMD_STOP:
- /* Clear task-id only on explicitly stopping the
- * rebalance process.
+ /* Clear task-id only on explicitly stopping rebalance.
+ * Also clear the stored operation, so it doesn't cause trouble
+ * with future rebalance/remove-brick starts
*/
uuid_clear (volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
/* Fall back to the old volume file in case of decommission*/
list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 7e822594ed8..a772ad67a62 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -7630,3 +7630,16 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
return;
}
+
+/* A task is committed/completed once the task-id for it is cleared */
+gf_boolean_t
+gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo)
+{
+ GF_ASSERT (volinfo);
+
+ if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) &&
+ !uuid_is_null (volinfo->rebal.rebalance_id))
+ return _gf_false;
+
+ return _gf_true;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 4739fab2107..a63ea6a66b8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -528,6 +528,10 @@ glusterd_is_same_address (char *name1, char *name2);
void
gd_update_volume_op_versions (glusterd_volinfo_t *volinfo);
+
char*
gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo);
+
+gf_boolean_t
+gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo);
#endif