 tests/bugs/glusterd/bug-1245045-remove-brick-validation.t |  54
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c             | 119
 xlators/mgmt/glusterd/src/glusterd-utils.c                 |   1
 3 files changed, 143 insertions(+), 31 deletions(-)
diff --git a/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
new file mode 100644
index 00000000000..22a8d557d28
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1245045-remove-brick-validation.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup
+
+TEST launch_cluster 3;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+kill_glusterd 2
+
+#remove-brick should fail as the peer hosting the brick is down
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+
+TEST start_glusterd 2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+#volume status should work
+TEST $CLI_2 volume status
+
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+kill_glusterd 2
+
+#remove-brick commit should fail as the peer hosting the brick is down
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit
+
+TEST start_glusterd 2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+#volume status should work
+TEST $CLI_2 volume status
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop
+
+kill_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+
+TEST start_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_3 volume status
+
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 76a05b2ef93..9e6c73d88a1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1656,6 +1656,83 @@ out:
return ret;
}
+static int
+glusterd_remove_brick_validate_bricks (gf1_op_commands cmd, int32_t brick_count,
+ dict_t *dict,
+ glusterd_volinfo_t *volinfo,
+ char **errstr)
+{
+ char *brick = NULL;
+ char msg[2048] = {0,};
+ char key[256] = {0,};
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int i = 0;
+ int ret = -1;
+
+        /* Check whether all the nodes hosting the bricks to be removed are
+         * up; if not, fail the operation */
+ for (i = 1; i <= brick_count; i++) {
+ snprintf (key, sizeof (key), "brick%d", i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret) {
+ snprintf (msg, sizeof (msg),
+ "Unable to get %s", key);
+ *errstr = gf_strdup (msg);
+ goto out;
+ }
+
+ ret =
+ glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
+ &brickinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Incorrect brick "
+ "%s for volume %s", brick, volinfo->volname);
+ *errstr = gf_strdup (msg);
+ goto out;
+ }
+                /* Do not allow commit if the bricks are not decommissioned
+                 * if it's a remove-brick commit
+                 */
+ if (cmd == GF_OP_CMD_COMMIT && !brickinfo->decommissioned) {
+ snprintf (msg, sizeof (msg), "Brick %s "
+ "is not decommissioned. "
+ "Use start or force option",
+ brick);
+ *errstr = gf_strdup (msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_local_brick (THIS, volinfo, brickinfo))
+ continue;
+
+ rcu_read_lock ();
+ peerinfo = glusterd_peerinfo_find_by_uuid
+ (brickinfo->uuid);
+ if (!peerinfo) {
+ snprintf (msg, sizeof(msg), "Host node of the "
+ "brick %s is not in cluster", brick);
+ *errstr = gf_strdup (msg);
+ ret = -1;
+ rcu_read_unlock ();
+ goto out;
+ }
+ if (!peerinfo->connected) {
+ snprintf (msg, sizeof(msg), "Host node of the "
+ "brick %s is down", brick);
+ *errstr = gf_strdup (msg);
+ ret = -1;
+ rcu_read_unlock ();
+ goto out;
+ }
+ rcu_read_unlock ();
+ }
+
+out:
+ return ret;
+}
+
int
glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
{
@@ -1674,6 +1751,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
char *brick = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
gsync_status_param_t param = {0,};
+ glusterd_peerinfo_t *peerinfo = NULL;
this = THIS;
GF_ASSERT (this);
@@ -1812,6 +1890,12 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
goto out;
}
+ ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
+ dict, volinfo,
+ &errstr);
+ if (ret)
+ goto out;
+
if (is_origin_glusterd (dict)) {
ret = glusterd_generate_and_set_task_id
(dict, GF_REMOVE_BRICK_TID_KEY);
@@ -1856,36 +1940,11 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
goto out;
}
- /* Do not allow commit if the bricks are not decommissioned */
- for ( i = 1; i <= brick_count; i++ ) {
- snprintf (key, sizeof (key), "brick%d", i);
- ret = dict_get_str (dict, key, &brick);
- if (ret) {
- snprintf (msg, sizeof (msg),
- "Unable to get %s", key);
- errstr = gf_strdup (msg);
- goto out;
- }
-
- ret =
- glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
- &brickinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "Incorrect brick "
- "%s for volume %s", brick, volname);
- errstr = gf_strdup (msg);
- goto out;
- }
- if ( !brickinfo->decommissioned ) {
- snprintf (msg, sizeof (msg), "Brick %s "
- "is not decommissioned. "
- "Use start or force option",
- brick);
- errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
- }
+ ret = glusterd_remove_brick_validate_bricks (cmd, brick_count,
+ dict, volinfo,
+ &errstr);
+ if (ret)
+ goto out;
/* If geo-rep is configured, for this volume, it should be
* stopped.
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index ed9e70b9caa..26a7fface29 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3735,7 +3735,6 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo,
new->skipped_files = old->skipped_files;
new->rebalance_failures = old->rebalance_failures;
new->rebalance_time = old->rebalance_time;
- new->dict = (old->dict ? dict_ref (old->dict) : NULL);
/* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume
* import