From 7fd38981278c8a51587f1db5b59f8cfeed5c6e5a Mon Sep 17 00:00:00 2001 From: Kaushal M Date: Thu, 16 May 2013 16:03:52 +0530 Subject: glusterd: More checks before starting rebalance/remove-brick Check if a previous remove-brick operation has been committed before starting a new rebalance/remove-brick task. Change-Id: I553e5ba64a6a352ca91032ab1a17997051a4494e BUG: 963541 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/5019 Reviewed-by: Vijay Bellur Tested-by: Gluster Build System --- xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 17 ++++++++++-- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 38 +++++++++++++++++++------- xlators/mgmt/glusterd/src/glusterd-utils.c | 13 +++++++++ xlators/mgmt/glusterd/src/glusterd-utils.h | 4 +++ 4 files changed, 60 insertions(+), 12 deletions(-) (limited to 'xlators') diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 4e5204ad..854d8c72 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1264,6 +1264,17 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) gf_log (this->name, GF_LOG_ERROR, "%s", errstr); goto out; } + if (!gd_is_remove_brick_committed (volinfo)) { + snprintf (msg, sizeof (msg), "An earlier remove-brick " + "task exists for volume %s. Either commit it" + " or stop it before starting a new task.", + volinfo->volname); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick" + " task exists for volume %s.", + volinfo->volname); + goto out; + } if (glusterd_is_defrag_on(volinfo)) { errstr = gf_strdup("Rebalance is in progress. Please " "retry after completion"); @@ -1538,9 +1549,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } } - /* Clear task-id on commmitting/stopping of remove-brick operation */ - if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) + /* Clear task-id & rebal.op on commmitting/stopping remove-brick */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) { uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + } ret = -1; switch (cmd) { diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 4f77f088..60dd9c19 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -42,12 +42,27 @@ glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe); int glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, - size_t len) + size_t len, glusterd_op_t op) { - int ret = -1; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && + !gd_is_remove_brick_committed (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " + "volume %s is not yet committed", volinfo->volname); + snprintf (op_errstr, len, "A remove-brick task on volume %s is" + " not yet committed. Either commit or stop the " + "remove-brick task.", volinfo->volname); + goto out; + } if (glusterd_is_defrag_on (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "rebalance on volume %s already started", volinfo->volname); snprintf (op_errstr, len, "Rebalance on %s is already started", @@ -57,7 +72,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, if (glusterd_is_rb_started (volinfo) || glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "Rebalance failed as replace brick is in progress on volume %s", volinfo->volname); snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " @@ -66,7 +81,7 @@ glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, } ret = 0; out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -190,7 +205,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, GF_ASSERT (volinfo); GF_ASSERT (op_errstr); - ret = glusterd_defrag_start_validate (volinfo, op_errstr, len); + ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op); if (ret) goto out; if (!volinfo->rebal.defrag) @@ -544,8 +559,9 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) ret = 0; } } - ret = glusterd_defrag_start_validate (volinfo, - msg, sizeof (msg)); + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "start validate failed"); @@ -654,10 +670,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) cmd, NULL, GD_OP_REBALANCE); break; case GF_DEFRAG_CMD_STOP: - /* Clear task-id only on explicitly stopping the - * rebalance process. + /* Clear task-id only on explicitly stopping rebalance. + * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts */ uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; /* Fall back to the old volume file in case of decommission*/ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 7e822594..a772ad67 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -7630,3 +7630,16 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo) return; } + +/* A task is committed/completed once the task-id for it is cleared */ +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo) +{ + GF_ASSERT (volinfo); + + if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) && + !uuid_is_null (volinfo->rebal.rebalance_id)) + return _gf_false; + + return _gf_true; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 4739fab2..a63ea6a6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -528,6 +528,10 @@ glusterd_is_same_address (char *name1, char *name2); void gd_update_volume_op_versions (glusterd_volinfo_t *volinfo); + char* gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo); + +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo); #endif -- cgit