From c3c4ee24a64a2447f77788cb84559f1e07a21e04 Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Mon, 13 Feb 2012 19:15:17 +0530 Subject: glusterd: Fixed replace-brick commit_force algo. - commit force subcommand of replace-brick (rb) should be allowed even if source brick is (irrecoverably) offline. - modified rb_timer to be active only for start subcommand. This is important since the rb timer event relies on src_brick and dst_brick objects to be 'alive' when it 'happens'. In the case of abort/commit/commit force, it is very likely that src_brick and/or dst_brick objects could have been destroyed. Change-Id: Ib8b8a4d690fbdd6f99b8aff306490eb59c54a437 BUG: 772845 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.com/2620 Tested-by: Gluster Build System Reviewed-by: Amar Tumballi Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-op-sm.c | 10 +++--- xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 36 ++++++++++------------ xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index ce9581644..9718918f3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1863,12 +1863,12 @@ glusterd_op_start_rb_timer (dict_t *dict) goto out; } - if (op == GF_REPLACE_OP_START || - op == GF_REPLACE_OP_ABORT) - timeout.tv_sec = 5; - else - timeout.tv_sec = 1; + if (op != GF_REPLACE_OP_START) { + ret = glusterd_op_sm_inject_all_acc (); + goto out; + } + timeout.tv_sec = 5; timeout.tv_usec = 0; diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index ca127f7a8..c68b2ce4d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -404,7 +404,8 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, 
GLUSTERD_GET_BRICK_PIDFILE (pidfile, voldir, src_brickinfo->hostname, src_brickinfo->path); - if (!glusterd_is_service_running (pidfile, NULL)) { + if ((replace_op != GF_REPLACE_OP_COMMIT_FORCE) && + !glusterd_is_service_running (pidfile, NULL)) { snprintf(msg, sizeof(msg), "Source brick %s:%s " "is not online.", src_brickinfo->hostname, src_brickinfo->path); @@ -441,7 +442,8 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } if ((volinfo->rb_status ==GF_RB_STATUS_NONE) && - (replace_op == GF_REPLACE_OP_START)) { + (replace_op == GF_REPLACE_OP_START || + replace_op == GF_REPLACE_OP_COMMIT_FORCE)) { ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo); volinfo->src_brick = src_brickinfo; volinfo->dst_brick = dst_brickinfo; @@ -1494,7 +1496,6 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) } case GF_REPLACE_OP_COMMIT: - case GF_REPLACE_OP_COMMIT_FORCE: { ctx = glusterd_op_get_ctx (); if (ctx) { @@ -1507,41 +1508,36 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) goto out; } } - + } + /* fall through */ + case GF_REPLACE_OP_COMMIT_FORCE: + { ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); - gf_log ("", GF_LOG_DEBUG, + gf_log (THIS->name, GF_LOG_DEBUG, "Received commit - will be adding dst brick and " "removing src brick"); - if (!glusterd_is_local_addr (dst_brickinfo->hostname) && - replace_op != GF_REPLACE_OP_COMMIT_FORCE) { - gf_log ("", GF_LOG_INFO, + if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { + gf_log (THIS->name, GF_LOG_DEBUG, "I AM THE DESTINATION HOST"); ret = rb_kill_destination_brick (volinfo, dst_brickinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to kill destination brick"); + gf_log (THIS->name, GF_LOG_CRITICAL, + "Unable to cleanup dst brick"); goto out; } } - if (ret) { - gf_log ("", GF_LOG_CRITICAL, - "Unable to cleanup dst brick"); - goto out; - } - - ret = glusterd_nodesvcs_stop (volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (THIS->name, GF_LOG_ERROR, 
"Unable to stop nfs server, ret: %d", ret); } ret = glusterd_op_perform_replace_brick (volinfo, src_brick, dst_brick); if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to add " + gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to add " "dst-brick: %s to volume: %s", dst_brick, volinfo->volname); (void) glusterd_nodesvcs_handle_graph_change (volinfo); @@ -1552,7 +1548,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) ret = glusterd_nodesvcs_handle_graph_change (volinfo); if (ret) { - gf_log ("", GF_LOG_CRITICAL, + gf_log (THIS->name, GF_LOG_CRITICAL, "Failed to generate nfs volume file"); } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index fa9f77370..7a784149e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -312,7 +312,8 @@ glusterd_is_rb_ongoing (glusterd_volinfo_t *volinfo); int glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brick, glusterd_brickinfo_t *dst_brick); + glusterd_brickinfo_t *src_brick, + glusterd_brickinfo_t *dst_brick); int glusterd_brick_create_path (char *host, char *path, uuid_t uuid, mode_t mode, -- cgit