From 8a45a0e480f7e8c6ea1195f77ce3810d4817dc37 Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Tue, 3 Apr 2012 15:37:25 +0530 Subject: glusterd: Added volume-id to 'op' dictionary The volume-id passed in the op dictionary helps detect possible split brains among peers in a cluster. The idea is to check whether the volume's id and the vol-id that was passed are equal, i.e., the same volume name with a different volume id indicates that the glusterd 'metadata' of one of the participating peers is stale or that there is a split brain. This is over and above the existing checksum-based validation of peer-supplied cluster 'metadata' (i.e., the volume info file). Change-Id: I1049ef249e417e540ccb4243e450f92fcd0f46f9 BUG: 797734 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.com/3083 Tested-by: Gluster Build System Reviewed-by: Jeff Darcy Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 8 ++++ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 53 +++++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-utils.c | 28 +++++++++++++ xlators/mgmt/glusterd/src/glusterd-utils.h | 2 + xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 24 +++++++++++ 5 files changed, 115 insertions(+) diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index aa57341ffd7..300c40a8d5e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1065,6 +1065,10 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + if (glusterd_is_rb_ongoing (volinfo)) { snprintf (msg, sizeof (msg), "Replace brick is in progress on " "volume %s. 
Please retry after replace-brick " @@ -1201,6 +1205,10 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + if (glusterd_is_rb_ongoing (volinfo)) { snprintf (msg, sizeof (msg), "Replace brick is in progress on " "volume %s. Please retry after replace-brick " diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index a577e161c17..1825065a4b4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -504,6 +504,7 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) char msg[2048] = {0}; char *key = NULL; char *key_fixed = NULL; + glusterd_volinfo_t *volinfo = NULL; ret = dict_get_str (dict, "volname", &volname); @@ -522,6 +523,13 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; ret = dict_get_str (dict, "key", &key); if (ret) { @@ -568,6 +576,7 @@ glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) gf_boolean_t exists = _gf_false; glusterd_peerinfo_t *peerinfo = NULL; char msg[2048] = {0,}; + glusterd_volinfo_t *volinfo = NULL; ret = dict_get_str (dict, "hostname", &hostname); if (ret) { @@ -607,6 +616,13 @@ glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; } else { ret = 0; } @@ -664,6 +680,10 @@ glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + ret = glusterd_is_volume_started (volinfo); if (!ret) { snprintf (msg, sizeof (msg), "Volume %s is not started", @@ -799,6 
+819,10 @@ glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + ret = dict_get_int32 (dict, "op", &stats_op); if (ret) { snprintf (msg, sizeof (msg), "Volume profile op get failed"); @@ -1771,6 +1795,9 @@ glusterd_op_build_payload (dict_t **req) void *ctx = NULL; dict_t *req_dict = NULL; glusterd_op_t op = GD_OP_NONE; + char *volname = NULL; + char *volid = NULL; + glusterd_volinfo_t *volinfo = NULL; GF_ASSERT (req); @@ -1820,6 +1847,32 @@ glusterd_op_build_payload (dict_t **req) case GD_OP_DEFRAG_BRICK_VOLUME: { dict_t *dict = ctx; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (THIS->name, GF_LOG_CRITICAL, + "volname is not present in " + "operation ctx"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "volume %s not present in " + "the cluster", volname); + goto out; + } + volid = gf_strdup (uuid_utoa (volinfo->volume_id)); + if (!volid) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, "vol-id", volid); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to set volume id in " + "dictionary"); + goto out; + } dict_copy (dict, req_dict); } break; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 817cbbc323e..7198a130775 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -5328,3 +5328,31 @@ glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo, out: return local; } +int +glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char *volid_str = NULL; + uuid_t vol_uid = {0, }; + + ret = dict_get_str (op_dict, "vol-id", &volid_str); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get volume id"); + goto out; + } + ret = uuid_parse (volid_str, vol_uid); + if (ret) { + gf_log 
(THIS->name, GF_LOG_ERROR, "Failed to parse uuid"); + goto out; + } + + if (uuid_compare (vol_uid, volinfo->volume_id)) { + gf_log (THIS->name, GF_LOG_ERROR, "Volume ids are different. " + "Possibly a split brain among peers."); + ret = -1; + goto out; + } + +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 5491c7a0068..602dc780fef 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -432,4 +432,6 @@ glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos); gf_boolean_t glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); +int +glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index b30c4631806..85e7abba853 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -830,6 +830,10 @@ glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) if (ret) goto out; + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_resolve_brick (brickinfo); if (ret) { @@ -905,6 +909,10 @@ glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr) if (ret) goto out; + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + /* If 'force' flag is given, no check is required */ if (flags & GF_CLI_FLAG_OP_FORCE) goto out; @@ -1000,6 +1008,10 @@ glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + + ret = glusterd_validate_volume_id (dict, volinfo); if (ret) goto out; @@ -1056,6 +1068,10 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) goto out; } + ret = 
glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + if (!glusterd_is_volume_replicate (volinfo)) { ret = -1; snprintf (msg, sizeof (msg), "Volume %s is not of type " @@ -1142,6 +1158,10 @@ glusterd_op_stage_statedump_volume (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + is_running = glusterd_is_volume_started (volinfo); if (!is_running) { snprintf (msg, sizeof(msg), "Volume %s is not in a started" @@ -1209,6 +1229,10 @@ glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr) goto out; } + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + if (!glusterd_is_volume_started (volinfo)) { snprintf (msg, sizeof(msg), "Volume %s is not started", volname); -- cgit