From 1c430d2747f2c0960dcdcd9da4b253c89674bdd7 Mon Sep 17 00:00:00 2001 From: Venkatesh Somyajulu Date: Mon, 4 Feb 2013 13:51:16 +0530 Subject: glusterd: "volume heal info" doesn't report output properly Problem: "volume heal info" doesn't reports files to be healed when gluster* processes on one of the storage node is not running Change-Id: Iff7d41407014624e4da9b70d710039ac14b48291 BUG: 880898 Signed-off-by: Venkatesh Somyajulu Reviewed-on: http://review.gluster.org/4371 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- cli/src/cli-rpc-ops.c | 49 ++++++++++++------- tests/bugs/bug-880898.t | 23 +++++++++ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 63 +++++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 32 +++++++------ 4 files changed, 134 insertions(+), 33 deletions(-) create mode 100644 tests/bugs/bug-880898.t diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 20c60bf08..694964922 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -6003,6 +6003,7 @@ cmd_heal_volume_brick_out (dict_t *dict, int brick) uint64_t i = 0; uint32_t time = 0; char timestr[32] = {0}; + char *shd_status = NULL; snprintf (key, sizeof key, "%d-hostname", brick); ret = dict_get_str (dict, key, &hostname); @@ -6013,33 +6014,45 @@ cmd_heal_volume_brick_out (dict_t *dict, int brick) if (ret) goto out; cli_out ("\nBrick %s:%s", hostname, path); - snprintf (key, sizeof key, "%d-count", brick); - ret = dict_get_uint64 (dict, key, &num_entries); - cli_out ("Number of entries: %"PRIu64, num_entries); + snprintf (key, sizeof key, "%d-status", brick); ret = dict_get_str (dict, key, &status); if (status && strlen (status)) cli_out ("Status: %s", status); - for (i = 0; i < num_entries; i++) { - snprintf (key, sizeof key, "%d-%"PRIu64, brick, i); - ret = dict_get_str (dict, key, &path); - if (ret) - continue; - time = 0; - snprintf (key, sizeof key, "%d-%"PRIu64"-time", brick, i); - ret = dict_get_uint32 (dict, key, &time); - if (!time) { - cli_out ("%s", path); - } else { - gf_time_fmt (timestr, sizeof timestr, - time, gf_timefmt_FT); - if (i == 0) { + + snprintf (key, sizeof key, "%d-shd-status",brick); + ret = dict_get_str (dict, key, &shd_status); + + if(!shd_status) + { + snprintf (key, sizeof key, "%d-count", brick); + ret = dict_get_uint64 (dict, key, &num_entries); + cli_out ("Number of entries: %"PRIu64, num_entries); + + + for (i = 0; i < num_entries; i++) { + snprintf (key, sizeof key, "%d-%"PRIu64, brick, i); + ret = dict_get_str (dict, key, &path); + if (ret) + continue; + time = 0; + snprintf (key, sizeof key, "%d-%"PRIu64"-time", + brick, i); + ret = dict_get_uint32 (dict, key, &time); + if (!time) { + cli_out ("%s", path); + } else { + gf_time_fmt (timestr, sizeof timestr, + time, gf_timefmt_FT); + if (i == 0) { cli_out ("at path on brick"); cli_out ("-----------------------------------"); + } + cli_out ("%s %s", timestr, path); } - cli_out ("%s %s", timestr, path); } } + out: return; } diff --git a/tests/bugs/bug-880898.t b/tests/bugs/bug-880898.t new file mode 100644 index 000000000..a069d4a8a --- /dev/null +++ b/tests/bugs/bug-880898.t @@ -0,0 +1,23 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc + +cleanup; + +TEST glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2 +TEST $CLI volume start $V0 +pkill glusterfs +uuid="" +for line in $(cat /var/lib/glusterd/glusterd.info) +do + if [[ $line == UUID* ]] + then + uuid=`echo $line | sed -r 's/^.{5}//'` + fi +done + +gluster volume heal $V0 info | grep "Status: self-heal-daemon is not running on $uuid"; +EXPECT "0" echo $? + +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 2c457ace8..ea2aa0e21 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -4272,6 +4272,54 @@ out: } #endif +static int +fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + char msg[1024] = {0,}; + char key[1024] = {0,}; + char value[1024] = {0,}; + int index = 0; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + snprintf (msg, sizeof (msg), "self-heal-daemon is not running on"); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + + if (uuid_compare (MY_UUID, brickinfo->uuid)) { + index++; + continue; + } + snprintf (key, sizeof (key), "%d-status",index); + snprintf (value, sizeof (value), "%s %s",msg, + uuid_utoa(MY_UUID)); + ret = dict_set_dynstr (dict, key, gf_strdup(value)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to" + "set the dictionary for shd status msg"); + goto out; + } + snprintf (key, sizeof (key), "%d-shd-status",index); + ret = dict_set_str (dict, key, "off"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to" + " set dictionary for shd status msg"); + goto out; + } + + index++; + } + +out: + return ret; + +} + + static int glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr, struct list_head *selected) @@ -4285,6 +4333,7 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr, glusterd_pending_node_t *pending_node = NULL; gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID; int rxlator_count = 0; + dict_t *op_ctx = NULL; this = THIS; GF_ASSERT (this); @@ -4313,6 +4362,20 @@ glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr, goto out; } + + if (!glusterd_is_nodesvc_online ("glustershd") && + (heal_op == GF_AFR_OP_INDEX_SUMMARY)) { + + op_ctx = glusterd_op_get_ctx (); + + ret = fill_shd_status_for_local_bricks (op_ctx, volinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Unable to fill the shd" + " status for the local bricks"); + goto out; + } + + switch (heal_op) { case GF_AFR_OP_HEAL_FULL: rxlator_count = _select_rxlators_for_full_self_heal (this, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 2aadce243..9aa8df61d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1085,11 +1085,13 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) glusterd_conf_t *priv = NULL; dict_t *opt_dict = NULL; gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID; + xlator_t *this = NULL; - priv = THIS->private; + this = THIS; + priv = this->private; if (!priv) { ret = -1; - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "priv is NULL"); goto out; } @@ -1104,7 +1106,7 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) if (ret) { ret = -1; snprintf (msg, sizeof (msg), "Volume %s does not exist", volname); - gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); goto out; } @@ -1118,7 +1120,7 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) snprintf (msg, sizeof (msg), "Volume %s is not of type " "replicate", volname); *op_errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); goto out; } @@ -1144,26 +1146,26 @@ glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) snprintf (msg, sizeof (msg), "Self-heal-daemon is " "disabled. Heal will not be triggered on volume %s", volname); - gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); *op_errstr = gf_strdup (msg); goto out; } - if (!glusterd_is_nodesvc_online ("glustershd")) { + ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); + if (ret || (heal_op == GF_AFR_OP_INVALID)) { ret = -1; - snprintf (msg, sizeof (msg), "Self-heal daemon is not " - "running. Check self-heal daemon log file."); - *op_errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup("Invalid heal-op"); + gf_log (this->name, GF_LOG_WARNING, "%s", "Invalid heal-op"); goto out; } - ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); - if (ret || (heal_op == GF_AFR_OP_INVALID)) { + if ((heal_op != GF_AFR_OP_INDEX_SUMMARY) && + !glusterd_is_nodesvc_online ("glustershd")) { ret = -1; - snprintf (msg, sizeof (msg), "Invalid heal-op"); - *op_errstr = gf_strdup (msg); - gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup ("Self-heal daemon is not running." + " Check self-heal daemon log file."); + gf_log (this->name, GF_LOG_WARNING, "%s", "Self-heal daemon is " + "not running. Check self-heal daemon log file."); goto out; } -- cgit