author:    Pranith Kumar K <pranithk@gluster.com>    2012-03-16 16:09:42 +0530
committer: Anand Avati <avati@redhat.com>    2012-03-18 00:33:28 -0700
commit:    642343cc55e6ea2b0d463d77334c34790c30080f (patch)
tree:      47aa3e890088463828e936b84ccb56b2fdc49152
parent:    f159e2247b58441449ad8da13ddf5cc5d3887b93 (diff)
Self-heald: Handle errors gracefully and show errors to users
Change-Id: I5424ebfadb5b2773ee6f7370cc2867a555aa48dd
BUG: 800352
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/2962
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r--  cli/src/cli-cmd-volume.c                         |   4
-rw-r--r--  cli/src/cli-rpc-ops.c                            |  22
-rw-r--r--  xlators/cluster/afr/src/afr-common.c             |   2
-rw-r--r--  xlators/cluster/afr/src/afr-mem-types.h          |   1
-rw-r--r--  xlators/cluster/afr/src/afr-self-heald.c         | 294
-rw-r--r--  xlators/cluster/afr/src/afr-self-heald.h         |   4
-rw-r--r--  xlators/cluster/afr/src/afr.c                    |   1
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.c       |  15
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-op-sm.h       |   1
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-rpc-ops.c     |  30
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.c       |  34
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-utils.h       |   7
-rw-r--r--  xlators/mgmt/glusterd/src/glusterd-volume-ops.c  |  10
13 files changed, 274 insertions, 151 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 9546831ab7c..4e56f9c656c 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1561,8 +1561,10 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
int sent = 0;
int parse_error = 0;
dict_t *options = NULL;
+ xlator_t *this = NULL;
- frame = create_frame (THIS, THIS->ctx->pool);
+ this = THIS;
+ frame = create_frame (this, this->ctx->pool);
if (!frame)
goto out;
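
The cli change above is purely mechanical: THIS in GlusterFS resolves through a per-thread slot, so the patch reads it once into a local `this` and reuses that. A minimal standalone sketch of the pattern; the THIS macro and xlator type here are simplified stand-ins, not the real GlusterFS definitions:

/* Standalone sketch of caching a thread-local lookup. */
#include <stdio.h>

typedef struct xlator { const char *name; } xlator_t;

static __thread xlator_t *__this;       /* models gluster's per-thread xlator slot */
#define THIS (__this)

static void
cbk (void)
{
        xlator_t *this = THIS;          /* read the thread-local slot once */

        /* reuse the cached pointer instead of re-expanding THIS */
        printf ("xlator: %s\n", this ? this->name : "(none)");
}

int
main (void)
{
        xlator_t cli = { "cli" };

        __this = &cli;
        cbk ();
        return 0;
}
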
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index a259addb160..b88b523b7c7 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -5561,6 +5561,7 @@ cmd_heal_volume_brick_out (dict_t *dict, int brick)
char key[256] = {0};
char *hostname = NULL;
char *path = NULL;
+ char *status = NULL;
uint64_t i = 0;
snprintf (key, sizeof (key), "%d-hostname", brick);
@@ -5571,9 +5572,14 @@ cmd_heal_volume_brick_out (dict_t *dict, int brick)
ret = dict_get_str (dict, key, &path);
if (ret)
goto out;
+ cli_out ("\nBrick %s:%s", hostname, path);
snprintf (key, sizeof (key), "%d-count", brick);
ret = dict_get_uint64 (dict, key, &num_entries);
- cli_out ("\nEntries on %s:%s %"PRIu64, hostname, path, num_entries);
+ cli_out ("Number of entries: %"PRIu64, num_entries);
+ snprintf (key, sizeof (key), "%d-status", brick);
+ ret = dict_get_str (dict, key, &status);
+ if (status && strlen (status))
+ cli_out ("Status: %s", status);
for (i = 0; i < num_entries; i++) {
snprintf (key, sizeof (key), "%d-%"PRIu64, brick, i);
ret = dict_get_str (dict, key, &path);
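
The new per-brick output relies on a flat key convention in the response dict: `<brick>-hostname`, `<brick>-path`, `<brick>-count`, the new `<brick>-status`, and `<brick>-<i>` for each healed entry. A small standalone sketch of how those keys are composed (plain snprintf; the gluster dict_t is not modeled):

/* Sketch of the per-brick key convention used by the heal CLI output. */
#include <stdio.h>
#include <inttypes.h>

int
main (void)
{
        int      brick    = 0;
        uint64_t i        = 0;
        char     key[256] = {0};

        snprintf (key, sizeof (key), "%d-hostname", brick);
        printf ("%s\n", key);                   /* "0-hostname" */
        snprintf (key, sizeof (key), "%d-status", brick);
        printf ("%s\n", key);                   /* "0-status" */
        snprintf (key, sizeof (key), "%d-%"PRIu64, brick, i);
        printf ("%s\n", key);                   /* "0-0", i-th healed entry */
        return 0;
}
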
@@ -5645,21 +5651,15 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
cli_out ("%s", rsp.op_errstr);
else
- cli_out ("Starting heal on volume %s has been %s", volname,
+ cli_out ("Heal operation on volume %s has been %s", volname,
(rsp.op_ret) ? "unsuccessful": "successful");
- if (rsp.op_ret) {
- ret = rsp.op_ret;
- goto out;
- }
-
+ ret = rsp.op_ret;
if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
- (heal_op == GF_AFR_OP_HEAL_INDEX)) {
- ret = 0;
+ (heal_op == GF_AFR_OP_HEAL_INDEX))
goto out;
- }
- dict = dict_new ();
+ dict = dict_new ();
if (!dict) {
ret = -1;
goto out;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e220a61722d..b2077c384cf 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -3597,7 +3597,7 @@ afr_notify (xlator_t *this, int32_t event,
if (propagate)
ret = default_notify (this, event, data);
if (call_psh && priv->shd.iamshd)
- afr_do_poll_self_heal ((void*) (long) up_child);
+ afr_proactive_self_heal ((void*) (long) up_child);
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 343260a7968..f5292b3cca7 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -49,6 +49,7 @@ enum gf_afr_mem_types_ {
gf_afr_mt_shd_timer_t,
gf_afr_mt_shd_event_t,
gf_afr_mt_time_t,
+ gf_afr_mt_pos_data_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index ecd8c472699..3068d5c46d0 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -34,6 +34,11 @@ typedef enum {
STOP_CRAWL_ON_SINGLE_SUBVOL = 1
} afr_crawl_flags_t;
+typedef enum {
+ HEAL = 1,
+ INFO
+} shd_crawl_op;
+
typedef struct shd_dump {
dict_t *dict;
time_t sh_time;
@@ -46,6 +51,12 @@ typedef struct shd_event_ {
char *path;
} shd_event_t;
+typedef struct shd_pos_ {
+ int child;
+ xlator_t *this;
+ afr_child_pos_t pos;
+} shd_pos_t;
+
typedef int
(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
@@ -58,6 +69,9 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
static int
_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+int
+afr_syncop_find_child_position (void *data);
+
void
shd_cleanup_event (void *event)
{
@@ -360,44 +374,119 @@ _do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
afr_crawl_done);
}
-void
-_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl)
+gf_boolean_t
+_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
{
- int i = 0;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ gf_boolean_t proceed = _gf_false;
+ char *msg = NULL;
priv = this->private;
- for (i = 0; i < priv->child_count; i++)
- _do_self_heal_on_subvol (this, i, INDEX);
+ shd = &priv->shd;
+ if (!shd->enabled) {
+ msg = "Self-heal daemon is not enabled";
+ gf_log (this->name, GF_LOG_ERROR, msg);
+ goto out;
+ }
+ if (!priv->child_up[child]) {
+ gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s , "
+ "subvol went down", priv->children[child]->name);
+ msg = "Brick is Not connected";
+ goto out;
+ }
+
+ if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
+ if (afr_up_children_count (priv->child_up,
+ priv->child_count) < 2) {
+ gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
+ "< 2 children are up");
+ msg = "< 2 bricks in replica are running";
+ goto out;
+ }
+ }
+ proceed = _gf_true;
+out:
+ if (reason)
+ *reason = msg;
+ return proceed;
}
-void
-_do_self_heal_on_local_subvol (xlator_t *this, afr_crawl_type_t crawl)
+int
+_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ shd_crawl_op op, dict_t *output)
{
- int local_child = -1;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ char *status = NULL;
+ char *subkey = NULL;
+ char key[256] = {0};
+ shd_pos_t pos_data = {0};
+ int op_ret = -1;
+ int xl_id = -1;
+ int i = 0;
+ int ret = 0;
+ int crawl_flags = 0;
priv = this->private;
- local_child = afr_get_local_child (&priv->shd,
- priv->child_count);
- if (local_child < -1) {
- gf_log (this->name, GF_LOG_INFO,
- "No local bricks found");
+ if (op == HEAL)
+ crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
+
+ ret = dict_get_int32 (output, this->name, &xl_id);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
+ "translator-id is not available");
+ goto out;
+ }
+ pos_data.this = this;
+ subkey = "status";
+ for (i = 0; i < priv->child_count; i++) {
+ if (_crawl_proceed (this, i, crawl_flags, &status)) {
+ pos_data.child = i;
+ ret = synctask_new (this->ctx->env,
+ afr_syncop_find_child_position,
+ NULL, NULL, &pos_data);
+ if (ret) {
+ status = "Not able to find brick location";
+ } else if (pos_data.pos == AFR_POS_REMOTE) {
+ status = "brick is remote";
+ } else {
+ op_ret = 0;
+ if (op == HEAL) {
+ status = "Started self-heal";
+ _do_self_heal_on_subvol (this, i,
+ crawl);
+ } else {
+ status = "";
+ afr_start_crawl (this, i, INDEX,
+ _add_summary_to_dict,
+ output, _gf_false, 0,
+ NULL);
+ }
+ }
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id,
+ i, subkey);
+ ret = dict_set_str (output, key, status);
+ if (!op_ret && (crawl == FULL))
+ break;
+ }
+ snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i, subkey);
+ ret = dict_set_str (output, key, status);
}
- _do_self_heal_on_subvol (this, local_child, FULL);
+out:
+ return op_ret;
}
int
-_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
+ dict_t *output)
{
- int i = 0;
- afr_private_t *priv = NULL;
+ return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output);
+}
- priv = this->private;
- for (i = 0; i < priv->child_count; i++)
- afr_start_crawl (this, i, INDEX, _add_summary_to_dict,
- output, _gf_false, 0, NULL);
- return 0;
+int
+_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+{
+ return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output);
}
int
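
`_crawl_proceed` now reports why a crawl cannot proceed through an optional `char **reason`, which `_do_crawl_op_on_local_subvols` stores under the `<xl-id>-<child>-status` key for the CLI; crawl-internal callers simply pass NULL. A standalone sketch of the optional out-parameter shape, with invented checks and messages:

/* Sketch of the optional "reason" out-parameter pattern used by
 * _crawl_proceed; the check and messages are hypothetical placeholders. */
#include <stdio.h>
#include <stdbool.h>

static bool
proceed_check (int up_children, char **reason)
{
        bool  proceed = false;
        char *msg     = NULL;

        if (up_children < 2) {
                msg = "< 2 bricks in replica are running";
                goto out;
        }
        proceed = true;
out:
        if (reason)             /* callers may pass NULL and ignore why */
                *reason = msg;
        return proceed;
}

int
main (void)
{
        char *status = NULL;

        if (!proceed_check (1, &status))
                printf ("Status: %s\n", status);
        (void) proceed_check (1, NULL);   /* crawl path: reason unused */
        return 0;
}
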
@@ -441,17 +530,13 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
goto out;
switch (op) {
case GF_AFR_OP_HEAL_INDEX:
- _do_self_heal_on_local_subvols (this, INDEX);
- ret = 0;
+ ret = _do_self_heal_on_local_subvols (this, INDEX, output);
break;
case GF_AFR_OP_HEAL_FULL:
- _do_self_heal_on_local_subvol (this, FULL);
- ret = 0;
+ ret = _do_self_heal_on_local_subvols (this, FULL, output);
break;
case GF_AFR_OP_INDEX_SUMMARY:
ret = _get_index_summary_on_local_subvols (this, output);
- if (ret)
- goto out;
break;
case GF_AFR_OP_HEALED_FILES:
ret = _add_all_subvols_eh_to_dict (this, shd->healed, output);
@@ -474,34 +559,85 @@ out:
}
void
-afr_do_poll_self_heal (void *data)
+afr_poll_self_heal (void *data)
{
afr_private_t *priv = NULL;
afr_self_heald_t *shd = NULL;
struct timeval timeout = {0};
xlator_t *this = NULL;
long child = (long)data;
+ gf_timer_t *old_timer = NULL;
+ gf_timer_t *new_timer = NULL;
this = THIS;
priv = this->private;
shd = &priv->shd;
- if (shd->enabled)
- _do_self_heal_on_subvol (this, child, INDEX);
- if (shd->pos[child] == AFR_POS_REMOTE)
- goto out;
+ _do_self_heal_on_subvol (this, child, INDEX);
timeout.tv_sec = AFR_POLL_TIMEOUT;
timeout.tv_usec = 0;
- if (shd->timer[child])
- gf_timer_call_cancel (this->ctx, shd->timer[child]);
- shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
- afr_do_poll_self_heal, data);
+ //notify and previous timer should be synchronized.
+ LOCK (&priv->lock);
+ {
+ old_timer = shd->timer[child];
+ shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
+ afr_poll_self_heal,
+ data);
+ new_timer = shd->timer[child];
+ }
+ UNLOCK (&priv->lock);
- if (shd->timer[child] == NULL) {
+ if (old_timer)
+ gf_timer_call_cancel (this->ctx, old_timer);
+ if (!new_timer) {
gf_log (this->name, GF_LOG_WARNING,
- "Cannot create pending self-heal polling timer for %s",
+ "Could not create self-heal polling timer for %s",
priv->children[child]->name);
}
+ return;
+}
+
+static int
+afr_local_child_poll_self_heal (int ret, call_frame_t *sync_frame, void *data)
+{
+ afr_self_heald_t *shd = NULL;
+ shd_pos_t *pos_data = data;
+ afr_private_t *priv = NULL;
+
+ if (ret)
+ goto out;
+
+ priv = pos_data->this->private;
+ shd = &priv->shd;
+ shd->pos[pos_data->child] = pos_data->pos;
+ if (pos_data->pos == AFR_POS_LOCAL)
+ afr_poll_self_heal ((void*)(long)pos_data->child);
+out:
+ GF_FREE (data);
+ return 0;
+}
+
+void
+afr_proactive_self_heal (void *data)
+{
+ xlator_t *this = NULL;
+ long child = (long)data;
+ shd_pos_t *pos_data = NULL;
+ int ret = 0;
+
+ this = THIS;
+
+ //Position of brick could have changed and it could be local now.
+ //Compute the position again
+ pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
+ if (!pos_data)
+ goto out;
+ pos_data->this = this;
+ pos_data->child = child;
+ ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
+ afr_local_child_poll_self_heal, NULL, pos_data);
+ if (ret)
+ goto out;
out:
return;
}
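
The comment in `afr_poll_self_heal` states the intent: installing the new timer must be serialized with `afr_notify`, so the swap happens under `priv->lock` while the old timer is cancelled only after the lock is released. A standalone sketch of that swap-then-cancel shape with a pthread mutex (build with -pthread; the timer type and arming function are stand-ins, not the gf_timer API):

/* Sketch of the swap-under-lock / cancel-outside-lock pattern. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct fake_timer { int id; } fake_timer_t;

static pthread_mutex_t  lock = PTHREAD_MUTEX_INITIALIZER;
static fake_timer_t    *slot;                  /* models shd->timer[child] */

static fake_timer_t *
timer_arm (int id)                             /* stand-in for gf_timer_call_after */
{
        fake_timer_t *t = malloc (sizeof (*t));
        if (t)
                t->id = id;
        return t;
}

static void
rearm (int id)
{
        fake_timer_t *old_timer = NULL;
        fake_timer_t *new_timer = NULL;

        pthread_mutex_lock (&lock);
        {
                old_timer = slot;              /* publish the new timer while  */
                slot      = timer_arm (id);    /* holding the lock notify uses */
                new_timer = slot;
        }
        pthread_mutex_unlock (&lock);

        if (old_timer)                         /* cancel outside the lock */
                free (old_timer);
        if (!new_timer)
                fprintf (stderr, "could not arm timer\n");
}

int
main (void)
{
        rearm (1);
        rearm (2);
        free (slot);
        return 0;
}
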
@@ -680,31 +816,6 @@ afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
return ret;
}
-gf_boolean_t
-_crawl_proceed (xlator_t *this, int child, int crawl_flags)
-{
- afr_private_t *priv = this->private;
- gf_boolean_t proceed = _gf_false;
-
- if (!priv->child_up[child]) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s "
- ", subvol went down", priv->children[child]->name);
- goto out;
- }
-
- if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
- goto out;
- }
- }
- proceed = _gf_true;
-out:
- return proceed;
-}
-
static int
_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
off_t *offset, afr_crawl_data_t *crawl_data)
@@ -719,7 +830,7 @@ _process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
list_for_each_entry_safe (entry, tmp, &entries->list, list) {
if (!_crawl_proceed (this, crawl_data->child,
- crawl_data->crawl_flags)) {
+ crawl_data->crawl_flags, NULL)) {
ret = -1;
goto out;
}
@@ -813,7 +924,7 @@ _crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
free_entries = _gf_true;
if (!_crawl_proceed (this, crawl_data->child,
- crawl_data->crawl_flags)) {
+ crawl_data->crawl_flags, NULL)) {
ret = -1;
goto out;
}
@@ -847,7 +958,7 @@ position_str_get (afr_child_pos_t pos)
}
int
-afr_find_child_position (xlator_t *this, int child)
+afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos)
{
afr_private_t *priv = NULL;
dict_t *xattr_rsp = NULL;
@@ -855,28 +966,16 @@ afr_find_child_position (xlator_t *this, int child)
int ret = 0;
gf_boolean_t local = _gf_false;
char *pathinfo = NULL;
- afr_child_pos_t *pos = NULL;
priv = this->private;
- pos = &priv->shd.pos[child];
-
- if (!priv->root_inode) {
- LOCK (&priv->lock);
- {
- if (!priv->root_inode)
- priv->root_inode = inode_ref
- (this->itable->root);
- }
- UNLOCK (&priv->lock);
- }
afr_build_root_loc (this, &loc);
ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
GF_XATTR_PATHINFO_KEY);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on child "
- "%d", child);
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s",
+ priv->children[child]->name);
goto out;
}
@@ -904,18 +1003,21 @@ out:
return ret;
}
-static inline int
-afr_is_local_child (afr_self_heald_t *shd, int child, unsigned int child_count)
+int
+afr_syncop_find_child_position (void *data)
{
- return (shd->pos[child] == AFR_POS_LOCAL);
+ shd_pos_t *pos_data = data;
+ int ret = 0;
+
+ ret = afr_find_child_position (pos_data->this, pos_data->child,
+ &pos_data->pos);
+ return ret;
}
static int
afr_dir_crawl (void *data)
{
xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
int ret = -1;
xlator_t *readdir_xl = NULL;
fd_t *fd = NULL;
@@ -923,17 +1025,9 @@ afr_dir_crawl (void *data)
afr_crawl_data_t *crawl_data = data;
this = THIS;
- priv = this->private;
- shd = &priv->shd;
-
- if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags))
- goto out;
-
- ret = afr_find_child_position (this, crawl_data->child);
- if (ret)
- goto out;
- if (!afr_is_local_child (shd, crawl_data->child, priv->child_count))
+ if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags,
+ NULL))
goto out;
readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
@@ -1026,16 +1120,12 @@ afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
afr_crawl_done_cbk_t crawl_done)
{
afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
call_frame_t *frame = NULL;
afr_crawl_data_t *crawl_data = NULL;
int ret = 0;
int (*crawler) (void*) = NULL;
priv = this->private;
- shd = &priv->shd;
- if (!shd->enabled)
- goto out;
frame = create_frame (this, this->ctx->pool);
if (!frame)
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 57b29ce2dc4..84873862123 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -42,14 +42,12 @@ typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data
gf_dirent_t *entry, loc_t *child, loc_t *parent,
struct iatt *iattr);
-void afr_proactive_self_heal (xlator_t *this, int idx);
-
void afr_build_root_loc (xlator_t *this, loc_t *loc);
int afr_set_root_gfid (dict_t *dict);
void
-afr_do_poll_self_heal (void *data);
+afr_proactive_self_heal (void *data);
int
afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 5d4516190ff..89a8e87fbc3 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -415,6 +415,7 @@ init (xlator_t *this)
this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
if (!this->itable)
goto out;
+ priv->root_inode = inode_ref (this->itable->root);
ret = 0;
out:
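
Taking the root-inode reference once in `init` is what lets the earlier hunk in afr-self-heald.c drop the lazily-initialized, double-checked `priv->root_inode` block from `afr_find_child_position`. A standalone sketch contrasting the two shapes (types and `inode_ref` are stand-ins; build with -pthread):

/* Sketch: deleted lazy double-checked init vs. the eager init adopted here. */
#include <pthread.h>

typedef struct inode { int refcount; } inode_t;

static inode_t root = { 1 };
static inode_t *root_inode;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static inode_t *
inode_ref (inode_t *i)
{
        i->refcount++;
        return i;
}

/* Before: every caller raced to initialize, double-checked under a lock. */
static void
lazy_get (void)
{
        if (!root_inode) {
                pthread_mutex_lock (&lock);
                if (!root_inode)
                        root_inode = inode_ref (&root);
                pthread_mutex_unlock (&lock);
        }
}

/* After: take the reference once at setup; later lookups need no locking. */
static void
eager_init (void)
{
        root_inode = inode_ref (&root);
}

int
main (void)
{
        eager_init ();
        lazy_get ();            /* now a no-op on the fast path */
        return 0;
}
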
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index d96d6717eeb..ce9581644b1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2780,9 +2780,9 @@ _heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data)
int rxl_child_id = 0;
int brick_id = 0;
int int_len = 0;
- int brick_count = 0;
int ret = 0;
glusterd_heal_rsp_conv_t *rsp_ctx = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
rsp_ctx = data;
rxl_end = strchr (key, '-');
@@ -2810,13 +2810,19 @@ _heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data)
volinfo = rsp_ctx->volinfo;
brick_id = rxl_id * volinfo->replica_count + rxl_child_id;
+ if (!strcmp (rxl_child_end, "status")) {
+ brickinfo = glusterd_get_brickinfo_by_position (volinfo,
+ brick_id);
+ if (!brickinfo)
+ goto out;
+ if (!glusterd_is_local_brick (rsp_ctx->this, volinfo,
+ brickinfo))
+ goto out;
+ }
new_value = data_copy (value);
snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end);
dict_set (rsp_ctx->dict, new_key, new_value);
- ret = dict_get_int32 (rsp_ctx->dict, "count", &brick_count);
- if (brick_id >= brick_count)
- ret = dict_set_int32 (rsp_ctx->dict, "count", brick_id + 1);
out:
return;
}
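
`_heal_volume_add_shd_rsp` walks response keys of the form `<xl-id>-<child>-<field>` and, for the new status field, forwards only entries belonging to bricks local to this glusterd. A standalone sketch of splitting such a key; sscanf stands in for the strchr arithmetic in the real code, and the example key is invented:

/* Sketch of parsing the "<xl-id>-<child>-<field>" response keys. */
#include <stdio.h>

int
main (void)
{
        const char *key = "1-0-status";   /* invented example key */
        int         rxl_id = 0, rxl_child_id = 0;
        char        field[64] = {0};

        if (sscanf (key, "%d-%d-%63s", &rxl_id, &rxl_child_id, field) == 3) {
                /* brick_id = rxl_id * replica_count + rxl_child_id */
                printf ("xl=%d child=%d field=%s\n",
                        rxl_id, rxl_child_id, field);
        }
        return 0;
}
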
@@ -2847,6 +2853,7 @@ glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict,
rsp_ctx.dict = op_ctx;
rsp_ctx.volinfo = volinfo;
+ rsp_ctx.this = THIS;
dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx);
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index bfc41b29252..1f32681a5a1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -157,6 +157,7 @@ typedef struct glusterd_pr_brick_rsp_conv_t {
typedef struct glusterd_heal_rsp_conv_ {
dict_t *dict;
glusterd_volinfo_t *volinfo;
+ xlator_t *this;
} glusterd_heal_rsp_conv_t;
typedef struct glusterd_status_rsp_conv_ {
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index c7931dbfafc..649156f4b33 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -111,11 +111,6 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
}
break;
}
- case GD_OP_HEAL_VOLUME:
- {
- glusterd_add_bricks_hname_path_to_dict (ctx);
- break;
- }
case GD_OP_PROFILE_VOLUME:
{
if (ctx && dict_get_int32 (ctx, "count", &count)) {
@@ -153,6 +148,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
case GD_OP_SET_VOLUME:
case GD_OP_LIST_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_HEAL_VOLUME:
{
/*nothing specific to be done*/
break;
@@ -1198,28 +1194,6 @@ out:
return ret;
}
-void
-_heal_volume_add_peer_rsp (dict_t *peer_dict, char *key, data_t *value,
- void *data)
-{
- int max_brick = 0;
- int peer_max_brick = 0;
- int ret = 0;
- dict_t *ctx_dict = data;
-
-
-
- ret = dict_get_int32 (ctx_dict, "count", &max_brick);
- ret = dict_get_int32 (peer_dict, "count", &peer_max_brick);
- if (peer_max_brick > max_brick)
- ret = dict_set_int32 (ctx_dict, "count", peer_max_brick);
- else
- ret = dict_set_int32 (ctx_dict, "count", max_brick);
- dict_del (peer_dict, "count");
- dict_copy (peer_dict, ctx_dict);
- return;
-}
-
int
glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict)
{
@@ -1236,7 +1210,7 @@ glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict)
if (!ctx_dict)
goto out;
- dict_foreach (rsp_dict, _heal_volume_add_peer_rsp, ctx_dict);
+ dict_copy (rsp_dict, ctx_dict);
out:
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 9cc436496ab..4b84039dfe6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3303,9 +3303,23 @@ glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port,
return ret;
}
+glusterd_brickinfo_t*
+glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos)
+{
+ glusterd_brickinfo_t *tmpbrkinfo = NULL;
+
+ list_for_each_entry (tmpbrkinfo, &volinfo->bricks,
+ brick_list) {
+ if (pos == 0)
+ return tmpbrkinfo;
+ pos--;
+ }
+ return NULL;
+}
+
void
glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo,
- gf_brick_status_t status)
+ gf_brick_status_t status)
{
GF_ASSERT (brickinfo);
brickinfo->status = status;
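
`glusterd_get_brickinfo_by_position`, added above, is a plain nth-element walk over the volume's brick list; the caller computes the flat index as `brick_id = rxl_id * replica_count + rxl_child_id`. A standalone model with an ordinary singly linked list (gluster's list.h macros are not used):

/* Standalone model of the nth-element brick walk. */
#include <stdio.h>

typedef struct brick {
        const char   *path;
        struct brick *next;
} brick_t;

static brick_t *
brick_by_position (brick_t *head, unsigned int pos)
{
        brick_t *b = NULL;

        for (b = head; b; b = b->next) {
                if (pos == 0)
                        return b;
                pos--;
        }
        return NULL;            /* pos past the end of the list */
}

int
main (void)
{
        brick_t  b1  = { "/export/b1", NULL };
        brick_t  b0  = { "/export/b0", &b1 };
        brick_t *hit = brick_by_position (&b0, 1);

        printf ("%s\n", hit ? hit->path : "(none)");
        return 0;
}
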
@@ -5139,3 +5153,21 @@ glusterd_uuid_to_hostname (uuid_t uuid)
return hostname;
}
+gf_boolean_t
+glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ gf_boolean_t local = _gf_false;
+ int ret = 0;
+ glusterd_conf_t *conf = NULL;
+
+ if (uuid_is_null (brickinfo->uuid)) {
+ ret = glusterd_resolve_brick (brickinfo);
+ if (ret)
+ goto out;
+ }
+ conf = this->private;
+ local = !uuid_compare (brickinfo->uuid, conf->uuid);
+out:
+ return local;
+}
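
`glusterd_is_local_brick` resolves the brick's owner UUID on demand and calls the brick local when it equals this glusterd's own UUID (`conf->uuid`). A minimal sketch of the same test using libuuid directly (link with -luuid; the UUID values are generated on the spot, not real cluster IDs):

/* Sketch of the local-brick UUID comparison with libuuid. */
#include <stdio.h>
#include <uuid/uuid.h>

int
main (void)
{
        uuid_t my_uuid, brick_uuid;

        uuid_generate (my_uuid);
        uuid_copy (brick_uuid, my_uuid);   /* same node -> local */

        if (!uuid_is_null (brick_uuid) &&
            !uuid_compare (brick_uuid, my_uuid))
                printf ("brick is local\n");
        else
                printf ("brick is remote\n");
        return 0;
}
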
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 7b5a387c275..fa9f7737005 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -419,4 +419,11 @@ glusterd_add_node_to_dict (char *server, dict_t *dict, int count);
char *
glusterd_uuid_to_hostname (uuid_t uuid);
+
+glusterd_brickinfo_t*
+glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos);
+
+gf_boolean_t
+glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index d6e58c1b844..93c00983a21 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -448,6 +448,7 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
dict_t *dict = NULL;
glusterd_op_t cli_op = GD_OP_HEAL_VOLUME;
char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
GF_ASSERT (req);
@@ -483,6 +484,15 @@ glusterd_handle_cli_heal_volume (rpcsvc_request_t *req)
gf_log ("glusterd", GF_LOG_INFO, "Received heal vol req"
"for volume %s", volname);
+ ret = glusterd_add_bricks_hname_path_to_dict (dict);
+ if (ret)
+ goto out;
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret)
+ goto out;
+ ret = dict_set_int32 (dict, "count", volinfo->brick_count);
+ if (ret)
+ goto out;
ret = glusterd_op_begin (req, GD_OP_HEAL_VOLUME, dict);
gf_cmd_log ("volume heal","on volname: %s %s", volname,