diff options
author | Pranith Kumar K <pranithk@gluster.com> | 2012-09-24 13:14:56 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-09-25 19:41:25 -0700 |
commit | e8712f36335dd3b8508914f917d74b69a2d751a1 (patch) | |
tree | 290fe142fb57b92798df0b61bef75a1324c03732 | |
parent | ddcfd0888294b99049c7b05c0a43129b5099b19e (diff) |
cluster/afr: Trigger heal on local subvols on any child_up
Problem:
The index in the child that comes online is generally empty,
because the changes would have happened on the other child, which
has been up. So the sync begins only when the other child's poll
time-out happens (i.e. 10 minutes). The expectation is that the
sync must be triggered as soon as the connection with any brick
is established.
Fix:
Whenever any child_up happens, trigger the index self-heal on all
local children in the replicate subvolume.
Tests:
1) Checked that the self-heal is triggered on all local children
whenever any child comes online.
2) Checked that the volume heal commands are working fine.
Change-Id: I4f64737866470a2f989349a889ea52782930e11d
BUG: 852741
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.org/3972
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 34 |
1 files changed, 21 insertions, 13 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index b5653dee73e..47537beca59 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -443,11 +443,13 @@ _do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, if (op == HEAL) crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL; - ret = dict_get_int32 (output, this->name, &xl_id); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Invalid input, " - "translator-id is not available"); - goto out; + if (output) { + ret = dict_get_int32 (output, this->name, &xl_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Invalid input, " + "translator-id is not available"); + goto out; + } } pos_data.this = this; subkey = "status"; @@ -467,7 +469,7 @@ _do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, status = "Started self-heal"; _do_self_heal_on_subvol (this, i, crawl); - } else { + } else if (output) { status = ""; afr_start_crawl (this, i, INDEX, _add_summary_to_dict, @@ -475,14 +477,19 @@ _do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, NULL); } } - snprintf (key, sizeof (key), "%d-%d-%s", xl_id, - i, subkey); - ret = dict_set_str (output, key, status); + if (output) { + snprintf (key, sizeof (key), "%d-%d-%s", xl_id, + i, subkey); + ret = dict_set_str (output, key, status); + } if (!op_ret && (crawl == FULL)) break; } - snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i, subkey); - ret = dict_set_str (output, key, status); + if (output) { + snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i, + subkey); + ret = dict_set_str (output, key, status); + } } out: return op_ret; @@ -626,7 +633,7 @@ unlock: } static int -afr_local_child_poll_self_heal (int ret, call_frame_t *sync_frame, void *data) +afr_handle_child_up (int ret, call_frame_t *sync_frame, void *data) { afr_self_heald_t *shd = NULL; shd_pos_t *pos_data = data; @@ -640,6 +647,7 @@ afr_local_child_poll_self_heal (int ret, 
call_frame_t *sync_frame, void *data) shd->pos[pos_data->child] = pos_data->pos; if (pos_data->pos != AFR_POS_REMOTE) afr_poll_self_heal ((void*)(long)pos_data->child); + _do_self_heal_on_local_subvols (THIS, INDEX, NULL); out: GF_FREE (data); return 0; @@ -663,7 +671,7 @@ afr_proactive_self_heal (void *data) pos_data->this = this; pos_data->child = child; ret = synctask_new (this->ctx->env, afr_syncop_find_child_position, - afr_local_child_poll_self_heal, NULL, pos_data); + afr_handle_child_up, NULL, pos_data); if (ret) goto out; out: |