diff options
author | Venkatesh Somyajulu <vsomyaju@redhat.com> | 2013-06-28 19:11:47 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-07-02 10:25:17 -0700 |
commit | ef8092fab7b6fa5a16cc0e22b75945758519d5a6 (patch) | |
tree | 65aa4fa06801b135d61a5e57637dad882793e73e /xlators/cluster/afr | |
parent | 7062eda1575214819f5c7411748b06be95e08ffa (diff) |
cluster/afr: Allow data/entry self heal for metadata split-brain
Problem:
Currently, whenever there is a metadata split-brain, the variable
sh->op_failed is set to 1 to denote that self heal has failed.
But if we proceed to data self heal, the data self heal code path
also relies on the sh->op_failed variable. So it will
check the sh->op_failed variable and will eventually fail to
do data self heal. So a mechanism is needed to allow data self heal
even if metadata is in split-brain.
Fix:
Some data structure revamp was done in the fix at
http://review.gluster.com/#/c/5106/ and this patch is
based on that fix. Now we can store which particular self-heal
failed, i.e. GFID_OR_MISSING_ENTRY_SELF_HEAL, METADATA, DATA, or
ENTRY, and we can do two types of self heal failure check:
1. Individual type check: We can check which of the four
(metadata, data, GFID or missing entry, entry self heal)
has failed.
2. In afr_self_heal_completion_cbk, we need a check that,
if any specific self heal has failed, treats
the complete self heal as a failure, so that the
corresponding circular buffer of event history is populated accordingly.
Change-Id: Icb91e513bcc752386fc8a78812405cfabe5cac2d
BUG: 977797
Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com>
Reviewed-on: http://review.gluster.org/5253
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-algorithm.c | 29 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 167 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 21 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 24 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 21 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 15 |
7 files changed, 151 insertions, 134 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c index 22e074571ed..1d577cfb5ab 100644 --- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -100,7 +100,7 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this, } sh_private_cleanup (sh_frame, this); - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { GF_ASSERT (!last_loop_frame); //loop_finish should have happened and the old_loop should be NULL gf_log (this->name, GF_LOG_DEBUG, @@ -276,7 +276,7 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset, _gf_true, sh_loop_lock_success, sh_loop_lock_failure); return 0; out: - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); if (old_loop_frame) sh_loop_finish (old_loop_frame, this); sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM); @@ -307,8 +307,9 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, sh_priv->loops_running--; offset = sh_priv->offset; block_size = sh->block_size; - while ((!sh->eof_reached) && (!is_self_heal_failed (sh)) && - (sh_priv->loops_running < priv->data_self_heal_window_size) + while ((!sh->eof_reached) && + (!is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) && + (sh_priv->loops_running < priv->data_self_heal_window_size) && (sh_priv->offset < sh->file_size)) { loop++; @@ -327,7 +328,8 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, if (0 == loop) { //loop finish does unlock, but the erasing of the pending //xattrs needs to happen before that so do not finish the loop - if (is_driver_done && !is_self_heal_failed (sh)) + if (is_driver_done && + !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) goto driver_done; if (old_loop_frame) { sh_loop_finish (old_loop_frame, this); @@ -338,7 +340,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, //If we have more loops to form we 
should finish previous loop after //the next loop lock while (loop--) { - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { // op failed in other loop, stop spawning more loops if (old_loop_frame) { sh_loop_finish (old_loop_frame, this); @@ -384,7 +386,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame } if (op_ret == -1) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); if (loop_frame) { sh_loop_finish (loop_frame, this); @@ -432,7 +434,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, priv->children[child_index]->name, strerror (op_errno)); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (loop_sh, op_errno); } else if (op_ret < loop_local->cont.writev.vector->iov_len) { gf_log (this->name, GF_LOG_ERROR, @@ -440,7 +442,7 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, "(expected %lu, returned %d)", sh_local->loc.path, priv->children[child_index]->name, loop_local->cont.writev.vector->iov_len, op_ret); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } call_count = afr_frame_return (loop_frame); @@ -514,7 +516,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie, if (op_ret <= 0) { if (op_ret < 0) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); gf_log (this->name, GF_LOG_ERROR, "read failed on %d " "for %s reason :%s", sh->source, sh_local->loc.path, strerror (errno)); @@ -624,7 +626,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, "checksum on %s failed on subvolume %s (%s)", sh_local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->afr_set_self_heal_status 
(sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH, strong_checksum, MD5_DIGEST_LENGTH); @@ -662,7 +664,8 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, } UNLOCK (&sh_priv->lock); - if (write_needed && !is_self_heal_failed (sh)) { + if (write_needed && + !is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { sh_loop_read (loop_frame, this); } else { sh_loop_return (sh_frame, this, loop_frame, @@ -800,7 +803,7 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this, ret = 0; out: if (ret) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); sh_loop_driver_done (sh_frame, this, NULL); } return 0; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 5f985374f29..f0915b01d2e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1018,7 +1018,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this) local->loc.path); } - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { sh->completion_cbk (frame, this); } else { gf_log (this->name, GF_LOG_TRACE, @@ -1250,7 +1250,7 @@ out: if (ret) { gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, " "reason: %s", local->loc.path, strerror (-ret)); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } afr_sh_missing_entries_finish (frame, this); } @@ -1265,7 +1265,7 @@ afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, local = frame->local; sh = &local->self_heal; if (op_ret < 0) - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_missing_entries_finish (frame, this); return 0; } @@ -1386,7 +1386,7 @@ 
afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this, } return; out: - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_missing_entries_finish (frame, this); return; @@ -1470,7 +1470,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child, LOCK (&frame->lock); { afr_sh_set_error (sh, EIO); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } UNLOCK (&frame->lock); } @@ -1552,7 +1552,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { afr_sh_missing_entries_finish (frame, this); } else { if (afr_gfid_missing_count (this->name, sh->fresh_children, @@ -1766,7 +1766,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this, priv->child_count, ENOENT); if (fresh_child_enoents == fresh_parent_count) { afr_sh_set_error (sh, ENOENT); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_purge_entry (frame, this); } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children, priv->child_count, local->loc.path, @@ -1787,7 +1787,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this, return; fail: - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_missing_entries_finish (frame, this); return; @@ -1858,7 +1858,7 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this, out: afr_sh_set_error (sh, op_errno); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_missing_entries_finish (frame, this); return; } @@ -1962,7 +1962,7 @@ 
afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame, if (int_lock->lock_op_ret < 0) { gf_log (this->name, GF_LOG_INFO, "Non blocking entrylks failed."); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_missing_entries_done (frame, this); } else { @@ -2047,8 +2047,9 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; - sh->afr_set_self_heal_status = afr_set_gfid_or_missing_entry_sh_status; - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); + sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY; + + afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); afr_self_heal_parent_entrylk (frame, this, afr_sh_post_nb_entrylk_missing_entry_sh_cbk); @@ -2176,7 +2177,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) afr_self_heal_type_str_get (sh, sh_type_str, sizeof(sh_type_str)); - if (is_self_heal_failed (sh) && !priv->shd.iamshd) { + if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) { loglevel = GF_LOG_ERROR; } else { loglevel = GF_LOG_DEBUG; @@ -2191,7 +2192,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) orig_frame_sh = &orig_frame_local->self_heal; orig_frame_sh->actual_sh_started = _gf_true; sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, - is_self_heal_failed (sh)); + is_self_heal_failed (sh, AFR_CHECK_ALL)); } if (sh->background) { @@ -2305,6 +2306,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) sh->do_gfid_self_heal = _gf_false; } + sh->sh_type_in_action = AFR_SELF_HEAL_INVALID; + FRAME_SU_DO (sh_frame, afr_local_t); if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) { afr_self_heal_missing_entries (sh_frame, this); @@ -2514,7 +2517,7 @@ out: GF_FREE (erase_xattr); if (ret < 0) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, 
AFR_SELF_HEAL_FAILED); finish (frame, this); } @@ -2522,59 +2525,39 @@ out: } void -afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) -{ - xlator_t *this = NULL; - - this = THIS; - - if (sh) - sh->afr_all_sh_status.data_self_heal = status; - else - gf_log_callingfn (this->name, GF_LOG_ERROR, - "Null self heal struct"); -} - -void -afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) -{ - xlator_t *this = NULL; - - this = THIS; - - if (sh) - sh->afr_all_sh_status.metadata_self_heal = status; - else - gf_log_callingfn (this->name, GF_LOG_ERROR, - "Null self heal struct"); -} - -void -afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) +afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status) { - xlator_t *this = NULL; - + xlator_t *this = NULL; + afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status); + afr_self_heal_type sh_type_in_action = sh->sh_type_in_action; this = THIS; - if (sh) - sh->afr_all_sh_status.entry_self_heal = status; - else - gf_log_callingfn (this->name, GF_LOG_ERROR, - "Null self heal struct"); -} -void -afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh, - afr_self_heal_status status) -{ - xlator_t *this = NULL; - - this = THIS; + if (!sh) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal" + "Structure"); + goto out; + } - if (sh) - sh->afr_all_sh_status.gfid_or_missing_entry_self_heal = status; - else - gf_log_callingfn (this->name, GF_LOG_ERROR, - "Null self heal struct"); + switch (sh_type_in_action) { + case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY: + sh_status->gfid_or_missing_entry_self_heal = status; + break; + case AFR_SELF_HEAL_METADATA: + sh_status->metadata_self_heal = status; + break; + case AFR_SELF_HEAL_DATA: + sh_status->data_self_heal = status; + break; + case AFR_SELF_HEAL_ENTRY: + sh_status->entry_self_heal = status; + break; + case AFR_SELF_HEAL_INVALID: + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid" 
+ "self heal type in action"); + break; + } +out: + return; } void @@ -2585,22 +2568,58 @@ afr_set_local_for_unhealable (afr_local_t *local) sh = &local->self_heal; local->unhealable = 1; - if (sh->afr_set_self_heal_status) - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } int -is_self_heal_failed (afr_self_heal_t *sh) +is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type) { - afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status; + afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status; + afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID; + afr_self_heal_status status = AFR_SELF_HEAL_FAILED; + xlator_t *this = NULL; + int sh_failed = 0; + + this = THIS; - int sh_failed = 0; - if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED) - || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED) - || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED) - || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED)) - sh_failed = 1; + if (!sh) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal " + "structure"); + sh_failed = 1; + goto out; + } + if (type == AFR_CHECK_ALL) { + if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED) + || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED) + || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED) + || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED)) + sh_failed = 1; + } else if (type == AFR_CHECK_SPECIFIC) { + sh_type_in_action = sh->sh_type_in_action; + switch (sh_type_in_action) { + case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY: + status = sh_status.gfid_or_missing_entry_self_heal; + break; + case AFR_SELF_HEAL_METADATA: + status = sh_status.metadata_self_heal; + break; + case AFR_SELF_HEAL_ENTRY: + status = sh_status.entry_self_heal; + break; + case AFR_SELF_HEAL_DATA: + status = sh_status.data_self_heal; + break; + case AFR_SELF_HEAL_INVALID: + status = 
AFR_SELF_HEAL_NOT_ATTEMPTED; + break; + } + if (status == AFR_SELF_HEAL_FAILED) + sh_failed = 1; + + } + +out: return sh_failed; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 329bb2f1ed0..cc67e23fe95 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -15,13 +15,6 @@ #define AFR_SH_MIN_PARTICIPANTS 2 typedef enum { - AFR_SELF_HEAL_ENTRY, - AFR_SELF_HEAL_METADATA, - AFR_SELF_HEAL_DATA, - AFR_SELF_HEAL_INVALID = -1, -} afr_self_heal_type; - -typedef enum { AFR_LOOKUP_FAIL_CONFLICTS = 1, AFR_LOOKUP_FAIL_MISSING_GFIDS = 2, } afr_lookup_flags_t; @@ -138,20 +131,10 @@ void afr_set_local_for_unhealable (afr_local_t *local); int -is_self_heal_failed (afr_self_heal_t *sh); - -void -afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status); - -void -afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status staus); - -void -afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status); +is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type); void -afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh, - afr_self_heal_status status); +afr_set_self_heal_status (afr_self_heal_t *sh, afr_self_heal_status status); void afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index fc7f5e7ac4b..9c2f3d53c83 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -335,7 +335,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "finishing failed data selfheal of %s", local->loc.path); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); if (sh->data_lock_held) afr_sh_data_unlock (frame, this, 
afr_sh_data_close); else @@ -362,13 +362,13 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie, "log failed on %s for subvol %s, reason: %s", local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } call_count = afr_frame_return (frame); if (call_count == 0) { - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { if (sh->old_loop_frame) sh_loop_finish (sh->old_loop_frame, this); sh->old_loop_frame = NULL; @@ -418,7 +418,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, priv->children[child_index]->name, strerror (op_errno)); LOCK (&frame->lock); { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } UNLOCK (&frame->lock); if (sh->old_loop_frame) @@ -428,7 +428,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (is_self_heal_failed (sh)) + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) afr_sh_data_fail (frame, this); else afr_sh_data_erase_pending (frame, this); @@ -604,7 +604,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { gf_log (this->name, GF_LOG_DEBUG, "ftruncate of %s on subvolume %s completed", @@ -617,7 +617,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (is_self_heal_failed (sh)) + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) afr_sh_data_fail (frame, this); else afr_sh_data_sync_prepare (frame, this); @@ -718,7 +718,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t 
*this) if (sh->background && sh->unwind && !sh->unwound) { sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, - is_self_heal_failed (sh)); + is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)); sh->unwound = _gf_true; } @@ -1342,7 +1342,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { gf_log (this->name, GF_LOG_TRACE, "open of %s succeeded on child %s", @@ -1355,7 +1355,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { afr_sh_data_fail (frame, this); return 0; } @@ -1485,10 +1485,10 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; - sh->afr_set_self_heal_status = afr_set_data_sh_status; + sh->sh_type_in_action = AFR_SELF_HEAL_DATA; if (afr_can_start_data_self_heal (sh, priv)) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); if (IA_ISREG (sh->type)) { afr_sh_data_open (frame, this); } else { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 14ccca21b8b..3598f79d1ff 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -162,7 +162,7 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; if (sh->entries_skipped) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); goto out; } afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION, @@ -799,7 +799,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this) active_src = 
next_active_sink (frame, this, sh->active_source); sh->active_source = active_src; - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { goto out; } @@ -1946,7 +1946,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie, local->loc.path, priv->children[active_src]->name, strerror (op_errno)); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { gf_log (this->name, GF_LOG_TRACE, "readdir of %s on subvolume %s complete", @@ -2019,7 +2019,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this) active_src = next_active_source (frame, this, sh->active_source); sh->active_source = active_src; - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { afr_sh_entry_finish (frame, this); return 0; } @@ -2068,7 +2068,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } } UNLOCK (&frame->lock); @@ -2076,7 +2076,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (is_self_heal_failed (sh)) { + if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) { afr_sh_entry_finish (frame, this); return 0; } @@ -2231,7 +2231,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this, priv = this->private; if (op_ret < 0) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_entry_finish (frame, this); goto out; @@ -2294,7 +2294,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) if (int_lock->lock_op_ret < 0) { gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks " "failed for %s.", 
local->loc.path); - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_entry_done (frame, this); } else { @@ -2321,9 +2321,10 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; - sh->afr_set_self_heal_status = afr_set_entry_sh_status; + sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY; + if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); afr_sh_entrylk (frame, this, &local->loc, NULL, afr_sh_post_nonblocking_entry_cbk); } else { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index ac2d7fcc668..1f663b692fc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -97,7 +97,7 @@ afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_metadata_finish (frame, this); return 0; } @@ -461,7 +461,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this, priv = this->private; if (op_ret < 0) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_metadata_finish (frame, this); goto out; @@ -618,10 +618,10 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; - sh->afr_set_self_heal_status = afr_set_metadata_sh_status; + sh->sh_type_in_action = AFR_SELF_HEAL_METADATA; if (afr_can_start_metadata_self_heal (sh, priv)) { - sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); + afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); afr_sh_metadata_lock (frame, this); } else { 
afr_sh_metadata_done (frame, this); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index cbe6b339d08..9594a828773 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -185,6 +185,18 @@ typedef struct { afr_self_heal_status entry_self_heal; } afr_sh_status_for_all_type; +typedef enum { + AFR_SELF_HEAL_ENTRY, + AFR_SELF_HEAL_METADATA, + AFR_SELF_HEAL_DATA, + AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY, + AFR_SELF_HEAL_INVALID = -1, +} afr_self_heal_type; + +typedef enum { + AFR_CHECK_ALL, + AFR_CHECK_SPECIFIC, +} afr_sh_fail_check_type; struct afr_self_heal_ { /* External interface: These are variables (some optional) that @@ -283,9 +295,8 @@ struct afr_self_heal_ { afr_sh_algo_private_t *private; afr_sh_status_for_all_type afr_all_sh_status; + afr_self_heal_type sh_type_in_action; - void (*afr_set_self_heal_status) (struct afr_self_heal_ *sh, - afr_self_heal_status status); struct afr_sh_algorithm *algo; afr_lock_cbk_t data_lock_success_handler; afr_lock_cbk_t data_lock_failure_handler; |