diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-algorithm.c | 38 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 215 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 22 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 34 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 23 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 25 |
7 files changed, 280 insertions, 83 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c index 1721fd270dc..22e074571ed 100644 --- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -100,7 +100,7 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this, } sh_private_cleanup (sh_frame, this); - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { GF_ASSERT (!last_loop_frame); //loop_finish should have happened and the old_loop should be NULL gf_log (this->name, GF_LOG_DEBUG, @@ -273,10 +273,10 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset, new_loop_sh->offset = offset; new_loop_sh->block_size = sh->block_size; afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size, - _gf_true, sh_loop_lock_success, sh_loop_lock_failure); + _gf_true, sh_loop_lock_success, sh_loop_lock_failure); return 0; out: - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); if (old_loop_frame) sh_loop_finish (old_loop_frame, this); sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM); @@ -307,7 +307,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, sh_priv->loops_running--; offset = sh_priv->offset; block_size = sh->block_size; - while ((!sh->eof_reached) && (0 == sh->op_failed) && + while ((!sh->eof_reached) && (!is_self_heal_failed (sh)) && (sh_priv->loops_running < priv->data_self_heal_window_size) && (sh_priv->offset < sh->file_size)) { @@ -327,7 +327,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, if (0 == loop) { //loop finish does unlock, but the erasing of the pending //xattrs needs to happen before that so do not finish the loop - if (is_driver_done && !sh->op_failed) + if (is_driver_done && !is_self_heal_failed (sh)) goto driver_done; if (old_loop_frame) { sh_loop_finish (old_loop_frame, this); @@ -338,7 +338,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, //If we have more loops to form we should finish previous loop after //the next loop lock while (loop--) { - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { // op failed in other loop, stop spawning more loops if (old_loop_frame) { sh_loop_finish (old_loop_frame, this); @@ -384,7 +384,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame } if (op_ret == -1) { - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); if (loop_frame) { sh_loop_finish (loop_frame, this); @@ -432,16 +432,16 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (loop_sh, op_errno); } else if (op_ret < loop_local->cont.writev.vector->iov_len) { - gf_log(this->name, GF_LOG_ERROR, - "incomplete write to %s on subvolume %s " - "(expected %lu, returned %d)", sh_local->loc.path, - priv->children[child_index]->name, - loop_local->cont.writev.vector->iov_len, op_ret); - sh->op_failed = 1; - } + gf_log (this->name, GF_LOG_ERROR, + "incomplete write to %s on subvolume %s " + "(expected %lu, returned %d)", sh_local->loc.path, + priv->children[child_index]->name, + loop_local->cont.writev.vector->iov_len, op_ret); + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + } call_count = afr_frame_return (loop_frame); @@ -514,7 +514,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie, if (op_ret <= 0) { if (op_ret < 0) { - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); gf_log (this->name, GF_LOG_ERROR, "read failed on %d " "for %s reason :%s", sh->source, sh_local->loc.path, strerror (errno)); @@ -624,7 +624,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, "checksum on %s failed on subvolume %s (%s)", sh_local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH, strong_checksum, MD5_DIGEST_LENGTH); @@ -662,7 +662,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, } UNLOCK (&sh_priv->lock); - if (write_needed && !sh->op_failed) { + if (write_needed && !is_self_heal_failed (sh)) { sh_loop_read (loop_frame, this); } else { sh_loop_return (sh_frame, this, loop_frame, @@ -800,7 +800,7 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this, ret = 0; out: if (ret) { - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); sh_loop_driver_done (sh_frame, this, NULL); } return 0; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 2538f4c8bfd..5f985374f29 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1012,14 +1012,13 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this) afr_sh_reset (frame, this); - if (local->govinda_gOvinda) { + if (local->unhealable) { gf_log (this->name, GF_LOG_DEBUG, "split brain found, aborting selfheal of %s", local->loc.path); - sh->op_failed = 1; } - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { sh->completion_cbk (frame, this); } else { gf_log (this->name, GF_LOG_TRACE, @@ -1251,7 +1250,7 @@ out: if (ret) { gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, " "reason: %s", local->loc.path, strerror (-ret)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } afr_sh_missing_entries_finish (frame, this); } @@ -1266,7 +1265,7 @@ afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, local = frame->local; sh = &local->self_heal; if (op_ret < 0) - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_missing_entries_finish (frame, this); return 0; } @@ -1290,7 +1289,7 @@ sh_missing_entries_create (call_frame_t *frame, xlator_t *this) if (!afr_valid_ia_type (type)) { gf_log (this->name, GF_LOG_ERROR, "%s: unknown file type: 0%o", local->loc.path, type); - local->govinda_gOvinda = 1; + afr_set_local_for_unhealable (local); afr_sh_missing_entries_finish (frame, this); goto out; } @@ -1323,8 +1322,9 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this, loc = &local->loc; if (op_ret < 0) { - if (op_errno == EIO) - local->govinda_gOvinda = 1; + if (op_errno == EIO) { + afr_set_local_for_unhealable (local); + } // EIO can happen if finding the fresh parent dir failed goto out; } @@ -1386,7 +1386,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this, } return; out: - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_missing_entries_finish (frame, this); return; @@ -1470,7 +1470,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child, LOCK (&frame->lock); { afr_sh_set_error (sh, EIO); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } UNLOCK (&frame->lock); } @@ -1552,7 +1552,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; priv = this->private; - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { afr_sh_missing_entries_finish (frame, this); } else { if (afr_gfid_missing_count (this->name, sh->fresh_children, @@ -1766,7 +1766,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this, priv->child_count, ENOENT); if (fresh_child_enoents == fresh_parent_count) { afr_sh_set_error (sh, ENOENT); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_purge_entry (frame, this); } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children, priv->child_count, local->loc.path, @@ -1780,14 +1780,14 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this, afr_sh_purge_stale_entry (frame, this); } else { op_errno = EIO; - local->govinda_gOvinda = 1; + afr_set_local_for_unhealable (local); goto fail; } return; fail: - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_missing_entries_finish (frame, this); return; @@ -1858,8 +1858,8 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this, out: afr_sh_set_error (sh, op_errno); - sh->op_failed = 1; - afr_sh_missing_entries_finish (frame, this); + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); + afr_sh_missing_entries_finish (frame, this); return; } @@ -1962,7 +1962,7 @@ afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame, if (int_lock->lock_op_ret < 0) { gf_log (this->name, GF_LOG_INFO, "Non blocking entrylks failed."); - sh->op_failed = -1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_missing_entries_done (frame, this); } else { @@ -2041,8 +2041,17 @@ out: static int afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) { + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + + local = frame->local; + sh = &local->self_heal; + + sh->afr_set_self_heal_status = afr_set_gfid_or_missing_entry_sh_status; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); + afr_self_heal_parent_entrylk (frame, this, - afr_sh_post_nb_entrylk_missing_entry_sh_cbk); + afr_sh_post_nb_entrylk_missing_entry_sh_cbk); return 0; } @@ -2155,32 +2164,26 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) afr_local_t * orig_frame_local = NULL; afr_self_heal_t * orig_frame_sh = NULL; char sh_type_str[256] = {0,}; + gf_loglevel_t loglevel = 0; priv = this->private; local = bgsh_frame->local; sh = &local->self_heal; - if (local->govinda_gOvinda) { + if (local->unhealable) { afr_set_split_brain (this, sh->inode, SPB, SPB); - sh->op_failed = 1; } afr_self_heal_type_str_get (sh, sh_type_str, sizeof(sh_type_str)); - if (sh->op_failed) { - gf_loglevel_t loglevel = GF_LOG_ERROR; - if (priv->shd.iamshd) - loglevel = GF_LOG_DEBUG; - - gf_log (this->name, loglevel, "background %s self-heal " - "failed on %s", sh_type_str, local->loc.path); - + if (is_self_heal_failed (sh) && !priv->shd.iamshd) { + loglevel = GF_LOG_ERROR; } else { - gf_log (this->name, GF_LOG_DEBUG, "background %s self-heal " - "completed on %s", sh_type_str, local->loc.path); - + loglevel = GF_LOG_DEBUG; } + afr_log_self_heal_completion_status (local, loglevel); + FRAME_SU_UNDO (bgsh_frame, afr_local_t); if (!sh->unwound && sh->unwind) { @@ -2188,7 +2191,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this) orig_frame_sh = &orig_frame_local->self_heal; orig_frame_sh->actual_sh_started = _gf_true; sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, - sh->op_failed); + is_self_heal_failed (sh)); } if (sh->background) { @@ -2511,9 +2514,155 @@ out: GF_FREE (erase_xattr); if (ret < 0) { - sh->op_failed = _gf_true; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); finish (frame, this); } return 0; } + +void +afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) +{ + xlator_t *this = NULL; + + this = THIS; + + if (sh) + sh->afr_all_sh_status.data_self_heal = status; + else + gf_log_callingfn (this->name, GF_LOG_ERROR, + "Null self heal struct"); +} + +void +afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) +{ + xlator_t *this = NULL; + + this = THIS; + + if (sh) + sh->afr_all_sh_status.metadata_self_heal = status; + else + gf_log_callingfn (this->name, GF_LOG_ERROR, + "Null self heal struct"); +} + +void +afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) +{ + xlator_t *this = NULL; + + this = THIS; + + if (sh) + sh->afr_all_sh_status.entry_self_heal = status; + else + gf_log_callingfn (this->name, GF_LOG_ERROR, + "Null self heal struct"); +} +void +afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh, + afr_self_heal_status status) +{ + xlator_t *this = NULL; + + this = THIS; + + if (sh) + sh->afr_all_sh_status.gfid_or_missing_entry_self_heal = status; + else + gf_log_callingfn (this->name, GF_LOG_ERROR, + "Null self heal struct"); +} + +void +afr_set_local_for_unhealable (afr_local_t *local) +{ + afr_self_heal_t *sh = NULL; + + sh = &local->self_heal; + + local->unhealable = 1; + if (sh->afr_set_self_heal_status) + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); +} + +int +is_self_heal_failed (afr_self_heal_t *sh) +{ + afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status; + + int sh_failed = 0; + if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED) + || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED) + || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED) + || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED)) + sh_failed = 1; + + return sh_failed; +} + +char * +get_sh_completion_status (afr_self_heal_status status) +{ + + char *not_attempted = " is not attempted"; + char *failed = " failed"; + char *successfull_complt = " is successfully completed"; + char *result = " has unknown status"; + + switch (status) + { + case AFR_SELF_HEAL_NOT_ATTEMPTED: + result = not_attempted; + break; + case AFR_SELF_HEAL_FAILED: + result = failed; + break; + case AFR_SELF_HEAL_STARTED: + result = successfull_complt; + break; + } + + return result; + +} + +void +afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl) +{ + + char *gfid_or_missing_entry_sh = NULL; + char *metadata_sh = NULL; + char *data_sh = NULL; + char *entry_sh = NULL; + + afr_self_heal_t *sh = &local->self_heal; + afr_sh_status_for_all_type all_status = sh->afr_all_sh_status; + xlator_t *this = NULL; + + this = THIS; + + gfid_or_missing_entry_sh = get_sh_completion_status + (all_status.gfid_or_missing_entry_self_heal); + + metadata_sh = get_sh_completion_status (all_status.metadata_self_heal); + + + data_sh = get_sh_completion_status (all_status.data_self_heal); + + entry_sh = get_sh_completion_status (all_status.entry_self_heal); + + + gf_log (this->name, loglvl, "%s " + "gfid or missing entry self heal %s," + " medatadata self heal %s," + " data self heal %s," + " entry self heal %s on %s", + (sh->background ? "background" : "foreground"), + gfid_or_missing_entry_sh, metadata_sh, data_sh, entry_sh, + local->loc.path); + +} diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 035fce543a5..329bb2f1ed0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -133,4 +133,26 @@ int afr_sh_erase_pending (call_frame_t *frame, xlator_t *this, afr_transaction_type type, afr_fxattrop_cbk_t cbk, int (*finish)(call_frame_t *frame, xlator_t *this)); + +void +afr_set_local_for_unhealable (afr_local_t *local); + +int +is_self_heal_failed (afr_self_heal_t *sh); + +void +afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status); + +void +afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status staus); + +void +afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status); + +void +afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh, + afr_self_heal_status status); + +void +afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t logl); #endif /* __AFR_SELF_HEAL_COMMON_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 2f63ed27d74..fc7f5e7ac4b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -335,7 +335,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this) gf_log (this->name, GF_LOG_DEBUG, "finishing failed data selfheal of %s", local->loc.path); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); if (sh->data_lock_held) afr_sh_data_unlock (frame, this, afr_sh_data_close); else @@ -362,13 +362,13 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie, "log failed on %s for subvol %s, reason: %s", local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } call_count = afr_frame_return (frame); if (call_count == 0) { - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { if (sh->old_loop_frame) sh_loop_finish (sh->old_loop_frame, this); sh->old_loop_frame = NULL; @@ -418,7 +418,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, priv->children[child_index]->name, strerror (op_errno)); LOCK (&frame->lock); { - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } UNLOCK (&frame->lock); if (sh->old_loop_frame) @@ -428,7 +428,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (sh->op_failed) + if (is_self_heal_failed (sh)) afr_sh_data_fail (frame, this); else afr_sh_data_erase_pending (frame, this); @@ -604,7 +604,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { gf_log (this->name, GF_LOG_DEBUG, "ftruncate of %s on subvolume %s completed", @@ -617,7 +617,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (sh->op_failed) + if (is_self_heal_failed (sh)) afr_sh_data_fail (frame, this); else afr_sh_data_sync_prepare (frame, this); @@ -718,7 +718,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) if (sh->background && sh->unwind && !sh->unwound) { sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, - sh->op_failed); + is_self_heal_failed (sh)); sh->unwound = _gf_true; } @@ -1342,7 +1342,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { gf_log (this->name, GF_LOG_TRACE, "open of %s succeeded on child %s", @@ -1355,7 +1355,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { afr_sh_data_fail (frame, this); return 0; } @@ -1364,11 +1364,12 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, "fd for %s opened, commencing sync", local->loc.path); - /* - * The read and write self-heal trigger codepaths do not provide - * an unwind callback. We run a trylock in these codepaths - * because we are sensitive to locking latency. - */ + /* + * The read and write self-heal trigger codepaths do not provide + * an unwind callback. We run a trylock in these codepaths + * because we are sensitive to locking latency. + */ + block = sh->unwind ? _gf_true : _gf_false; afr_sh_data_lock (frame, this, 0, 0, block, afr_sh_data_big_lock_success, @@ -1484,7 +1485,10 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; + sh->afr_set_self_heal_status = afr_set_data_sh_status; + if (afr_can_start_data_self_heal (sh, priv)) { + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); if (IA_ISREG (sh->type)) { afr_sh_data_open (frame, this); } else { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index c3c9f9fca57..14ccca21b8b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -162,7 +162,7 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this) sh = &local->self_heal; if (sh->entries_skipped) { - sh->op_failed = _gf_true; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); goto out; } afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION, @@ -799,7 +799,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this) active_src = next_active_sink (frame, this, sh->active_source); sh->active_source = active_src; - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { goto out; } @@ -1946,7 +1946,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie, local->loc.path, priv->children[active_src]->name, strerror (op_errno)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } else { gf_log (this->name, GF_LOG_TRACE, "readdir of %s on subvolume %s complete", @@ -2019,7 +2019,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this) active_src = next_active_source (frame, this, sh->active_source); sh->active_source = active_src; - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { afr_sh_entry_finish (frame, this); return 0; } @@ -2068,7 +2068,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); } } UNLOCK (&frame->lock); @@ -2076,7 +2076,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { - if (sh->op_failed) { + if (is_self_heal_failed (sh)) { afr_sh_entry_finish (frame, this); return 0; } @@ -2231,7 +2231,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this, priv = this->private; if (op_ret < 0) { - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_entry_finish (frame, this); goto out; @@ -2294,7 +2294,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) if (int_lock->lock_op_ret < 0) { gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks " "failed for %s.", local->loc.path); - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_entry_done (frame, this); } else { @@ -2313,14 +2313,17 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this) int afr_self_heal_entry (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; afr_private_t *priv = NULL; - + afr_self_heal_t *sh = NULL; priv = this->private; local = frame->local; + sh = &local->self_heal; + sh->afr_set_self_heal_status = afr_set_entry_sh_status; if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) { + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); afr_sh_entrylk (frame, this, &local->loc, NULL, afr_sh_post_nonblocking_entry_cbk); } else { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index cc85d9b9f99..ac2d7fcc668 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -97,7 +97,7 @@ afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_metadata_finish (frame, this); return 0; } @@ -461,7 +461,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this, priv = this->private; if (op_ret < 0) { - sh->op_failed = 1; + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); afr_sh_set_error (sh, op_errno); afr_sh_metadata_finish (frame, this); goto out; @@ -618,8 +618,10 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this) local = frame->local; sh = &local->self_heal; + sh->afr_set_self_heal_status = afr_set_metadata_sh_status; if (afr_can_start_metadata_self_heal (sh, priv)) { + sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); afr_sh_metadata_lock (frame, this); } else { afr_sh_metadata_done (frame, this); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index ced4e6fab25..cbe6b339d08 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -172,7 +172,21 @@ typedef struct _afr_private { uint64_t sh_readdir_size; } afr_private_t; +typedef enum { + AFR_SELF_HEAL_NOT_ATTEMPTED, + AFR_SELF_HEAL_STARTED, + AFR_SELF_HEAL_FAILED, +} afr_self_heal_status; + typedef struct { + afr_self_heal_status gfid_or_missing_entry_self_heal; + afr_self_heal_status metadata_self_heal; + afr_self_heal_status data_self_heal; + afr_self_heal_status entry_self_heal; +} afr_sh_status_for_all_type; + + +struct afr_self_heal_ { /* External interface: These are variables (some optional) that are set by whoever has triggered self-heal */ @@ -249,7 +263,6 @@ typedef struct { const char *linkname; gf_boolean_t entries_skipped; - int op_failed; gf_boolean_t actual_sh_started; gf_boolean_t sync_done; gf_boolean_t data_lock_held; @@ -264,13 +277,15 @@ typedef struct { afr_post_remove_call_t post_remove_call; loc_t parent_loc; - call_frame_t *orig_frame; call_frame_t *old_loop_frame; gf_boolean_t unwound; afr_sh_algo_private_t *private; + afr_sh_status_for_all_type afr_all_sh_status; + void (*afr_set_self_heal_status) (struct afr_self_heal_ *sh, + afr_self_heal_status status); struct afr_sh_algorithm *algo; afr_lock_cbk_t data_lock_success_handler; afr_lock_cbk_t data_lock_failure_handler; @@ -282,7 +297,9 @@ typedef struct { void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this); call_frame_t *sh_frame; -} afr_self_heal_t; +}; + +typedef struct afr_self_heal_ afr_self_heal_t; typedef enum { AFR_DATA_TRANSACTION, /* truncate, write, ... */ @@ -408,7 +425,7 @@ typedef struct _afr_local { unsigned int enoent_count; - unsigned int govinda_gOvinda; + unsigned int unhealable; unsigned int read_child_index; unsigned char read_child_returned; |