diff options
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-algorithm.c | 38 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 215 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 22 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 34 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 23 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 25 | 
7 files changed, 280 insertions, 83 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c index 1721fd270dc..22e074571ed 100644 --- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -100,7 +100,7 @@ sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,          }          sh_private_cleanup (sh_frame, this); -        if (sh->op_failed) { +        if (is_self_heal_failed (sh)) {                  GF_ASSERT (!last_loop_frame);                  //loop_finish should have happened and the old_loop should be NULL                  gf_log (this->name, GF_LOG_DEBUG, @@ -273,10 +273,10 @@ sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,          new_loop_sh->offset = offset;          new_loop_sh->block_size = sh->block_size;          afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size, -			  _gf_true, sh_loop_lock_success, sh_loop_lock_failure); +                          _gf_true, sh_loop_lock_success, sh_loop_lock_failure);          return 0;  out: -        sh->op_failed = 1; +        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          if (old_loop_frame)                  sh_loop_finish (old_loop_frame, this);          sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM); @@ -307,7 +307,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,                          sh_priv->loops_running--;                  offset = sh_priv->offset;                  block_size = sh->block_size; -                while ((!sh->eof_reached) && (0 == sh->op_failed) && +                while ((!sh->eof_reached) && (!is_self_heal_failed (sh)) &&                         (sh_priv->loops_running < priv->data_self_heal_window_size)                         && (sh_priv->offset < sh->file_size)) { @@ -327,7 +327,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,          if (0 == loop) {                  //loop finish does unlock, but the erasing of the pending                  //xattrs needs to happen before that so do not finish the loop -                if (is_driver_done && !sh->op_failed) +                if (is_driver_done && !is_self_heal_failed (sh))                          goto driver_done;                  if (old_loop_frame) {                          sh_loop_finish (old_loop_frame, this); @@ -338,7 +338,7 @@ sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,          //If we have more loops to form we should finish previous loop after          //the next loop lock          while (loop--) { -                if (sh->op_failed) { +                if (is_self_heal_failed (sh)) {                          // op failed in other loop, stop spawning more loops                          if (old_loop_frame) {                                  sh_loop_finish (old_loop_frame, this); @@ -384,7 +384,7 @@ sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame          }          if (op_ret == -1) { -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  afr_sh_set_error (sh, op_errno);                  if (loop_frame) {                          sh_loop_finish (loop_frame, this); @@ -432,16 +432,16 @@ sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,                          priv->children[child_index]->name,                          strerror (op_errno)); -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  afr_sh_set_error (loop_sh, op_errno);          } else if (op_ret < loop_local->cont.writev.vector->iov_len) { -		gf_log(this->name, GF_LOG_ERROR, -		       "incomplete write to %s on subvolume %s " -		       "(expected %lu, returned %d)", sh_local->loc.path, -		       priv->children[child_index]->name, -		       loop_local->cont.writev.vector->iov_len, op_ret); -		sh->op_failed = 1; -	} +                gf_log (this->name, GF_LOG_ERROR, +                        "incomplete write to %s on subvolume %s " +                        "(expected %lu, returned %d)", sh_local->loc.path, +                        priv->children[child_index]->name, +                        loop_local->cont.writev.vector->iov_len, op_ret); +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); +        }          call_count = afr_frame_return (loop_frame); @@ -514,7 +514,7 @@ sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,          if (op_ret <= 0) {                  if (op_ret < 0) { -                        sh->op_failed = 1; +                        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                          gf_log (this->name, GF_LOG_ERROR, "read failed on %d "                                  "for %s reason :%s", sh->source,                                  sh_local->loc.path, strerror (errno)); @@ -624,7 +624,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,                          "checksum on %s failed on subvolume %s (%s)",                          sh_local->loc.path, priv->children[child_index]->name,                          strerror (op_errno)); -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          } else {                  memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,                          strong_checksum, MD5_DIGEST_LENGTH); @@ -662,7 +662,7 @@ sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,                  }                  UNLOCK (&sh_priv->lock); -                if (write_needed && !sh->op_failed) { +                if (write_needed && !is_self_heal_failed (sh)) {                          sh_loop_read (loop_frame, this);                  } else {                          sh_loop_return (sh_frame, this, loop_frame, @@ -800,7 +800,7 @@ afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,          ret = 0;  out:          if (ret) { -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  sh_loop_driver_done (sh_frame, this, NULL);          }          return 0; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 2538f4c8bfd..5f985374f29 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1012,14 +1012,13 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)          afr_sh_reset (frame, this); -        if (local->govinda_gOvinda) { +        if (local->unhealable) {                  gf_log (this->name, GF_LOG_DEBUG,                          "split brain found, aborting selfheal of %s",                          local->loc.path); -                sh->op_failed = 1;          } -        if (sh->op_failed) { +        if (is_self_heal_failed (sh)) {                  sh->completion_cbk (frame, this);          } else {                  gf_log (this->name, GF_LOG_TRACE, @@ -1251,7 +1250,7 @@ out:          if (ret) {                  gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "                          "reason: %s", local->loc.path, strerror (-ret)); -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          }          afr_sh_missing_entries_finish (frame, this);  } @@ -1266,7 +1265,7 @@ afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,          local = frame->local;          sh = &local->self_heal;          if (op_ret < 0) -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          afr_sh_missing_entries_finish (frame, this);          return 0;  } @@ -1290,7 +1289,7 @@ sh_missing_entries_create (call_frame_t *frame, xlator_t *this)          if (!afr_valid_ia_type (type)) {                  gf_log (this->name, GF_LOG_ERROR,                          "%s: unknown file type: 0%o", local->loc.path, type); -                local->govinda_gOvinda = 1; +                afr_set_local_for_unhealable (local);                  afr_sh_missing_entries_finish (frame, this);                  goto out;          } @@ -1323,8 +1322,9 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,          loc = &local->loc;          if (op_ret < 0) { -                if (op_errno == EIO) -                        local->govinda_gOvinda = 1; +                if (op_errno == EIO) { +                        afr_set_local_for_unhealable (local); +                }                  // EIO can happen if finding the fresh parent dir failed                  goto out;          } @@ -1386,7 +1386,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,          }          return;  out: -        sh->op_failed = 1; +        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          afr_sh_set_error (sh, op_errno);          afr_sh_missing_entries_finish (frame, this);          return; @@ -1470,7 +1470,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,                  LOCK (&frame->lock);                  {                          afr_sh_set_error (sh, EIO); -                        sh->op_failed = 1; +                        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  }                  UNLOCK (&frame->lock);          } @@ -1552,7 +1552,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)          sh       = &local->self_heal;          priv     = this->private; -        if (sh->op_failed) { +        if (is_self_heal_failed (sh)) {                  afr_sh_missing_entries_finish (frame, this);          } else {                  if (afr_gfid_missing_count (this->name, sh->fresh_children, @@ -1766,7 +1766,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,                                                 priv->child_count, ENOENT);          if (fresh_child_enoents == fresh_parent_count) {                  afr_sh_set_error (sh, ENOENT); -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  afr_sh_purge_entry (frame, this);          } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,                                              priv->child_count, local->loc.path, @@ -1780,14 +1780,14 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,                  afr_sh_purge_stale_entry (frame, this);          } else {                  op_errno = EIO; -                local->govinda_gOvinda = 1; +                afr_set_local_for_unhealable (local);                  goto fail;          }          return;  fail: -        sh->op_failed = 1; +        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          afr_sh_set_error (sh, op_errno);          afr_sh_missing_entries_finish (frame, this);          return; @@ -1858,8 +1858,8 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,  out:          afr_sh_set_error (sh, op_errno); -        sh->op_failed = 1; -        afr_sh_missing_entries_finish (frame, this); +        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); +	afr_sh_missing_entries_finish (frame, this);          return;  } @@ -1962,7 +1962,7 @@ afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,          if (int_lock->lock_op_ret < 0) {                  gf_log (this->name, GF_LOG_INFO,                          "Non blocking entrylks failed."); -                sh->op_failed = -1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  afr_sh_missing_entries_done (frame, this);          } else { @@ -2041,8 +2041,17 @@ out:  static int  afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)  { +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; + +        local = frame->local; +        sh = &local->self_heal; + +        sh->afr_set_self_heal_status  = afr_set_gfid_or_missing_entry_sh_status; +        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED); +          afr_self_heal_parent_entrylk (frame, this, -                                   afr_sh_post_nb_entrylk_missing_entry_sh_cbk); +                                      afr_sh_post_nb_entrylk_missing_entry_sh_cbk);          return 0;  } @@ -2155,32 +2164,26 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          afr_local_t *     orig_frame_local = NULL;          afr_self_heal_t * orig_frame_sh = NULL;          char              sh_type_str[256] = {0,}; +        gf_loglevel_t     loglevel = 0;          priv  = this->private;          local = bgsh_frame->local;          sh    = &local->self_heal; -        if (local->govinda_gOvinda) { +        if (local->unhealable) {                  afr_set_split_brain (this, sh->inode, SPB, SPB); -                sh->op_failed = 1;          }          afr_self_heal_type_str_get (sh, sh_type_str,                                      sizeof(sh_type_str)); -        if (sh->op_failed) { -                gf_loglevel_t     loglevel = GF_LOG_ERROR; -                if (priv->shd.iamshd) -                        loglevel = GF_LOG_DEBUG; - -                gf_log (this->name, loglevel, "background %s self-heal " -                        "failed on %s", sh_type_str, local->loc.path); - +        if (is_self_heal_failed (sh) && !priv->shd.iamshd) { +                loglevel = GF_LOG_ERROR;          } else { -                gf_log (this->name, GF_LOG_DEBUG, "background %s self-heal " -                        "completed on %s", sh_type_str, local->loc.path); - +                loglevel = GF_LOG_DEBUG;          } +        afr_log_self_heal_completion_status (local, loglevel); +          FRAME_SU_UNDO (bgsh_frame, afr_local_t);          if (!sh->unwound && sh->unwind) { @@ -2188,7 +2191,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)                  orig_frame_sh = &orig_frame_local->self_heal;                  orig_frame_sh->actual_sh_started = _gf_true;                  sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, -                            sh->op_failed); +                            is_self_heal_failed (sh));          }          if (sh->background) { @@ -2511,9 +2514,155 @@ out:          GF_FREE (erase_xattr);          if (ret < 0) { -                sh->op_failed = _gf_true; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  finish (frame, this);          }          return 0;  } + +void +afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) +{ +        xlator_t              *this = NULL; + +        this = THIS; + +        if (sh) +                sh->afr_all_sh_status.data_self_heal = status; +        else +                gf_log_callingfn (this->name, GF_LOG_ERROR, +                                  "Null self heal struct"); +} + +void +afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) +{ +        xlator_t              *this = NULL; + +        this = THIS; + +        if (sh) +                sh->afr_all_sh_status.metadata_self_heal = status; +        else +                gf_log_callingfn (this->name, GF_LOG_ERROR, +                                  "Null self heal struct"); +} + +void +afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status) +{ +        xlator_t              *this = NULL; + +        this = THIS; + +        if (sh) +                sh->afr_all_sh_status.entry_self_heal = status; +        else +                gf_log_callingfn (this->name, GF_LOG_ERROR, +                                  "Null self heal struct"); +} +void +afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh, +                                         afr_self_heal_status status) +{ +        xlator_t              *this = NULL; + +        this = THIS; + +        if (sh) +                sh->afr_all_sh_status.gfid_or_missing_entry_self_heal = status; +        else +                gf_log_callingfn (this->name, GF_LOG_ERROR, +                                  "Null self heal struct"); +} + +void +afr_set_local_for_unhealable (afr_local_t *local) +{ +        afr_self_heal_t  *sh = NULL; + +        sh = &local->self_heal; + +        local->unhealable = 1; +        if (sh->afr_set_self_heal_status) +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED); +} + +int +is_self_heal_failed (afr_self_heal_t *sh) +{ +        afr_sh_status_for_all_type  sh_status = sh->afr_all_sh_status; + +        int sh_failed = 0; +        if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED) +            || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED) +            || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED) +            || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED)) +                     sh_failed = 1; + +        return sh_failed; +} + +char * +get_sh_completion_status (afr_self_heal_status status) +{ + +        char *not_attempted       = " is not attempted"; +        char *failed              = " failed"; +        char *successfull_complt  = " is successfully completed"; +        char *result              = " has unknown status"; + +        switch (status) +        { +                case AFR_SELF_HEAL_NOT_ATTEMPTED: +                        result = not_attempted; +                        break; +                case AFR_SELF_HEAL_FAILED: +                        result = failed; +                        break; +                case AFR_SELF_HEAL_STARTED: +                        result = successfull_complt; +                        break; +        } + +        return result; + +} + +void +afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t loglvl) +{ + +        char *gfid_or_missing_entry_sh = NULL; +        char *metadata_sh              = NULL; +        char *data_sh                  = NULL; +        char *entry_sh                 = NULL; + +        afr_self_heal_t *sh            = &local->self_heal; +        afr_sh_status_for_all_type   all_status = sh->afr_all_sh_status; +        xlator_t      *this            = NULL; + +        this = THIS; + +        gfid_or_missing_entry_sh = get_sh_completion_status +                                   (all_status.gfid_or_missing_entry_self_heal); + +        metadata_sh = get_sh_completion_status (all_status.metadata_self_heal); + + +        data_sh = get_sh_completion_status (all_status.data_self_heal); + +        entry_sh = get_sh_completion_status (all_status.entry_self_heal); + + +        gf_log (this->name, loglvl, "%s " +                "gfid or missing entry self heal %s," +                " medatadata self heal %s," +                " data self heal %s," +                " entry self heal %s on  %s", +                (sh->background ? "background" : "foreground"), +                gfid_or_missing_entry_sh, metadata_sh, data_sh, entry_sh, +                local->loc.path); + +} diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 035fce543a5..329bb2f1ed0 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -133,4 +133,26 @@ int  afr_sh_erase_pending (call_frame_t *frame, xlator_t *this,                        afr_transaction_type type, afr_fxattrop_cbk_t cbk,                        int (*finish)(call_frame_t *frame, xlator_t *this)); + +void +afr_set_local_for_unhealable (afr_local_t *local); + +int +is_self_heal_failed (afr_self_heal_t *sh); + +void +afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status); + +void +afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status staus); + +void +afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status); + +void +afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh, +                                         afr_self_heal_status status); + +void +afr_log_self_heal_completion_status (afr_local_t *local, gf_loglevel_t  logl);  #endif /* __AFR_SELF_HEAL_COMMON_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 2f63ed27d74..fc7f5e7ac4b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -335,7 +335,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this)          gf_log (this->name, GF_LOG_DEBUG,                  "finishing failed data selfheal of %s", local->loc.path); -        sh->op_failed = 1; +        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          if (sh->data_lock_held)                  afr_sh_data_unlock (frame, this, afr_sh_data_close);          else @@ -362,13 +362,13 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,                          "log failed on %s for subvol %s, reason: %s",                          local->loc.path, priv->children[child_index]->name,                          strerror (op_errno)); -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          }          call_count = afr_frame_return (frame);          if (call_count == 0) { -                if (sh->op_failed) { +                if (is_self_heal_failed (sh)) {                          if (sh->old_loop_frame)                                  sh_loop_finish (sh->old_loop_frame, this);                          sh->old_loop_frame = NULL; @@ -418,7 +418,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          priv->children[child_index]->name, strerror (op_errno));                  LOCK (&frame->lock);                  { -                        sh->op_failed = 1; +                        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  }                  UNLOCK (&frame->lock);                  if (sh->old_loop_frame) @@ -428,7 +428,7 @@ afr_sh_data_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);          if (call_count == 0) { -                if (sh->op_failed) +                if (is_self_heal_failed (sh))                          afr_sh_data_fail (frame, this);                  else                          afr_sh_data_erase_pending (frame, this); @@ -604,7 +604,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  local->loc.path,                                  priv->children[child_index]->name,                                  strerror (op_errno)); -                        sh->op_failed = 1; +                        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  } else {                          gf_log (this->name, GF_LOG_DEBUG,                                  "ftruncate of %s on subvolume %s completed", @@ -617,7 +617,7 @@ afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);          if (call_count == 0) { -                if (sh->op_failed) +                if (is_self_heal_failed (sh))                          afr_sh_data_fail (frame, this);                  else                          afr_sh_data_sync_prepare (frame, this); @@ -718,7 +718,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)          if (sh->background && sh->unwind && !sh->unwound) {                  sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno, -                            sh->op_failed); +                            is_self_heal_failed (sh));                  sh->unwound = _gf_true;          } @@ -1342,7 +1342,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  local->loc.path,                                  priv->children[child_index]->name,                                  strerror (op_errno)); -                        sh->op_failed = 1; +                        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  } else {                          gf_log (this->name, GF_LOG_TRACE,                                  "open of %s succeeded on child %s", @@ -1355,7 +1355,7 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);          if (call_count == 0) { -                if (sh->op_failed) { +                if (is_self_heal_failed (sh)) {                          afr_sh_data_fail (frame, this);                          return 0;                  } @@ -1364,11 +1364,12 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          "fd for %s opened, commencing sync",                          local->loc.path); -		/* -		 * The read and write self-heal trigger codepaths do not provide -		 * an unwind callback. We run a trylock in these codepaths -		 * because we are sensitive to locking latency. -		 */ +                /* +                 * The read and write self-heal trigger codepaths do not provide +                 * an unwind callback. We run a trylock in these codepaths +                 * because we are sensitive to locking latency. +                 */ +  		block = sh->unwind ? _gf_true : _gf_false;                  afr_sh_data_lock (frame, this, 0, 0, block,                                    afr_sh_data_big_lock_success, @@ -1484,7 +1485,10 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)          local = frame->local;          sh = &local->self_heal; +        sh->afr_set_self_heal_status = afr_set_data_sh_status; +          if (afr_can_start_data_self_heal (sh, priv)) { +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);                  if (IA_ISREG (sh->type)) {                          afr_sh_data_open (frame, this);                  } else { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index c3c9f9fca57..14ccca21b8b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -162,7 +162,7 @@ afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)          sh = &local->self_heal;          if (sh->entries_skipped) { -                sh->op_failed = _gf_true; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  goto out;          }          afr_sh_erase_pending (frame, this, AFR_ENTRY_TRANSACTION, @@ -799,7 +799,7 @@ afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)          active_src = next_active_sink (frame, this, sh->active_source);          sh->active_source = active_src; -        if (sh->op_failed) { +        if (is_self_heal_failed (sh)) {                  goto out;          } @@ -1946,7 +1946,7 @@ afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,                                  local->loc.path,                                  priv->children[active_src]->name,                                  strerror (op_errno)); -                        sh->op_failed = 1; +                        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  } else {                          gf_log (this->name, GF_LOG_TRACE,                                  "readdir of %s on subvolume %s complete", @@ -2019,7 +2019,7 @@ afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)          active_src = next_active_source (frame, this, sh->active_source);          sh->active_source = active_src; -        if (sh->op_failed) { +        if (is_self_heal_failed (sh)) {                  afr_sh_entry_finish (frame, this);                  return 0;          } @@ -2068,7 +2068,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  local->loc.path,                                  priv->children[child_index]->name,                                  strerror (op_errno)); -                        sh->op_failed = 1; +                        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  }          }          UNLOCK (&frame->lock); @@ -2076,7 +2076,7 @@ afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          call_count = afr_frame_return (frame);          if (call_count == 0) { -                if (sh->op_failed) { +                if (is_self_heal_failed (sh)) {                          afr_sh_entry_finish (frame, this);                          return 0;                  } @@ -2231,7 +2231,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,          priv = this->private;          if (op_ret < 0) { -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  afr_sh_set_error (sh, op_errno);                  afr_sh_entry_finish (frame, this);                  goto out; @@ -2294,7 +2294,7 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)          if (int_lock->lock_op_ret < 0) {                  gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "                          "failed for %s.", local->loc.path); -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  afr_sh_entry_done (frame, this);          } else { @@ -2313,14 +2313,17 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)  int  afr_self_heal_entry (call_frame_t *frame, xlator_t *this)  { -        afr_local_t   *local = NULL; +        afr_local_t     *local = NULL;          afr_private_t   *priv = NULL; - +        afr_self_heal_t *sh = NULL;          priv = this->private;          local = frame->local; +        sh = &local->self_heal; +        sh->afr_set_self_heal_status = afr_set_entry_sh_status;          if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) { +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);                  afr_sh_entrylk (frame, this, &local->loc, NULL,                                  afr_sh_post_nonblocking_entry_cbk);          } else { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index cc85d9b9f99..ac2d7fcc668 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -97,7 +97,7 @@ afr_sh_metadata_fail (call_frame_t *frame, xlator_t *this)          local    = frame->local;          sh       = &local->self_heal; -        sh->op_failed = 1; +        sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);          afr_sh_metadata_finish (frame, this);          return 0;  } @@ -461,7 +461,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,          priv = this->private;          if (op_ret < 0) { -                sh->op_failed = 1; +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);                  afr_sh_set_error (sh, op_errno);                  afr_sh_metadata_finish (frame, this);                  goto out; @@ -618,8 +618,10 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)          local = frame->local;          sh = &local->self_heal; +        sh->afr_set_self_heal_status = afr_set_metadata_sh_status;          if (afr_can_start_metadata_self_heal (sh, priv)) { +                sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);                  afr_sh_metadata_lock (frame, this);          } else {                  afr_sh_metadata_done (frame, this); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index ced4e6fab25..cbe6b339d08 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -172,7 +172,21 @@ typedef struct _afr_private {          uint64_t               sh_readdir_size;  } afr_private_t; +typedef enum { +        AFR_SELF_HEAL_NOT_ATTEMPTED, +        AFR_SELF_HEAL_STARTED, +        AFR_SELF_HEAL_FAILED, +} afr_self_heal_status; +  typedef struct { +        afr_self_heal_status gfid_or_missing_entry_self_heal; +        afr_self_heal_status metadata_self_heal; +        afr_self_heal_status data_self_heal; +        afr_self_heal_status entry_self_heal; +} afr_sh_status_for_all_type; + + +struct afr_self_heal_ {          /* External interface: These are variables (some optional) that             are set by whoever has triggered self-heal */ @@ -249,7 +263,6 @@ typedef struct {          const char *linkname;          gf_boolean_t entries_skipped; -        int   op_failed;          gf_boolean_t actual_sh_started;          gf_boolean_t sync_done;          gf_boolean_t data_lock_held; @@ -264,13 +277,15 @@ typedef struct {          afr_post_remove_call_t post_remove_call;          loc_t parent_loc; -          call_frame_t *orig_frame;          call_frame_t *old_loop_frame;          gf_boolean_t unwound;          afr_sh_algo_private_t *private; +        afr_sh_status_for_all_type  afr_all_sh_status; +	void (*afr_set_self_heal_status) (struct afr_self_heal_ *sh, +                                          afr_self_heal_status status);          struct afr_sh_algorithm  *algo;          afr_lock_cbk_t data_lock_success_handler;          afr_lock_cbk_t data_lock_failure_handler; @@ -282,7 +297,9 @@ typedef struct {          void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);          call_frame_t *sh_frame; -} afr_self_heal_t; +}; + +typedef struct afr_self_heal_ afr_self_heal_t;  typedef enum {          AFR_DATA_TRANSACTION,          /* truncate, write, ... */ @@ -408,7 +425,7 @@ typedef struct _afr_local {          unsigned int enoent_count; -        unsigned int govinda_gOvinda; +        unsigned int unhealable;          unsigned int read_child_index;          unsigned char read_child_returned;  | 
