diff options
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 762 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-open.c | 10 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 1194 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.h | 24 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 8 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 320 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 61 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 77 | 
11 files changed, 1663 insertions, 802 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f49d8c55e2a..69e980a03d5 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -60,6 +60,117 @@  #define AFR_ICTX_SPLIT_BRAIN_MASK      0x0000000100000000ULL  #define AFR_ICTX_READ_CHILD_MASK       0x00000000FFFFFFFFULL +int +afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, +                                gf_boolean_t fail_conflict); +gf_boolean_t +afr_is_child_present (int32_t *success_children, int32_t child_count, +                      int32_t child) +{ +        gf_boolean_t             success_child = _gf_false; +        int                      i = 0; + +        GF_ASSERT (child < child_count); + +        for (i = 0; i < child_count; i++) { +                if (success_children[i] == -1) +                        break; +                if (child == success_children[i]) { +                        success_child = _gf_true; +                        break; +                } +        } +        return success_child; +} + +gf_boolean_t +afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child) +{ +        gf_boolean_t             source_xattrs = _gf_false; + +        GF_ASSERT (child < child_count); + +        if ((child >= 0) && (child < child_count) && +             sources[child]) { +                source_xattrs = _gf_true; +        } +        return source_xattrs; +} + +gf_boolean_t +afr_is_read_child (int32_t *success_children, int32_t *sources, +                   int32_t child_count, int32_t child) +{ +        gf_boolean_t             success_child = _gf_false; +        gf_boolean_t             source        = _gf_false; + +        GF_ASSERT (success_children); +        GF_ASSERT (child_count > 0); + +        success_child = afr_is_child_present (success_children, child_count, +                                              child); +        if (!success_child) +                goto out; +        if (NULL == sources) { +                source = _gf_true; +                goto out; +        } +        source = afr_is_source_child (sources, child_count, child); +out: +        return (success_child && source); +} + +void +afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count) +{ +        int     i = 0; + +        for (i = 0; i < child_count; i++) +                dst[i] = src[i]; +} + +void +afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path) +{ +        int             i           = 0; +        afr_private_t   *priv       = NULL; +        int             ret         = 0; + +        priv   = this->private; + +        for (i = 0; i < priv->child_count; i++) { +                ret = dict_set_uint64 (xattr_req, priv->pending_key[i], +                                       3 * sizeof(int32_t)); +                if (ret < 0) +                        gf_log (this->name, GF_LOG_WARNING, +                                "%s: Unable to set dict value for %s", +                                path, priv->pending_key[i]); +                /* 3 = data+metadata+entry */ +        } +} + +int +afr_errno_count (int32_t *children, int *child_errno, +                 unsigned int child_count, int32_t op_errno) +{ +        int i = 0; +        int errno_count = 0; +        int child = 0; + +        for (i = 0; i < child_count; i++) { +                if (children) { +                        child = children[i]; +                        if (child == -1) +                                break; +                } else { +                        child = i; +                } +                if (child_errno[child] == op_errno) +                        errno_count++; +        } +        return errno_count; +} +  int32_t  afr_set_dict_gfid (dict_t *dict, uuid_t gfid)  { @@ -267,9 +378,22 @@ out:  } -/** - * afr_local_cleanup - cleanup everything in frame->local - */ +void +afr_reset_xattr (dict_t **xattr, unsigned int child_count) +{ +        unsigned int i = 0; + +        if (!xattr) +                goto out; +        for (i = 0; i < child_count; i++) { +                if (xattr[i]) { +                        dict_unref (xattr[i]); +                        xattr[i] = NULL; +                } +        } +out: +        return; +}  void  afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) @@ -285,13 +409,14 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)          if (sh->buf)                  GF_FREE (sh->buf); +        if (sh->parentbufs) +                GF_FREE (sh->parentbufs); + +        if (sh->inode) +                inode_unref (sh->inode); +          if (sh->xattr) { -                for (i = 0; i < priv->child_count; i++) { -                        if (sh->xattr[i]) { -                                dict_unref (sh->xattr[i]); -                                sh->xattr[i] = NULL; -                        } -                } +                afr_reset_xattr (sh->xattr, priv->child_count);                  GF_FREE (sh->xattr);          } @@ -331,6 +456,9 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)          if (sh->child_success)                  GF_FREE (sh->child_success); +        if (sh->fresh_parent_dirs) +                GF_FREE (sh->fresh_parent_dirs); +          loc_wipe (&sh->parent_loc);  } @@ -374,10 +502,13 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)  } +/** + * afr_local_cleanup - cleanup everything in frame->local + */ +  void  afr_local_cleanup (afr_local_t *local, xlator_t *this)  { -        int i = 0;          afr_private_t * priv = NULL;          if (!local) @@ -402,12 +533,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)          { /* lookup */                  if (local->cont.lookup.xattrs) { -                        for (i = 0; i < priv->child_count; i++) { -                                if (local->cont.lookup.xattrs[i]) { -                                        dict_unref (local->cont.lookup.xattrs[i]); -                                        local->cont.lookup.xattrs[i] = NULL; -                                } -                        } +                        afr_reset_xattr (local->cont.lookup.xattrs, +                                         priv->child_count);                          GF_FREE (local->cont.lookup.xattrs);                          local->cont.lookup.xattrs = NULL;                  } @@ -585,25 +712,6 @@ afr_deitransform (ino64_t ino, int child_count)  } -int -afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this) -{ -        afr_local_t *local = NULL; - -        local = frame->local; - -        if (local->govinda_gOvinda && local->cont.lookup.inode) { -                afr_set_split_brain (this, local->cont.lookup.inode, _gf_true); -        } - -        AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, -                          local->cont.lookup.inode, &local->cont.lookup.buf, -                          local->cont.lookup.xattr, -                          &local->cont.lookup.postparent); - -        return 0; -} -  void  afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)  { @@ -613,13 +721,14 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)          dict_t          **xattr = NULL;          GF_ASSERT (local); -        GF_ASSERT (local->cont.lookup.read_child >= 0);          buf = &local->cont.lookup.buf;          postparent = &local->cont.lookup.postparent;          xattr = &local->cont.lookup.xattr; -        read_child = local->cont.lookup.read_child; +        read_child = afr_read_child (this, local->cont.lookup.inode); +        gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d", +                read_child);          *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);          *buf = local->cont.lookup.bufs[read_child];          *postparent = local->cont.lookup.postparents[read_child]; @@ -630,8 +739,7 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)          }  } - - static void +static void  afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,                              int child_index, dict_t *xattr)  { @@ -742,6 +850,8 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)                  local->self_heal.need_metadata_self_heal = _gf_true;                  local->self_heal.need_data_self_heal     = _gf_true;                  local->self_heal.need_entry_self_heal    = _gf_true; +                local->self_heal.need_gfid_self_heal    = _gf_true; +                local->self_heal.need_missing_entry_self_heal    = _gf_true;                  gf_log(this->name, GF_LOG_INFO,                         "entries are missing in lookup of %s.",                         local->loc.path); @@ -749,14 +859,15 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)                  goto out;          } -        if (local->success_count > 0) { -                if (afr_is_split_brain (this, local->cont.lookup.inode) && -                    IA_ISREG (local->cont.lookup.inode->ia_type)) { -                        local->self_heal.need_data_self_heal = _gf_true; -                        gf_log (this->name, GF_LOG_WARNING, -                                "split brain detected during lookup of %s.", -                                local->loc.path); -                } +        if ((local->success_count > 0) && +            afr_is_split_brain (this, local->cont.lookup.inode) && +            IA_ISREG (local->cont.lookup.inode->ia_type)) { +                local->self_heal.need_data_self_heal = _gf_true; +                local->self_heal.need_gfid_self_heal    = _gf_true; +                local->self_heal.need_missing_entry_self_heal    = _gf_true; +                gf_log (this->name, GF_LOG_WARNING, +                        "split brain detected during lookup of %s.", +                        local->loc.path);          }  out: @@ -769,49 +880,62 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)          GF_ASSERT (sh);          GF_ASSERT (priv); -        return ((priv->data_self_heal && sh->need_data_self_heal) +        return (sh->need_gfid_self_heal +                || sh->need_missing_entry_self_heal +                || (priv->data_self_heal && sh->need_data_self_heal)                  || (priv->metadata_self_heal && sh->need_metadata_self_heal)                  || (priv->entry_self_heal && sh->need_entry_self_heal));  } -gf_boolean_t -afr_is_self_heal_enabled (afr_private_t *priv) +afr_transaction_type +afr_transaction_type_get (ia_type_t ia_type)  { -        GF_ASSERT (priv); +        afr_transaction_type    type = AFR_METADATA_TRANSACTION; -        return (priv->data_self_heal || priv->metadata_self_heal -                || priv->entry_self_heal); +        GF_ASSERT (ia_type != IA_INVAL); + +        if (IA_ISDIR (ia_type)) { +                type = AFR_ENTRY_TRANSACTION; +        } else if (IA_ISREG (ia_type)) { +                type = AFR_DATA_TRANSACTION; +        } +        return type;  }  int  afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,                                int32_t *read_child)  { -        int32_t                 source = -1; -        ia_type_t               ia_type = 0; -        int                     ret = -1; -        afr_transaction_type    type = AFR_METADATA_TRANSACTION; -        dict_t                  **xattrs = NULL; -        int32_t                 *child_success = NULL; -        struct iatt             *bufs = NULL; +        ia_type_t               ia_type        = IA_INVAL; +        int32_t                 source         = -1; +        int                     ret            = -1; +        dict_t                  **xattrs       = NULL; +        int32_t                 *success_children = NULL; +        struct iatt             *bufs          = NULL; +        afr_transaction_type    type           = AFR_METADATA_TRANSACTION;          GF_ASSERT (local);          GF_ASSERT (this);          bufs = local->cont.lookup.bufs; -        child_success = local->cont.lookup.child_success; -        ia_type = local->cont.lookup.bufs[child_success[0]].ia_type; -        if (IA_ISDIR (ia_type)) { -                type = AFR_ENTRY_TRANSACTION; -        } else if (IA_ISREG (ia_type)) { -                type = AFR_DATA_TRANSACTION; -        } +        success_children = local->cont.lookup.child_success; +        /*We can take the success_children[0] only because we already +         *handle the conflicting children other wise, we could select the +         *read_child based on wrong file type +         */ +        ia_type = local->cont.lookup.bufs[success_children[0]].ia_type; +        type = afr_transaction_type_get (ia_type);          xattrs = local->cont.lookup.xattrs;          source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,                                                             type); -        if (source < 0) +        if (source < 0) { +                gf_log (this->name, GF_LOG_DEBUG, "failed to select source " +                        "for %s", local->loc.path);                  goto out; +        } +        gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s", +                source, local->loc.path);          *read_child = source;          ret = 0;  out: @@ -828,7 +952,10 @@ afr_is_self_heal_running (afr_local_t *local)  static void  afr_launch_self_heal (call_frame_t *frame, xlator_t *this,                        gf_boolean_t is_background, ia_type_t ia_type, -                      int (*unwind) (call_frame_t *frame, xlator_t *this)) +                      inode_t *inode, +                      void (*gfid_sh_success_cbk) (call_frame_t*, xlator_t*), +                      int (*unwind) (call_frame_t *frame, xlator_t *this, +                                     int32_t op_ret, int32_t op_errno))  {          afr_local_t             *local = NULL;          char                    sh_type_str[256] = {0,}; @@ -840,6 +967,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this,          local->self_heal.background = is_background;          local->self_heal.type       = ia_type;          local->self_heal.unwind     = unwind; +        local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;          afr_self_heal_type_str_get (&local->self_heal,                                      sh_type_str, @@ -849,11 +977,142 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this,                  "background %s self-heal triggered. path: %s",                  sh_type_str, local->loc.path); -        afr_self_heal (frame, this); +        afr_self_heal (frame, this, inode); +} + +int +afr_gfid_missing_count (const char *xlator_name, int32_t *success_children, +                        struct iatt *bufs, unsigned int child_count, +                        const char *path) +{ +        int             gfid_miss_count   = 0; +        int             i              = 0; +        struct iatt     *child1        = NULL; + +        for (i = 0; i < child_count; i++) { +                if (success_children[i] == -1) +                        break; +                child1 = &bufs[success_children[i]]; +                if (uuid_is_null (child1->ia_gfid)) { +                        gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null" +                                " on subvolume %d", path, success_children[i]); +                        gfid_miss_count++; +                } +        } + +        return gfid_miss_count; +} + +static int +afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this) +{ +        int32_t         *success_children = NULL; +        afr_private_t   *priv          = NULL; +        struct iatt     *bufs          = NULL; +        int             miss_count     = 0; + +        priv = this->private; +        bufs = local->cont.lookup.bufs; +        success_children = local->cont.lookup.child_success; + +        miss_count =  afr_gfid_missing_count (this->name, success_children, +                                              bufs, priv->child_count, +                                              local->loc.path); +        return miss_count; +} + +gf_boolean_t +afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children, +                        unsigned int child_count, const char *path, +                        const char *xlator_name) +{ +        gf_boolean_t    conflicting    = _gf_false; +        int             i              = 0; +        struct iatt     *child1        = NULL; +        struct iatt     *child2        = NULL; +        uuid_t          *gfid          = NULL; +        char            gfid_str[64]   = {0}; + +        for (i = 0; i < child_count; i++) { +                if (success_children[i] == -1) +                        break; +                child1 = &bufs[success_children[i]]; +                if ((!gfid) && (!uuid_is_null (child1->ia_gfid))) +                        gfid = &child1->ia_gfid; + +                if (i == 0) +                        continue; + +                child2 = &bufs[success_children[i-1]]; +                if (FILETYPE_DIFFERS (child1, child2)) { +                        gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype " +                                "differs on subvolumes (%d, %d)", path, +                                success_children[i-1], success_children[i]); +                        conflicting = _gf_true; +                        goto out; +                } +                if (!gfid || uuid_is_null (child1->ia_gfid)) +                        continue; +                if (uuid_compare (*gfid, child1->ia_gfid)) { +                        uuid_utoa_r (*gfid, gfid_str); +                        gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs" +                                " on subvolume %d (%s, %s)", path, +                                success_children[i], gfid_str, +                                uuid_utoa (child1->ia_gfid)); +                        conflicting = _gf_true; +                        goto out; +                } +        } +out: +        return conflicting; +} + +/* afr_update_gfid_from_iatts: This function should be called only if the + * iatts are not conflicting. + */ +void +afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs, +                            int32_t *success_children, unsigned int child_count) +{ +        uuid_t          *gfid = NULL; +        int             i = 0; +        int             child = 0; + +        for (i = 0; i < child_count; i++) { +                child = success_children[i]; +                if (child == -1) +                        break; +                if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) { +                        gfid = &bufs[child].ia_gfid; +                } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) { +                        if (uuid_compare (*gfid, bufs[child].ia_gfid)) { +                                GF_ASSERT (0); +                                goto out; +                        } +                } +        } +        if (gfid && (!uuid_is_null (*gfid))) +                uuid_copy (uuid, *gfid); +out: +        return; +} + +static gf_boolean_t +afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this) +{ +        afr_private_t           *priv = NULL; +        gf_boolean_t            conflict = _gf_false; + +        priv = this->private; +        conflict =  afr_conflicting_iattrs (local->cont.lookup.bufs, +                                            local->cont.lookup.child_success, +                                            priv->child_count, local->loc.path, +                                            this->name); +        return conflict;  }  static void -afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) +afr_lookup_set_self_heal_data (afr_local_t *local, xlator_t *this)  {          int                     i = 0;          struct iatt             *bufs = NULL; @@ -862,8 +1121,20 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)          int32_t                 child1 = -1;          int32_t                 child2 = -1; +        priv  = this->private;          afr_detect_self_heal_by_lookup_status (local, this); +        if (afr_lookup_gfid_missing_count (local, this)) +                local->self_heal.need_gfid_self_heal    = _gf_true; + +        if (_gf_true == afr_lookup_conflicting_entries (local, this)) +                local->self_heal.need_missing_entry_self_heal    = _gf_true; +        else +                afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req, +                                            local->cont.lookup.bufs, +                                            local->cont.lookup.child_success, +                                            priv->child_count); +          bufs = local->cont.lookup.bufs;          for (i = 1; i < local->success_count; i++) {                  child1 = local->cont.lookup.child_success[i-1]; @@ -873,7 +1144,6 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)          }          xattr = local->cont.lookup.xattrs; -        priv  = this->private;          for (i = 0; i < local->success_count; i++) {                  child1 = local->cont.lookup.child_success[i];;                  afr_lookup_detect_self_heal_by_xattr (local, this, @@ -881,6 +1151,74 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)          }  } +int +afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this, +                             int32_t op_ret, int32_t op_errno) +{ +        afr_local_t *local = NULL; + +        local = frame->local; + +        if (op_ret == -1) { +                local->op_ret = -1; +                if (afr_error_more_important (local->op_errno, op_errno)) +                        local->op_errno = op_errno; + +                goto out; +        } else { +                local->op_ret = 0; +        } + +        afr_lookup_done_success_action (frame, this, _gf_true); +out: +        AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, +                          local->cont.lookup.inode, &local->cont.lookup.buf, +                          local->cont.lookup.xattr, +                          &local->cont.lookup.postparent); + +        return 0; +} + +//TODO: At the moment only lookup needs this, so not doing any checks, in the +// future we will have to do fop specific operations +void +afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this) +{ +        afr_local_t             *local = NULL; +        afr_local_t             *sh_local = NULL; +        afr_private_t           *priv = NULL; +        afr_self_heal_t         *sh = NULL; +        int                     i = 0; +        struct iatt             *lookup_bufs = NULL; +        struct iatt             *lookup_parentbufs = NULL; + +        sh_local = sh_frame->local; +        sh       = &sh_local->self_heal; +        local = sh->orig_frame->local; +        lookup_bufs = local->cont.lookup.bufs; +        lookup_parentbufs = local->cont.lookup.postparents; +        priv = this->private; + +        memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf)); +        memcpy (lookup_parentbufs, sh->parentbufs, +                priv->child_count * sizeof (*sh->parentbufs)); + +        afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count); +        if (local->cont.lookup.xattr) { +                dict_unref (local->cont.lookup.xattr); +                local->cont.lookup.xattr = NULL; +        } + +        for (i = 0; i < priv->child_count; i++) { +                if (sh->xattr[i]) +                        local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]); +        } +        afr_reset_children (local->cont.lookup.child_success, +                            priv->child_count); +        afr_children_copy (local->cont.lookup.child_success, +                           sh->fresh_children, priv->child_count); +} +  static void  afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,                                          gf_boolean_t *sh_launched) @@ -901,20 +1239,15 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,                  goto out;          } -        if (_gf_false == afr_is_self_heal_enabled (priv)) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "Self heal is not enabled"); -                goto out; -        } - -        afr_lookup_detect_self_heal (local, this); +        afr_lookup_set_self_heal_data (local, this);          if (afr_can_self_heal_proceed (&local->self_heal, priv)) { -                if  (afr_is_self_heal_running (local)) { +                if  (afr_is_self_heal_running (local))                          goto out; -                }                  afr_launch_self_heal (frame, this, _gf_true,                                        local->cont.lookup.buf.ia_type, +                                      local->cont.lookup.inode, +                                      afr_post_gfid_sh_success,                                        afr_self_heal_lookup_unwind);                  *sh_launched = _gf_true;          } @@ -922,46 +1255,6 @@ out:          return;  } -static gf_boolean_t -afr_lookup_split_brain (afr_local_t *local, xlator_t *this) -{ -        int             i              = 0; -        gf_boolean_t    symptom        = _gf_false; -        struct iatt     *bufs          = NULL; -        int32_t         *child_success = NULL; -        struct iatt     *child1        = NULL; -        struct iatt     *child2        = NULL; -        const char      *path          = NULL; - -        bufs = local->cont.lookup.bufs; -        child_success = local->cont.lookup.child_success; -        for (i = 1; i < local->success_count; i++) { -                child1 = &bufs[child_success[i-1]]; -                child2 = &bufs[child_success[i]]; -                /* -                 * TODO: gfid self-heal -                 * if (uuid_compare (child1->ia_gfid, child2->ia_gfid)) { -                 *        gf_log (this->name, GF_LOG_WARNING, "%s: gfid differs" -                 *                " on subvolumes (%d, %d)", local->loc.path, -                 *                child_success[i-1], child_success[i]); -                 *        symptom = _gf_true; -                 * } -                 */ - -                if (FILETYPE_DIFFERS (child1, child2)) { -                        path = local->loc.path; -                        gf_log (this->name, GF_LOG_WARNING, "%s: filetype " -                                "differs on subvolumes (%d, %d)", path, -                                child_success[i-1], child_success[i]); -                        symptom = _gf_true; -                        local->govinda_gOvinda = 1; -                } -                if (symptom) -                        break; -        } -        return symptom; -} -  static int  afr_lookup_set_read_child (afr_local_t *local, xlator_t *this, int32_t read_child)  { @@ -973,6 +1266,60 @@ afr_lookup_set_read_child (afr_local_t *local, xlator_t *this, int32_t read_chil          return 0;  } +int +afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, +                                gf_boolean_t fail_conflict) +{ +        int32_t             read_child = -1; +        int32_t             ret        = -1; +        afr_local_t         *local     = NULL; +        afr_private_t       *priv      = NULL; + +        local   = frame->local; +        priv    = this->private; + +        if (local->loc.parent == NULL) +                fail_conflict = _gf_true; + +        if (afr_conflicting_iattrs (local->cont.lookup.bufs, +                                    local->cont.lookup.child_success, +                                    priv->child_count, local->loc.path, +                                     this->name)) { +                if (fail_conflict == _gf_false) { +                        ret = 0; +                } else { +                        local->op_ret = -1; +                        local->op_errno = EIO; +                } +                goto out; +        } + +        ret = afr_lookup_select_read_child (local, this, &read_child); +        if (ret) { +                local->op_ret = -1; +                local->op_errno = EIO; +                goto out; +        } + +        ret = afr_lookup_set_read_child (local, this, read_child); +        if (ret) { +                local->op_ret = -1; +                local->op_errno = EIO; +                goto out; +        } + +        afr_lookup_build_response_params (local, this); +        if (afr_is_fresh_lookup (&local->loc, this)) { +                afr_update_loc_gfids (&local->loc, +                                      &local->cont.lookup.buf, +                                      &local->cont.lookup.postparent); +        } + +        ret = 0; +out: +        return ret; +} +  static void  afr_lookup_done (call_frame_t *frame, xlator_t *this)  { @@ -981,44 +1328,44 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)          afr_local_t         *local = NULL;          int                 ret = -1;          gf_boolean_t        sh_launched = _gf_false; -        int32_t             read_child = -1; +        int                 gfid_miss_count = 0; +        int                 enotconn_count = 0; +        int                 up_children_count = 0;          priv  = this->private;          local = frame->local;          if (local->op_ret < 0)                  goto unwind; - -        if (_gf_true == afr_lookup_split_brain (local, this)) { -                local->op_ret = -1; -                local->op_errno = EIO; -                goto unwind; -        } - -        ret = afr_lookup_select_read_child (local, this, &read_child); -        if (ret) { +        gfid_miss_count = afr_lookup_gfid_missing_count (local, this); +        up_children_count = afr_up_children_count (priv->child_count, +                                                   local->child_up); +        enotconn_count = priv->child_count - up_children_count; +        if ((gfid_miss_count == local->success_count) && +            (enotconn_count > 0)) {                  local->op_ret = -1;                  local->op_errno = EIO; +                gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, " +                        "LOOKUP on a file without gfid is not allowed when " +                        "some of the children are down", local->loc.path);                  goto unwind;          } -        ret = afr_lookup_set_read_child (local, this, read_child); +        ret = afr_lookup_done_success_action (frame, this, _gf_false);          if (ret)                  goto unwind; - -        afr_lookup_build_response_params (local, this); -        if (afr_is_fresh_lookup (&local->loc, this)) { -                afr_update_loc_gfids (&local->loc, &local->cont.lookup.buf, -                                      &local->cont.lookup.postparent); -        } +        uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);          afr_lookup_perform_self_heal_if_needed (frame, this, &sh_launched); -        if (sh_launched) +        if (sh_launched) {                  unwind = 0; +                goto unwind; +        } +   unwind:           if (unwind) {                   AFR_STACK_UNWIND (lookup, frame, local->op_ret, -                                  local->op_errno, local->cont.lookup.inode, +                                   local->op_errno, local->cont.lookup.inode,                                     &local->cont.lookup.buf,                                     local->cont.lookup.xattr,                                     &local->cont.lookup.postparent); @@ -1034,8 +1381,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)   *   */ -static gf_boolean_t -__error_more_important (int32_t old_errno, int32_t new_errno) +gf_boolean_t +afr_error_more_important (int32_t old_errno, int32_t new_errno)  {          gf_boolean_t ret = _gf_true; @@ -1050,6 +1397,28 @@ __error_more_important (int32_t old_errno, int32_t new_errno)          return ret;  } +int32_t +afr_resultant_errno_get (int32_t *children, +                         int *child_errno, unsigned int child_count) +{ +        int     i = 0; +        int32_t op_errno = 0; +        int     child = 0; + +        for (i = 0; i < child_count; i++) { +                if (children) { +                        child = children[i]; +                        if (child == -1) +                                break; +                } else { +                        child = i; +                } +                if (afr_error_more_important (op_errno, child_errno[child])) +                                op_errno = child_errno[child]; +        } +        return op_errno; +} +  static void  afr_lookup_handle_error (afr_local_t *local, int32_t op_ret,  int32_t op_errno)  { @@ -1057,7 +1426,7 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret,  int32_t op_errno)          if (op_errno == ENOENT)                  local->enoent_count++; -        if (__error_more_important (local->op_errno, op_errno)) +        if (afr_error_more_important (local->op_errno, op_errno))                  local->op_errno = op_errno;          if (local->op_errno == ESTALE) {                  local->op_ret = -1; @@ -1196,7 +1565,6 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)          local->cont.lookup.child_success = child_success; -        local->cont.lookup.read_child = -1;          ret = 0;  out:          return ret; @@ -1208,6 +1576,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this,  {          afr_private_t    *priv           = NULL;          afr_local_t      *local          = NULL; +        void              *gfid_req      = NULL;          int               ret            = -1;          int               i              = 0;          int               call_count     = 0; @@ -1277,23 +1646,13 @@ afr_lookup (call_frame_t *frame, xlator_t *this,          else                  local->xattr_req = dict_ref (xattr_req); -        for (i = 0; i < priv->child_count; i++) { -                ret = dict_set_uint64 (local->xattr_req, priv->pending_key[i], -                                       3 * sizeof(int32_t)); -                if (ret < 0) -                        gf_log (this->name, GF_LOG_WARNING, -                                "%s: Unable to set dict value for %s", -                                loc->path, priv->pending_key[i]); -                /* 3 = data+metadata+entry */ -        } - +        afr_xattr_req_prepare (this, local->xattr_req, loc->path);          ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);          if (ret < 0) {                  gf_log (this->name, GF_LOG_WARNING,                          "%s: Unable to set dict value for %s",                          loc->path, GLUSTERFS_INODELK_COUNT);          } -          ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);          if (ret < 0) {                  gf_log (this->name, GF_LOG_WARNING, @@ -1301,6 +1660,16 @@ afr_lookup (call_frame_t *frame, xlator_t *this,                          loc->path, GLUSTERFS_ENTRYLK_COUNT);          } +        ret = dict_get_ptr (xattr_req, "gfid-req", &gfid_req); +        if (ret) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "failed to get the gfid from dict"); +        } else { +                uuid_copy (local->cont.lookup.gfid_req, gfid_req); +        } +        if (local->loc.parent != NULL) +                dict_del (xattr_req, "gfid-req"); +          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) {                          STACK_WIND_COOKIE (frame, afr_lookup_cbk, @@ -2810,3 +3179,86 @@ afr_notify (xlator_t *this, int32_t event,  out:          return ret;  } +void +afr_reset_children (int32_t *fresh_children, int32_t child_count) +{ +        unsigned int i = 0; +        for (i = 0; i < child_count; i++) +                fresh_children[i] = -1; +} + +int32_t* +afr_fresh_children_create (int32_t child_count) +{ +        int32_t           *fresh_children = NULL; +        int               i               = 0; + +        GF_ASSERT (child_count > 0); + +        fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children), +                                    gf_afr_mt_int32_t); +        if (NULL == fresh_children) +                goto out; +        for (i = 0; i < child_count; i++) +                fresh_children[i] = -1; +out: +        return fresh_children; +} + +void +afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, +                              int32_t child_count) +{ +        gf_boolean_t child_found = _gf_false; +        int          i               = 0; + +        for (i = 0; i < child_count; i++) { +                if (fresh_children[i] == -1) +                        break; +                if (fresh_children[i] == child) { +                        child_found = _gf_true; +                        break; +                } +        } +        if (!child_found) { +                GF_ASSERT (i < child_count); +                fresh_children[i] = child; +        } +} + +int +afr_get_children_count (int32_t *fresh_children, unsigned int child_count) +{ +        int count = 0; +        int i = 0; + +        for (i = 0; i < child_count; i++) { +                if (fresh_children[i] == -1) +                        break; +                count++; +        } +        return count; +} + +void +afr_get_fresh_children (int32_t *success_children, int32_t *sources, +                        int32_t *fresh_children, unsigned int child_count) +{ +        unsigned int i = 0; +        unsigned int j = 0; + +        GF_ASSERT (success_children); +        GF_ASSERT (sources); +        GF_ASSERT (fresh_children); + +        afr_reset_children (fresh_children, child_count); +        for (i = 0; i < child_count; i++) { +                if (success_children[i] == -1) +                        break; +                if (afr_is_read_child (success_children, sources, child_count, +                                       success_children[i])) { +                        fresh_children[j] = success_children[i]; +                        j++; +                } +        } +} diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 3f056b68628..9e799b85813 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -51,7 +51,8 @@  int -afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this) +afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, +                           int32_t op_errno)  {          afr_local_t *local  = NULL; @@ -177,7 +178,7 @@ out:                                  " forced merge option set",                                  sh_type_str, local->loc.path); -                        afr_self_heal (frame, this); +                        afr_self_heal (frame, this, local->fd->inode);                  } else {                          afr_set_opendir_done (this, local->fd->inode); diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 3212c1e9563..c6f68a072c3 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -54,6 +54,8 @@ afr_build_parent_loc (loc_t *parent, loc_t *child)          char *tmp = NULL;          if (!child->parent) { +                //this should never be called with root as the child +                GF_ASSERT (0);                  loc_copy (parent, child);                  return;          } diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index c68c025dd79..306f5a85af0 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -277,7 +277,8 @@ __unopened_count (int child_count, unsigned int *opened_on, unsigned char *child  int -afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this) +afr_openfd_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret, +                      int32_t op_errno)  {          afr_local_t   *local      = NULL;          afr_private_t *priv       = NULL; @@ -418,9 +419,8 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this)          GF_ASSERT (local->loc.path);          /* forcibly trigger missing-entries self-heal */ -        local->success_count    = 1; -        local->enoent_count     = 1; - +        sh->need_missing_entry_self_heal = _gf_true; +        sh->need_gfid_self_heal = _gf_true;          sh->data_lock_held      = _gf_true;          sh->need_data_self_heal = _gf_true;          sh->type                = local->fd->inode->ia_type; @@ -434,7 +434,7 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this)                  "path: %s, reason: Replicate up down flush, data lock is held",                  sh_type_str, local->loc.path); -        afr_self_heal (frame, this); +        afr_self_heal (frame, this, local->fd->inode);          return 0;  } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b29deb8bca2..dc660e19888 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -27,6 +27,21 @@  #include "afr-self-heal.h"  #include "pump.h" +//Intersection[child]=1 if child is part of intersection +void +afr_children_intersection_get (int32_t *set1, int32_t *set2, +                               int *intersection, unsigned int child_count) +{ +        int                      i = 0; + +        memset (intersection, 0, sizeof (*intersection) * child_count); +        for (i = 0; i < child_count; i++) { +                intersection[i] = afr_is_child_present (set1, child_count, i) +                                     && afr_is_child_present (set2, child_count, +                                                              i); +        } +} +  /**   * select_source - select a source and return it   */ @@ -71,6 +86,14 @@ afr_sh_source_count (int sources[], int child_count)  }  void +afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno) +{ +        sh->op_ret = -1; +        if (afr_error_more_important (sh->op_errno, op_errno)) +                sh->op_errno = op_errno; +} + +void  afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)  {          afr_private_t *  priv = this->private; @@ -186,31 +209,6 @@ out:          return ret;  } - -/** - * mark_sources: Mark all 'source' nodes and return number of source - * nodes found - * - * A node (a row in the pending matrix) belongs to one of - * three categories: - * - * M is the pending matrix. - * - * 'innocent' - M[i] is all zeroes - * 'fool'     - M[i] has i'th element = 1 (self-reference) - * 'wise'     - M[i] has i'th element = 0, others are 1 or 0. - * - * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is - * needed. - * - * A 'wise' node can be a source. If two 'wise' nodes conflict, it is - * a split-brain. If one wise node refers to the other but the other doesn't - * refer back, the referrer is a source. - * - * All fools are sinks, unless there are no 'wise' nodes. In that case, - * one of the fools is made a source. - */ -  typedef enum {          AFR_NODE_INNOCENT,          AFR_NODE_FOOL, @@ -585,6 +583,60 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child,  }  int +afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs, +                   int32_t **pending_matrix, int32_t *sources, +                   int32_t *success_children, afr_transaction_type type) +{ +        afr_private_t           *priv = NULL; +        afr_self_heal_type      sh_type    = AFR_SELF_HEAL_INVALID; +        int                     nsources   = -1; + +        priv = this->private; + +        if (afr_get_children_count (success_children, priv->child_count) == 0) +                goto out; + +        afr_build_pending_matrix (priv->pending_key, pending_matrix, +                                  xattr, type, priv->child_count); + +        sh_type = afr_self_heal_type_for_transaction (type); +        if (AFR_SELF_HEAL_INVALID == sh_type) +                goto out; + +        afr_sh_print_pending_matrix (pending_matrix, this); + +        nsources = afr_mark_sources (sources, pending_matrix, bufs, +                                     priv->child_count, sh_type, +                                     success_children, this->name); +out: +        return nsources; +} + +/** + * mark_sources: Mark all 'source' nodes and return number of source + * nodes found + * + * A node (a row in the pending matrix) belongs to one of + * three categories: + * + * M is the pending matrix. + * + * 'innocent' - M[i] is all zeroes + * 'fool'     - M[i] has i'th element = 1 (self-reference) + * 'wise'     - M[i] has i'th element = 0, others are 1 or 0. + * + * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is + * needed. + * + * A 'wise' node can be a source. If two 'wise' nodes conflict, it is + * a split-brain. If one wise node refers to the other but the other doesn't + * refer back, the referrer is a source. + * + * All fools are sinks, unless there are no 'wise' nodes. In that case, + * one of the fools is made a source. + */ + +int  afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,                    int32_t child_count, afr_self_heal_type type,                    int32_t *valid_children, const char *xlator_name) @@ -886,7 +938,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)                  sh->xattr[i] = NULL;          } -        if (local->govinda_gOvinda) { +        if (local->govinda_gOvinda || sh->op_failed) {                  gf_log (this->name, GF_LOG_INFO,                          "split brain found, aborting selfheal of %s",                          local->loc.path); @@ -904,7 +956,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)  static int -sh_missing_entries_finish (call_frame_t *frame, xlator_t *this) +afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; @@ -918,524 +970,751 @@ sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)          return 0;  } - -static int -sh_destroy_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                int32_t op_ret, int op_errno, -                struct iatt *preop, struct iatt *postop) +static void +afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie, +                                   xlator_t *this, +                                   int32_t op_ret, int32_t op_errno, +                                   inode_t *inode, struct iatt *buf, +                                   dict_t *xattr, struct iatt *postparent)  { -        afr_local_t *local      = NULL; -        loc_t       *parent_loc = cookie; -        int          call_count = 0; +        int              child_index = 0; +        afr_local_t     *local = NULL; +        afr_private_t   *priv = NULL; +        afr_self_heal_t *sh = NULL;          local = frame->local; +        priv = this->private; +        sh   = &local->self_heal; +        child_index = (long) cookie; -        if (op_ret == -1) { -                gf_log (this->name, GF_LOG_INFO, -                        "setattr on %s failed: %s", -                        local->loc.path, strerror (op_errno)); +        LOCK (&frame->lock); +        { +                if (op_ret == 0) { +                        sh->buf[child_index] = *buf; +                        sh->parentbuf        = *postparent; +                        sh->parentbufs[child_index] = *postparent; +                        sh->child_success[sh->success_count] = child_index; +                        sh->success_count++; +                        sh->xattr[child_index] = dict_ref (xattr); +                } else { +                        gf_log (this->name, GF_LOG_ERROR, +                                "path %s on subvolume %s => -1 (%s)", +                                local->loc.path, +                                priv->children[child_index]->name, +                                strerror (op_errno)); +                        local->self_heal.child_errno[child_index] = op_errno; +                }          } +        UNLOCK (&frame->lock); +        return; +} -        if (parent_loc) { -                loc_wipe (parent_loc); -                GF_FREE (parent_loc); +gf_boolean_t +afr_valid_ia_type (ia_type_t ia_type) +{ +        switch (ia_type) { +        case IA_IFSOCK: +        case IA_IFREG: +        case IA_IFBLK: +        case IA_IFCHR: +        case IA_IFIFO: +        case IA_IFLNK: +        case IA_IFDIR: +                return _gf_true; +        default: +                return _gf_false;          } +        return _gf_false; +} -        call_count = afr_frame_return (frame); +void +afr_sh_call_entry_impunge_recreate (call_frame_t *frame, xlator_t *this, +                                    int child_index, struct iatt *buf, +                                    struct iatt *postparent, +                                    afr_impunge_done_cbk_t impunge_done) +{ +        call_frame_t    *impunge_frame = NULL; +        afr_local_t     *local = NULL; +        afr_local_t     *impunge_local = NULL; +        afr_self_heal_t *sh = NULL; +        afr_self_heal_t *impunge_sh = NULL; +        int32_t         op_errno = 0; -        if (call_count == 0) { -                STACK_DESTROY (frame->root); +        impunge_frame = copy_frame (frame); +        if (!impunge_frame) { +                op_errno = ENOMEM; +                goto out;          } -        return 0; -} +        ALLOC_OR_GOTO (impunge_local, afr_local_t, out); +        local = frame->local; +        sh = &local->self_heal; +        impunge_frame->local = impunge_local; +        impunge_sh = &impunge_local->self_heal; +        impunge_sh->sh_frame = frame; +        impunge_sh->active_source = sh->source; +        impunge_sh->impunging_entry_mode = st_mode_from_ia (buf->ia_prot, +                                                            buf->ia_type); +        impunge_sh->impunge_ret_child = child_index; +        loc_copy (&impunge_local->loc, &local->loc); +        sh->impunge_done = impunge_done; +        impunge_local->call_count = 1; +        afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf, +                                     postparent); +        return; +out: +        gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, reason: %s", +                local->loc.path, strerror (op_errno)); +        impunge_done (frame, this, child_index, -1, op_errno); +} -static int -sh_missing_entries_newentry_cbk (call_frame_t *frame, void *cookie, -                                 xlator_t *this, -                                 int32_t op_ret, int32_t op_errno, -                                 inode_t *inode, struct iatt *buf, -                                 struct iatt *preparent, -                                 struct iatt *postparent) -{ -        afr_local_t     *local         = NULL; -        afr_self_heal_t *sh            = NULL; -        afr_private_t   *priv          = NULL; -        call_frame_t    *setattr_frame = NULL; -        int              call_count    = 0; -        int              child_index   = 0; -        loc_t           *parent_loc    = NULL; -        struct iatt      stbuf         = {0,}; -        int32_t          valid         = 0; +int +afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this, int child, +                         int32_t op_ret, int32_t op_errno) +{ +        int             call_count = 0; +        afr_local_t     *local = NULL;          local = frame->local; -        sh    = &local->self_heal; -        priv  = this->private; -        child_index = (long) cookie; - -        stbuf.ia_atime = sh->buf[sh->source].ia_atime; -        stbuf.ia_atime_nsec = sh->buf[sh->source].ia_atime_nsec; -        stbuf.ia_mtime = sh->buf[sh->source].ia_mtime; -        stbuf.ia_mtime_nsec = sh->buf[sh->source].ia_mtime_nsec; +        if (op_ret == -1) +                gf_log (this->name, GF_LOG_ERROR, +                        "create entry %s failed, on child %d reason, %s", +                        local->loc.path, child, strerror (op_errno)); +        call_count = afr_frame_return (frame); +        if (call_count == 0) +                afr_sh_missing_entries_finish (frame, this); +        return 0; +} -        stbuf.ia_uid = sh->buf[sh->source].ia_uid; -        stbuf.ia_gid = sh->buf[sh->source].ia_gid; +static int +sh_missing_entries_create (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; +        int              type = 0; +        afr_private_t   *priv = NULL; +        int             enoent_count = 0; +        int             i = 0; +        struct iatt     *buf = NULL; +        struct iatt     *postparent = NULL; -        valid = GF_SET_ATTR_UID   | GF_SET_ATTR_GID | -                GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; +        local = frame->local; +        sh = &local->self_heal; +        priv = this->private; -        if (op_ret == -1) { +        enoent_count = afr_errno_count (NULL, sh->child_errno, +                                        priv->child_count, ENOENT); +        if (enoent_count == 0) {                  gf_log (this->name, GF_LOG_INFO, -                        "%s: failed to mknod on %s (%s)", -                        local->loc.path, priv->children[child_index]->name, -                        strerror (op_errno)); +                        "no missing files - %s. proceeding to metadata check", +                        local->loc.path); +                /* proceed to next step - metadata self-heal */ +                afr_sh_missing_entries_finish (frame, this); +                return 0;          } -        if (op_ret == 0) { -                setattr_frame = copy_frame (frame); - -                setattr_frame->local = GF_CALLOC (1, sizeof (afr_local_t), -                                                  gf_afr_mt_afr_local_t); +        buf = &sh->buf[sh->source]; +        postparent = &sh->parentbufs[sh->source]; -                ((afr_local_t *)setattr_frame->local)->call_count = 2; - -                gf_log (this->name, GF_LOG_TRACE, -                        "setattr (%s) on subvolume %s", -                        local->loc.path, priv->children[child_index]->name); - -                STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk, -                                   (void *) (long) 0, -                                   priv->children[child_index], -                                   priv->children[child_index]->fops->setattr, -                                   &local->loc, &stbuf, valid); - -                valid      = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME; -                parent_loc = GF_CALLOC (1, sizeof (*parent_loc), -                                        gf_afr_mt_loc_t); -                afr_build_parent_loc (parent_loc, &local->loc); - -                STACK_WIND_COOKIE (setattr_frame, sh_destroy_cbk, -                                   (void *) (long) parent_loc, -                                   priv->children[child_index], -                                   priv->children[child_index]->fops->setattr, -                                   parent_loc, &sh->parentbuf, valid); +        type = buf->ia_type; +        if (!afr_valid_ia_type (type)) { +                gf_log (this->name, GF_LOG_ERROR, +                        "%s: unknown file type: 0%o", local->loc.path, type); +                local->govinda_gOvinda = 1; +                afr_sh_missing_entries_finish (frame, this); +                goto out;          } -        call_count = afr_frame_return (frame); - -        if (call_count == 0) { -                sh_missing_entries_finish (frame, this); +        local->call_count = enoent_count; +        for (i = 0; i < priv->child_count; i++) { +                //If !child_up errno will be zero +                if (sh->child_errno[i] != ENOENT) +                        continue; +                afr_sh_call_entry_impunge_recreate (frame, this, i, +                                                    buf, postparent, +                                                    afr_sh_create_entry_cbk); +                enoent_count--;          } - +        GF_ASSERT (enoent_count == 0); +out:          return 0;  } - -static int -sh_missing_entries_mknod (call_frame_t *frame, xlator_t *this) +void +afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL;          afr_private_t   *priv = NULL; -        int              i = 0; -        int              ret = 0; -        int              enoent_count = 0; -        int              call_count = 0; -        mode_t           st_mode = 0; -        dev_t            ia_rdev = 0; -        dict_t          *dict = NULL; -        dev_t            st_rdev = 0; +        int32_t         op_errno = 0; +        ia_type_t       ia_type = IA_INVAL; +        int32_t         nsources = 0;          local = frame->local;          sh = &local->self_heal;          priv = this->private; -        for (i = 0; i < priv->child_count; i++) -                if (sh->child_errno[i] == ENOENT) -                        enoent_count++; +        if (afr_get_children_count (sh->child_success, +                                    priv->child_count) == 0) { +                op_errno = afr_resultant_errno_get (NULL, sh->child_errno, +                                                    priv->child_count); +                goto out; +        } -        call_count = enoent_count; -        local->call_count = call_count; +        if (afr_gfid_missing_count (this->name, sh->child_success, +                                    sh->buf, priv->child_count, +                                    local->loc.path) || +            afr_conflicting_iattrs (sh->buf, sh->child_success, +                                    priv->child_count, local->loc.path, +                                    this->name)) { +                //this can happen if finding the fresh parent dir failed +                local->govinda_gOvinda = 1; +                sh->op_failed = 1; +                op_errno = EIO; +                goto out; +        } -        st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot, -                                   sh->buf[sh->source].ia_type); -        ia_rdev  = sh->buf[sh->source].ia_rdev; -        st_rdev = makedev (ia_major (ia_rdev), ia_minor (ia_rdev)); +        //now No chance for the ia_type to conflict +        ia_type = sh->buf[sh->child_success[0]].ia_type; +        nsources = afr_build_sources (this, sh->xattr, sh->buf, +                                      sh->pending_matrix, sh->sources, +                                      sh->child_success, +                                      afr_transaction_type_get (ia_type)); +        if (nsources < 0) { +                gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," +                        " in missing entry self-heal, continuing with the rest" +                        " of the self-heals", local->loc.path); +                op_errno = EIO; +                goto out; +        } -        gf_log (this->name, GF_LOG_TRACE, -                "mknod %s mode 0%o device type %"PRId64" on %d subvolumes", -                local->loc.path, st_mode, (uint64_t)st_rdev, enoent_count); +        afr_get_fresh_children (sh->child_success, sh->sources, +                                sh->fresh_children, priv->child_count); +        sh->source = sh->fresh_children[0]; +        if (sh->source == -1) { +                gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); +                op_errno = EIO; +                goto out; +        } -        dict = dict_new (); -        if (!dict) -                gf_log (this->name, GF_LOG_ERROR, "out of memory"); +        if (sh->gfid_sh_success_cbk) +                sh->gfid_sh_success_cbk (frame, this); +        sh_missing_entries_create (frame, this); +        return; +out: +        afr_sh_set_error (sh, op_errno); +        afr_sh_missing_entries_finish (frame, this); +        return; +} -        ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid); -        if (ret) -                gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed", -                        local->loc.path); +static int +afr_sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie, +                                   xlator_t *this, int32_t op_ret, +                                   int32_t op_errno, inode_t *inode, +                                   struct iatt *buf, dict_t *xattr, +                                   struct iatt *postparent) +{ +        int                     call_count = 0; +        afr_local_t             *local = NULL; +        afr_self_heal_t         *sh = NULL; +        afr_private_t           *priv = NULL; -        for (i = 0; i < priv->child_count; i++) { -                if (sh->child_errno[i] == ENOENT) { -                        STACK_WIND_COOKIE (frame, -                                           sh_missing_entries_newentry_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->mknod, -                                           &local->loc, st_mode, st_rdev, dict); -                        if (!--call_count) -                                break; -                } -        } +        local = frame->local; +        sh = &local->self_heal; +        priv = this->private; -        if (dict) -                dict_unref (dict); +        afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, +                                           op_errno, inode, buf, xattr, +                                           postparent); +        call_count = afr_frame_return (frame); + +        if (call_count == 0) +                afr_sh_missing_entries_lookup_done (frame, this);          return 0;  } - -static int -sh_missing_entries_mkdir (call_frame_t *frame, xlator_t *this) +int +afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child, +                         int32_t op_ret, int32_t op_errno)  { +        int             call_count = 0;          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        afr_private_t   *priv = NULL; -        dict_t          *dict = NULL; -        int              i = 0; -        int              ret = 0; -        int              enoent_count = 0; -        int              call_count = 0; -        mode_t           st_mode = 0;          local = frame->local;          sh = &local->self_heal; -        priv = this->private; - -        for (i = 0; i < priv->child_count; i++) -                if (sh->child_errno[i] == ENOENT) -                        enoent_count++; -        call_count = enoent_count; -        local->call_count = call_count; - -        st_mode = st_mode_from_ia (sh->buf[sh->source].ia_prot, -                                   sh->buf[sh->source].ia_type); - -        dict = dict_new (); -        if (!dict) { +        GF_ASSERT (sh->post_remove_call); +        if ((op_ret == -1) && (op_errno != ENOENT)) {                  gf_log (this->name, GF_LOG_ERROR, -                        "Out of memory"); -                sh_missing_entries_finish (frame, this); -                return 0; +                        "purge entry %s failed, on child %d reason, %s", +                        local->loc.path, child, strerror (op_errno)); +                LOCK (&frame->lock); +                { +                        afr_sh_set_error (sh, EIO); +                        sh->op_failed = 1; +                } +                UNLOCK (&frame->lock);          } +        call_count = afr_frame_return (frame); +        if (call_count == 0) +                sh->post_remove_call (frame, this); +        return 0; +} -        ret = afr_set_dict_gfid (dict, sh->buf[sh->source].ia_gfid); -        if (ret) -                gf_log (this->name, GF_LOG_INFO, -                        "%s: inode gfid set failed", local->loc.path); +void +afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this, +                                  int child_index, struct iatt *buf, +                                  afr_expunge_done_cbk_t expunge_done) +{ +        call_frame_t    *expunge_frame = NULL; +        afr_local_t     *local = NULL; +        afr_local_t     *expunge_local = NULL; +        afr_self_heal_t *sh = NULL; +        afr_self_heal_t *expunge_sh = NULL; +        int32_t         op_errno = 0; +        expunge_frame = copy_frame (frame); +        if (!expunge_frame) { +                goto out; +        } -        gf_log (this->name, GF_LOG_TRACE, -                "mkdir %s mode 0%o on %d subvolumes", -                local->loc.path, st_mode, enoent_count); +        ALLOC_OR_GOTO (expunge_local, afr_local_t, out); -        for (i = 0; i < priv->child_count; i++) { -                if (sh->child_errno[i] == ENOENT) { -                        if (!strcmp (local->loc.path, "/")) { -                                /* We shouldn't try to create "/" */ +        local = frame->local; +        sh = &local->self_heal; +        expunge_frame->local = expunge_local; +        expunge_sh = &expunge_local->self_heal; +        expunge_sh->sh_frame = frame; +        loc_copy (&expunge_local->loc, &local->loc); +        sh->expunge_done = expunge_done; +        afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf); +        return; +out: +        gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s", +                local->loc.path, strerror (op_errno)); +        expunge_done (frame, this, child_index, -1, op_errno); +} -                                sh_missing_entries_finish (frame, this); +void +afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children, +                                 int32_t *fresh_children, +                                 unsigned int child_count) +{ +        int     i = 0; -                                return 0; -                        } else { -                                STACK_WIND_COOKIE (frame, -                                                   sh_missing_entries_newentry_cbk, -                                                   (void *) (long) i, -                                                   priv->children[i], -                                                   priv->children[i]->fops->mkdir, -                                                   &local->loc, st_mode, dict); -                                if (!--call_count) -                                        break; -                        } +        for (i = 0; i < child_count; i++) { +                if (afr_is_child_present (success_children, child_count, i) && +                    !afr_is_child_present (fresh_children, child_count, i)) { +                        sh->child_errno[i] = ENOENT; +                        GF_ASSERT (sh->xattr[i]); +                        dict_unref (sh->xattr[i]); +                        sh->xattr[i] = NULL;                  }          } +} -        if (dict) -                dict_unref (dict); +int +afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t         *local    = NULL; +        afr_self_heal_t     *sh       = NULL; +        afr_private_t       *priv     = NULL; +        local    = frame->local; +        sh       = &local->self_heal; +        priv     = this->private; + +        if (sh->op_failed) { +                afr_sh_missing_entries_finish (frame, this); +        } else { +                if (afr_gfid_missing_count (this->name, sh->fresh_children, +                                            sh->buf, priv->child_count, +                                            local->loc.path)) { +                        afr_sh_common_lookup (frame, this, &local->loc, +                                              afr_sh_missing_entries_lookup_cbk, +                                              _gf_true); +                } else { +                        //No need to set gfid so goto missing entries lookup done +                        //Behave as if you have done the lookup +                        afr_sh_remove_stale_lookup_info (sh, +                                                         sh->child_success, +                                                         sh->fresh_children, +                                                         priv->child_count); +                        afr_children_copy (sh->child_success, +                                           sh->fresh_children, +                                           priv->child_count); +                        afr_sh_missing_entries_lookup_done (frame, this); +                } +        }          return 0;  } - -static int -sh_missing_entries_symlink (call_frame_t *frame, xlator_t *this, -                            const char *link, struct iatt *buf) +gf_boolean_t +afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv, +                              int child)  { -        afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        afr_private_t   *priv = NULL; -        dict_t          *dict = NULL; -        int              i = 0; -        int              ret = 0; -        int              enoent_count = 0; -        int              call_count = 0; - -        local = frame->local;          sh = &local->self_heal; -        priv = this->private; - -        for (i = 0; i < priv->child_count; i++) -                if (sh->child_errno[i] == ENOENT) -                        enoent_count++; -        call_count = enoent_count; -        local->call_count = call_count; +        if (local->child_up[child] && +            (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count, +                                    child)) +            && (sh->child_errno[child] != ENOENT)) +                return _gf_true; -        dict = dict_new (); -        if (!dict) { -                gf_log (this->name, GF_LOG_ERROR, -                        "Out of memory"); -                sh_missing_entries_finish (frame, this); -                return 0; -        } +        return _gf_false; +} -        ret = afr_set_dict_gfid (dict, buf->ia_gfid); -        if (ret) -                gf_log (this->name, GF_LOG_DEBUG, -                        "%s: dict gfid set failed", local->loc.path); +gf_boolean_t +afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv, +                                    int child) +{ +        afr_self_heal_t *sh = NULL; -        gf_log (this->name, GF_LOG_TRACE, -                "symlink %s -> %s on %d subvolumes", -                local->loc.path, link, enoent_count); +        sh = &local->self_heal; -        for (i = 0; i < priv->child_count; i++) { -                if (sh->child_errno[i] == ENOENT) { -                        STACK_WIND_COOKIE (frame, -                                           sh_missing_entries_newentry_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->symlink, -                                           link, &local->loc, dict); -                        if (!--call_count) -                                break; -                } -        } +        if (local->child_up[child] && +            (!afr_is_child_present (sh->fresh_children, priv->child_count, +                                    child)) +             && (sh->child_errno[child] != ENOENT)) +                return _gf_true; -        return 0; +        return _gf_false;  } - -static int -sh_missing_entries_readlink_cbk (call_frame_t *frame, void *cookie, -                                 xlator_t *this, -                                 int32_t op_ret, int32_t op_errno, -                                 const char *link, struct iatt *sbuf) +void +afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this, +                           gf_boolean_t purge_condition (afr_local_t *local, +                                                         afr_private_t *priv, +                                                         int child))  {          afr_local_t     *local = NULL; -        afr_self_heal_t *sh = NULL;          afr_private_t   *priv = NULL; +        afr_self_heal_t *sh = NULL; +        int             i = 0; +        int             call_count = 0;          local = frame->local;          sh = &local->self_heal;          priv = this->private; -        if (op_ret > 0) -                sh_missing_entries_symlink (frame, this, link, sbuf); -        else { -                gf_log (this->name, GF_LOG_INFO, -                        "%s: failed to do readlink on %s (%s)", -                        local->loc.path, priv->children[sh->source]->name, -                        strerror (op_errno)); -                sh_missing_entries_finish (frame, this); +        for (i = 0; i < priv->child_count; i++) { +                if (purge_condition (local, priv, i)) +                        call_count++;          } -        return 0; -} +        if (call_count == 0) { +                sh->post_remove_call (frame, this); +                goto out; +        } +        local->call_count = call_count; +        for (i = 0; i < priv->child_count; i++) { +                if (!purge_condition (local, priv, i)) +                        continue; +                afr_sh_call_entry_expunge_remove (frame, this, +                                                  (long) i, &sh->buf[i], +                                                  afr_sh_remove_entry_cbk); +        } +out: +        return; +} -static int -sh_missing_entries_readlink (call_frame_t *frame, xlator_t *this) +void +afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        afr_private_t   *priv = NULL;          local = frame->local;          sh = &local->self_heal; -        priv = this->private; +        sh->post_remove_call = afr_sh_missing_entries_finish; -        STACK_WIND (frame, sh_missing_entries_readlink_cbk, -                    priv->children[sh->source], -                    priv->children[sh->source]->fops->readlink, -                    &local->loc, 4096); - -        return 0; +        afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);  } - -static int -sh_missing_entries_create (call_frame_t *frame, xlator_t *this) +void +afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; -        int              type = 0; -        int              i = 0;          afr_private_t   *priv = NULL; -        int              enoent_count = 0; -        int              govinda_gOvinda = 0; +        int             i = 0;          local = frame->local;          sh = &local->self_heal;          priv = this->private; +        sh->post_remove_call = afr_sh_purge_stale_entries_done; +          for (i = 0; i < priv->child_count; i++) { -                if (!local->child_up[i]) +                if (afr_is_child_present (sh->fresh_children, +                                          priv->child_count, i))                          continue; -                if (sh->child_errno[i]) { -                        if (sh->child_errno[i] == ENOENT) -                                enoent_count++; -                } else { -                        if (type) { -                                if (type != sh->buf[i].ia_type) { -                                        gf_log (this->name, GF_LOG_DEBUG, -                                                "file %s is not recoverable " -                                                "automatically!", -                                                local->loc.path); - -                                        govinda_gOvinda = 1; -                                } -                        } else { -                                sh->source = i; -                                type = sh->buf[i].ia_type; -                        } -                } -        } +                if ((!local->child_up[i]) || sh->child_errno[i] != 0) +                        continue; -        if (govinda_gOvinda) { -                gf_log (this->name, GF_LOG_ERROR, -                        "conflicting filetypes exist for path %s. returning.", -                        local->loc.path); +                GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) || +                           uuid_is_null (sh->buf[i].ia_gfid)); -                local->govinda_gOvinda = 1; -                sh_missing_entries_finish (frame, this); -                return 0; -        } +                if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) || +                    (uuid_compare (sh->buf[i].ia_gfid, +                                   sh->entrybuf.ia_gfid))) +                        continue; + +                afr_fresh_children_add_child (sh->fresh_children, +                                              i, priv->child_count); -        if (!type) { -                gf_log (this->name, GF_LOG_ERROR, -                        "no source found for %s. all nodes down?. returning.", -                        local->loc.path); -                /* subvolumes down and/or file does not exist */ -                sh_missing_entries_finish (frame, this); -                return 0;          } +        afr_sh_purge_entry_common (frame, this, +                                   afr_sh_purge_stale_entry_condition); +} -        if (enoent_count == 0) { -                gf_log (this->name, GF_LOG_INFO, -                        "no missing files - %s. proceeding to metadata check", -                        local->loc.path); -                /* proceed to next step - metadata self-heal */ -                sh_missing_entries_finish (frame, this); -                return 0; +void +afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs, +                                     struct iatt *save, +                                     unsigned int child_count) +{ +        int             i = 0; +        int             child = 0; +        gf_boolean_t    saved = _gf_false; + +        GF_ASSERT (save); +        //if iatt buf with gfid exists sets it +        for (i = 0; i < child_count; i++) { +                child = children[i]; +                if (child == -1) +                        break; +                *save = bufs[child]; +                saved = _gf_true; +                if (!uuid_is_null (save->ia_gfid)) +                        break;          } +        GF_ASSERT (saved); +} -        switch (type) { -        case IA_IFSOCK: -        case IA_IFREG: -        case IA_IFBLK: -        case IA_IFCHR: -        case IA_IFIFO: -                sh_missing_entries_mknod (frame, this); -                break; -        case IA_IFLNK: -                sh_missing_entries_readlink (frame, this); -                break; -        case IA_IFDIR: -                sh_missing_entries_mkdir (frame, this); -                break; -        default: -                gf_log (this->name, GF_LOG_ERROR, -                        "%s: unknown file type: 0%o", local->loc.path, type); +void +afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t      *local = NULL; +        afr_self_heal_t  *sh = NULL; +        afr_private_t    *priv = NULL; +        int32_t          fresh_child_enoents = 0; +        int32_t          fresh_parent_count = 0; +        int32_t          op_errno = 0; + +        local = frame->local; +        sh = &local->self_heal; +        priv = this->private; + +        if (afr_get_children_count (sh->child_success, +                                    priv->child_count) == 0) { +                op_errno = afr_resultant_errno_get (NULL, sh->child_errno, +                                                    priv->child_count); +                goto fail; +        } + +        //make intersection of (success_children & fresh_parent_dirs) fresh_children +        //the other success_children will be added to it if they are not stale +        afr_children_intersection_get (sh->child_success, +                                       sh->fresh_parent_dirs, +                                       sh->sources, priv->child_count); +        afr_get_fresh_children (sh->child_success, sh->sources, +                                sh->fresh_children, priv->child_count); +        memset (sh->sources, 0, sizeof (*sh->sources) * priv->child_count); + +        fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs, +                                                     priv->child_count); +        //we need the enoent count of the subvols present in fresh_parent_dirs +        fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs, +                                               sh->child_errno, +                                               priv->child_count, ENOENT); +        if (fresh_child_enoents == fresh_parent_count) { +                afr_sh_set_error (sh, ENOENT); +                sh->op_failed = 1; +                afr_sh_purge_entry (frame, this); +        } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children, +                                            priv->child_count, local->loc.path, +                                            this->name)) { +                afr_sh_save_child_iatts_from_policy (sh->fresh_children, +                                                     sh->buf, &sh->entrybuf, +                                                     priv->child_count); +                afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf, +                                            sh->fresh_children, +                                            priv->child_count); +                afr_sh_purge_stale_entry (frame, this); +        } else { +                op_errno = EIO;                  local->govinda_gOvinda = 1; -                sh_missing_entries_finish (frame, this); +                goto fail;          } -        return 0; -} +        return; +fail: +        afr_sh_set_error (sh, op_errno); +        afr_sh_missing_entries_finish (frame, this); +        return; +}  static int -sh_missing_entries_lookup_cbk (call_frame_t *frame, void *cookie, -                               xlator_t *this, -                               int32_t op_ret, int32_t op_errno, -                               inode_t *inode, struct iatt *buf, dict_t *xattr, -                               struct iatt *postparent) +afr_sh_children_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                            int32_t op_ret, int32_t op_errno, inode_t *inode, +                            struct iatt *buf, dict_t *xattr, +                            struct iatt *postparent)  { -        int              child_index = 0; -        afr_local_t     *local = NULL;          int              call_count = 0; + +        afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, +                                           op_errno, inode, buf, xattr, +                                           postparent); +        call_count = afr_frame_return (frame); + +        if (call_count == 0) +                afr_sh_children_lookup_done (frame, this); + +        return 0; +} + +static int +afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this) +{ +        afr_self_heal_t *sh  = NULL;          afr_private_t   *priv = NULL; -        mode_t           st_mode = 0; +        afr_local_t     *local = NULL; +        int             enoent_count = 0; +        int             nsources = 0; +        int             source  = -1;          local = frame->local; +        sh = &local->self_heal;          priv = this->private; -        child_index = (long) cookie; - -        if (buf) -                st_mode = st_mode_from_ia (buf->ia_prot, buf->ia_type); +        /* If We can't find a fresh parent directory here, +         * we wont know which subvol is correct without finding a parent dir +         * upwards which has correct xattrs, for that we may have to +         * do lookups till root, we dont wanna do that, +         * instead make sure that if there are conflicting gfid +         * parent dirs, self-heal thus lookup is failed with EIO. +         * if there are missing entries we dont know whether to delete or +         * create so fail with EIO, +         * If there are conflicting xattr fail with EIO. +         */ +        if (afr_get_children_count (sh->child_success, +                                    priv->child_count) == 0) { +                gf_log (this->name, GF_LOG_ERROR, "Parent dir lookup failed " +                        "for %s, in missing entry self-heal, continuing with " +                        "the rest of the self-heals", local->loc.path); +                goto out; +        } -        LOCK (&frame->lock); -        { -                if (op_ret == 0) { -                        gf_log (this->name, GF_LOG_TRACE, -                                "path %s on subvolume %s is of mode 0%o", -                                local->loc.path, -                                priv->children[child_index]->name, -                                st_mode); +        enoent_count = afr_errno_count (NULL, sh->child_errno, +                                        priv->child_count, ENOENT); +        if (enoent_count > 0) { +                gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s," +                        " in missing entry self-heal, continuing with the rest" +                        " of the self-heals", local->loc.path); +                goto out; +        } -                        local->self_heal.buf[child_index] = *buf; -                        local->self_heal.parentbuf        = *postparent; -                } else { -                        gf_log (this->name, GF_LOG_INFO, -                                "path %s on subvolume %s => -1 (%s)", -                                local->loc.path, -                                priv->children[child_index]->name, -                                strerror (op_errno)); +        if (afr_conflicting_iattrs (sh->buf, sh->child_success, +                                    priv->child_count, sh->parent_loc.path, +                                    this->name)) { +                gf_log (this->name, GF_LOG_INFO, "conflicting stat info for " +                        "parent dirs of %s", local->loc.path); +                goto out; +        } -                        local->self_heal.child_errno[child_index] = op_errno; -                } +        nsources = afr_build_sources (this, sh->xattr, sh->buf, +                                      sh->pending_matrix, sh->sources, +                                      sh->child_success, +                                      AFR_ENTRY_TRANSACTION); +        if (nsources < 0) { +                gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s," +                        " in missing entry self-heal, continuing with the rest" +                        " of the self-heals", local->loc.path); +                goto out; +        } +        source = afr_sh_select_source (sh->sources, priv->child_count); +        if (source == -1) { +                GF_ASSERT (0); +                gf_log (this->name, GF_LOG_DEBUG, "No active sources found."); +                goto out;          } -        UNLOCK (&frame->lock); +        afr_get_fresh_children (sh->child_success, sh->sources, +                                sh->fresh_parent_dirs, priv->child_count); +        afr_sh_common_lookup (frame, this, &local->loc, +                              afr_sh_children_lookup_cbk, _gf_false); +        return 0; + +out: +        afr_sh_set_error (sh, EIO); +        sh->op_failed = 1; +        afr_sh_missing_entries_finish (frame, this); +        return 0; +} +int +afr_sh_conflicting_entry_lookup_cbk (call_frame_t *frame, void *cookie, +                                     xlator_t *this, +                                     int32_t op_ret, int32_t op_errno, +                                     inode_t *inode, struct iatt *buf, +                                     dict_t *xattr, struct iatt *postparent) +{ +        int              call_count = 0; + +        afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret, +                                           op_errno, inode, buf, xattr, +                                           postparent);          call_count = afr_frame_return (frame); -        if (call_count == 0) { -                sh_missing_entries_create (frame, this); -        } +        if (call_count == 0) +                afr_sh_find_fresh_parents (frame, this);          return 0;  } +void +afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count) +{ +        int             i = 0; -static int -sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this) +        for (i = 0; i < child_count; i++) { +                memset (&sh->buf[i], 0, sizeof (sh->buf[i])); +                memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i])); +                sh->child_errno[i] = 0; +        } +        memset (&sh->parentbuf, 0, sizeof (sh->parentbuf)); +        sh->success_count = 0; +        afr_reset_children (sh->child_success, child_count); +        afr_reset_children (sh->fresh_children, child_count); +        afr_reset_xattr (sh->xattr, child_count); +} + +/* afr self-heal state will be lost if this call is made + * please check the afr_sh_common_reset that is called in this function + */ +int +afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, +                      afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid)  {          afr_local_t    *local = NULL;          int             i = 0;          int             call_count = 0;          afr_private_t  *priv = NULL;          dict_t         *xattr_req = NULL; -        int             ret = -1; +        afr_self_heal_t *sh = NULL;          local = frame->local;          priv  = this->private; +        sh    = &local->self_heal;          call_count = afr_up_children_count (priv->child_count,                                              local->child_up); @@ -1445,29 +1724,29 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)          xattr_req = dict_new();          if (xattr_req) { -                for (i = 0; i < priv->child_count; i++) { -                        ret = dict_set_uint64 (xattr_req, -                                               priv->pending_key[i], -                                               3 * sizeof(int32_t)); -                        if (ret < 0) -                                gf_log (this->name, GF_LOG_WARNING, -                                        "%s: failed to set value for %s", -                                        local->loc.path, priv->pending_key[i]); +                afr_xattr_req_prepare (this, xattr_req, loc->path); +                if (set_gfid) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "looking up %s with gfid: %s", +                                local->loc.path, uuid_utoa (sh->sh_gfid_req)); +                        GF_ASSERT (!uuid_is_null (sh->sh_gfid_req)); +                        afr_set_dict_gfid (xattr_req, sh->sh_gfid_req);                  }          } +        afr_sh_common_reset (sh, priv->child_count);          for (i = 0; i < priv->child_count; i++) {                  if (local->child_up[i]) { -                        gf_log (this->name, GF_LOG_TRACE, +                        gf_log (this->name, GF_LOG_DEBUG,                                  "looking up %s on subvolume %s",                                  local->loc.path, priv->children[i]->name);                          STACK_WIND_COOKIE (frame, -                                           sh_missing_entries_lookup_cbk, +                                           lookup_cbk,                                             (void *) (long) i,                                             priv->children[i],                                             priv->children[i]->fops->lookup, -                                           &local->loc, xattr_req); +                                           loc, xattr_req);                          if (!--call_count)                                  break; @@ -1483,13 +1762,15 @@ sh_missing_entries_lookup (call_frame_t *frame, xlator_t *this)  int -afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this) +afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; +        afr_self_heal_t     *sh       = NULL;          local    = frame->local;          int_lock = &local->internal_lock; +        sh       = &local->self_heal;          if (int_lock->lock_op_ret < 0) {                  gf_log (this->name, GF_LOG_INFO, @@ -1499,14 +1780,41 @@ afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)                  gf_log (this->name, GF_LOG_DEBUG,                          "Non blocking entrylks done. Proceeding to FOP"); -                sh_missing_entries_lookup (frame, this); +                afr_sh_common_lookup (frame, this, &sh->parent_loc, +                                      afr_sh_conflicting_entry_lookup_cbk, +                                      _gf_false);          }          return 0;  } -static int -afr_sh_entrylk (call_frame_t *frame, xlator_t *this) +int +afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this) +{ +        afr_internal_lock_t *int_lock = NULL; +        afr_local_t         *local    = NULL; + +        local    = frame->local; +        int_lock = &local->internal_lock; + +        if (int_lock->lock_op_ret < 0) { +                gf_log (this->name, GF_LOG_INFO, +                        "Non blocking entrylks failed."); +                afr_sh_missing_entries_done (frame, this); +        } else { +                gf_log (this->name, GF_LOG_DEBUG, +                        "Non blocking entrylks done. Proceeding to FOP"); +                afr_sh_common_lookup (frame, this, &local->loc, +                                      afr_sh_missing_entries_lookup_cbk, +                                      _gf_true); +        } + +        return 0; +} + +int +afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, +                char *base_name, afr_lock_cbk_t lock_cbk)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; @@ -1521,9 +1829,9 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this)          afr_set_lock_number (frame, this); -        int_lock->lk_basename = local->loc.name; -        int_lock->lk_loc      = &sh->parent_loc; -        int_lock->lock_cbk    = afr_sh_post_nonblocking_entrylk_cbk; +        int_lock->lk_basename = base_name; +        int_lock->lk_loc      = loc; +        int_lock->lock_cbk    = lock_cbk;          afr_nonblocking_entrylk (frame, this); @@ -1531,7 +1839,8 @@ afr_sh_entrylk (call_frame_t *frame, xlator_t *this)  }  static int -afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this) +afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this, +                              afr_lock_cbk_t lock_cbk)  {          afr_internal_lock_t *int_lock = NULL;          afr_local_t         *local    = NULL; @@ -1547,9 +1856,27 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)                  "attempting to recreate missing entries for path=%s",                  local->loc.path); +        GF_ASSERT (local->loc.parent);          afr_build_parent_loc (&sh->parent_loc, &local->loc); -        afr_sh_entrylk (frame, this); +        afr_sh_entrylk (frame, this, &sh->parent_loc, NULL, +                        lock_cbk); +        return 0; +} + +static int +afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this) +{ +        afr_self_heal_parent_entrylk (frame, this, +                                      afr_sh_post_nb_entrylk_conflicting_sh_cbk); +        return 0; +} + +static int +afr_self_heal_gfids (call_frame_t *frame, xlator_t *this) +{ +        afr_self_heal_parent_entrylk (frame, this, +                                      afr_sh_post_nb_entrylk_gfid_sh_cbk);          return 0;  } @@ -1572,6 +1899,9 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)          shc = &lc->self_heal;          shc->unwind = sh->unwind; +        shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk; +        shc->need_missing_entry_self_heal = sh->need_missing_entry_self_heal; +        shc->need_gfid_self_heal = sh->need_gfid_self_heal;          shc->need_data_self_heal = sh->need_data_self_heal;          shc->need_metadata_self_heal = sh->need_metadata_self_heal;          shc->need_entry_self_heal = sh->need_entry_self_heal; @@ -1585,6 +1915,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)          shc->background = sh->background;          shc->type = sh->type; +        uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);          if (l->loc.path)                  loc_copy (&lc->loc, &l->loc); @@ -1640,18 +1971,16 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          afr_local_t *     local = NULL;          afr_self_heal_t * sh    = NULL;          char              sh_type_str[256] = {0,}; +        gf_boolean_t      split_brain = _gf_false;          priv  = this->private;          local = bgsh_frame->local;          sh    = &local->self_heal; -        if (local->govinda_gOvinda) { -                afr_set_split_brain (this, local->cont.lookup.inode, -                                     _gf_true); -        } else { -                afr_set_split_brain (this, local->cont.lookup.inode, -                                     _gf_false); -        } +        if (local->govinda_gOvinda) +                split_brain = _gf_true; + +        afr_set_split_brain (this, sh->inode, split_brain);          afr_self_heal_type_str_get(sh, sh_type_str,                                     sizeof(sh_type_str)); @@ -1668,7 +1997,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          FRAME_SU_UNDO (bgsh_frame, afr_local_t);          if (!sh->unwound) { -                sh->unwind (sh->orig_frame, this); +                sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);          }          if (sh->background) { @@ -1685,7 +2014,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)  }  int -afr_self_heal (call_frame_t *frame, xlator_t *this) +afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)  {          afr_local_t     *local = NULL;          afr_self_heal_t *sh = NULL; @@ -1730,11 +2059,14 @@ afr_self_heal (call_frame_t *frame, xlator_t *this)          sh              = &sh_local->self_heal;          sh->orig_frame  = frame; +        sh->inode = inode_ref (inode);          sh->completion_cbk = afr_self_heal_completion_cbk;          sh->buf = GF_CALLOC (priv->child_count, sizeof (struct iatt),                               gf_afr_mt_iatt); +        sh->parentbufs = GF_CALLOC (priv->child_count, sizeof (struct iatt), +                                    gf_afr_mt_iatt);          sh->child_errno = GF_CALLOC (priv->child_count, sizeof (int),                                       gf_afr_mt_int);          sh->success = GF_CALLOC (priv->child_count, sizeof (int), @@ -1763,13 +2095,17 @@ afr_self_heal (call_frame_t *frame, xlator_t *this)                                                   priv->child_count,                                                   gf_afr_mt_int32_t);          } -        sh->child_success = GF_CALLOC (sizeof (*sh->child_success), -                                       priv->child_count, gf_afr_mt_int32_t); +        sh->child_success = afr_fresh_children_create (priv->child_count); +        sh->fresh_children = afr_fresh_children_create (priv->child_count); +        sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count);          FRAME_SU_DO (sh_frame, afr_local_t); -        if (local->success_count && local->enoent_count) { -                afr_self_heal_missing_entries (sh_frame, this); +        if (sh->need_missing_entry_self_heal) { +                afr_self_heal_conflicting_entries (sh_frame, this); +        } else if (sh->need_gfid_self_heal) { +                GF_ASSERT (!uuid_is_null (sh->sh_gfid_req)); +                afr_self_heal_gfids (sh_frame, this);          } else {                  gf_log (this->name, GF_LOG_TRACE,                          "proceeding to metadata check on %s", @@ -1785,18 +2121,28 @@ void  afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,                              size_t size)  { -        GF_ASSERT (str && (size > strlen (" meta-data data entry"))); +        GF_ASSERT (str && (size > strlen (" missing-entry gfid " +                                          "meta-data data entry")));          if (self_heal_p->need_metadata_self_heal) { -                snprintf(str, size, " meta-data"); +                snprintf (str, size, " meta-data");          }          if (self_heal_p->need_data_self_heal) { -                snprintf(str + strlen(str), size - strlen(str), " data"); +                snprintf (str + strlen(str), size - strlen(str), " data");          }          if (self_heal_p->need_entry_self_heal) { -                snprintf(str + strlen(str), size - strlen(str), " entry"); +                snprintf (str + strlen(str), size - strlen(str), " entry"); +        } + +        if (self_heal_p->need_missing_entry_self_heal) { +                snprintf (str + strlen(str), size - strlen(str), +                         " missing-entry"); +        } + +        if (self_heal_p->need_gfid_self_heal) { +                snprintf (str + strlen(str), size - strlen(str), " gfid");          }  } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h index 676e933ae1c..043ebea2da6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.h +++ b/xlators/cluster/afr/src/afr-self-heal-common.h @@ -29,6 +29,11 @@ typedef enum {          AFR_SELF_HEAL_INVALID = -1,  } afr_self_heal_type; +typedef int +(*afr_lookup_cbk_t) (call_frame_t *frame, void *cookie, xlator_t *this, +                     int32_t op_ret, int32_t op_errno, inode_t *inode, +                     struct iatt *buf, dict_t *xattr, +                     struct iatt *postparent);  int  afr_sh_select_source (int sources[], int child_count); @@ -71,4 +76,23 @@ afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,  afr_self_heal_type  afr_self_heal_type_for_transaction (afr_transaction_type type); +int +afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs, +                   int32_t **pending_matrix, int32_t *sources, +                   int32_t *success_children, afr_transaction_type type); +void +afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count); +int +afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, +                      afr_lookup_cbk_t lookup_cbk, gf_boolean_t set_gfid); +int +afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this, +                             int active_src, struct iatt *buf); +int +afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc, +                char *base_name, afr_lock_cbk_t lock_cbk); +int +afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this, +                             int child_index, struct iatt *buf, +                             struct iatt *postparent);  #endif /* __AFR_SELF_HEAL_COMMON_H__ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 38799db7042..6ce7106698d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -282,7 +282,7 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)          local = frame->local;          sh = &local->self_heal; -        gf_log (this->name, GF_LOG_TRACE, +        gf_log (this->name, GF_LOG_DEBUG,                  "finishing data selfheal of %s", local->loc.path);          if (!sh->data_lock_held) @@ -605,7 +605,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)                                       sh->child_success, this->name);          if (nsources == 0) { -                gf_log (this->name, GF_LOG_TRACE, +                gf_log (this->name, GF_LOG_DEBUG,                          "No self-heal needed for %s",                          local->loc.path); @@ -682,7 +682,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)                  orig_local->cont.lookup.xattr = dict_ref (orig_local->cont.lookup.xattrs[sh->source]);          if (sh->background) { -                sh->unwind (sh->orig_frame, this); +                sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);                  sh->unwound = _gf_true;          } @@ -793,7 +793,6 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          afr_private_t            *priv      = NULL;          int                      read_child = -1;          int                      ret        = -1; -        afr_self_heal_type       sh_type    = AFR_SELF_HEAL_INVALID;          int32_t                  **pending_matrix = NULL;          int32_t                  *sources         = NULL;          int32_t                  *valid_children  = NULL; @@ -802,6 +801,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          int32_t                  prev_read_child  = -1;          int32_t                  config_read_child = -1;          afr_self_heal_t          *sh = NULL; +        afr_self_heal_type       sh_type = AFR_SELF_HEAL_INVALID;          priv = this->private;          bufs = local->cont.lookup.bufs; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 8c619ff45a5..3977ae101f6 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -50,9 +50,6 @@  #include "afr-self-heal-common.h"  int -afr_sh_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this); - -int  afr_sh_entry_done (call_frame_t *frame, xlator_t *this)  {          afr_local_t     *local = NULL; @@ -362,7 +359,8 @@ afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,  int  afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this, -                                 int active_src) +                                 int active_src, int32_t op_ret, +                                 int32_t op_errno)  {          int              call_count = 0; @@ -385,21 +383,25 @@ afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,          afr_self_heal_t *expunge_sh    = NULL;          call_frame_t    *frame         = NULL;          int              active_src    = (long) cookie; +        afr_self_heal_t *sh            = NULL; +        afr_local_t     *local         = NULL;          priv          = this->private;          expunge_local = expunge_frame->local;          expunge_sh    = &expunge_local->self_heal;          frame         = expunge_sh->sh_frame; +        local         = frame->local; +        sh            = &local->self_heal;          if (op_ret != 0) { -                gf_log (this->name, GF_LOG_INFO, +                gf_log (this->name, GF_LOG_ERROR,                          "setattr on parent directory of %s on subvolume %s failed: %s",                          expunge_local->loc.path,                          priv->children[active_src]->name, strerror (op_errno));          }          AFR_STACK_DESTROY (expunge_frame); -        afr_sh_entry_expunge_entry_done (frame, this, active_src); +        sh->expunge_done (frame, this, active_src, op_ret, op_errno);          return 0;  } @@ -510,15 +512,17 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,          afr_private_t   *priv = NULL;          afr_local_t     *expunge_local = NULL;          afr_self_heal_t *expunge_sh = NULL; -        int              source = 0;          call_frame_t    *frame = NULL;          int              type = 0; +        afr_self_heal_t *sh            = NULL; +        afr_local_t     *local         = NULL;          priv = this->private;          expunge_local = expunge_frame->local;          expunge_sh = &expunge_local->self_heal;          frame = expunge_sh->sh_frame; -        source = expunge_sh->source; +        local         = frame->local; +        sh            = &local->self_heal;          type = buf->ia_type; @@ -538,7 +542,7 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,                  gf_log (this->name, GF_LOG_ERROR,                          "%s has unknown file type on %s: 0%o",                          expunge_local->loc.path, -                        priv->children[source]->name, type); +                        priv->children[active_src]->name, type);                  goto out;                  break;          } @@ -546,7 +550,7 @@ afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,          return 0;  out:          AFR_STACK_DESTROY (expunge_frame); -        afr_sh_entry_expunge_entry_done (frame, this, active_src); +        sh->expunge_done (frame, this, active_src, -1, EINVAL);          return 0;  } @@ -564,15 +568,19 @@ afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,          afr_self_heal_t *expunge_sh = NULL;          call_frame_t    *frame = NULL;          int              active_src = 0; +        afr_self_heal_t *sh            = NULL; +        afr_local_t     *local         = NULL;          priv = this->private;          expunge_local = expunge_frame->local;          expunge_sh = &expunge_local->self_heal;          frame = expunge_sh->sh_frame;          active_src = (long) cookie; +        local         = frame->local; +        sh            = &local->self_heal;          if (op_ret == -1) { -                gf_log (this->name, GF_LOG_DEBUG, +                gf_log (this->name, GF_LOG_ERROR,                          "lookup of %s on %s failed (%s)",                          expunge_local->loc.path,                          priv->children[active_src]->name, @@ -585,7 +593,7 @@ afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,          return 0;  out:          AFR_STACK_DESTROY (expunge_frame); -        afr_sh_entry_expunge_entry_done (frame, this, active_src); +        sh->expunge_done (frame, this, active_src, op_ret, op_errno);          return 0;  } @@ -628,7 +636,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,          call_frame_t    *frame = NULL;          int              active_src = 0;          int              need_expunge = 0; - +        afr_self_heal_t *sh            = NULL; +        afr_local_t     *local         = NULL;          priv = this->private;          expunge_local = expunge_frame->local; @@ -636,6 +645,8 @@ afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,          frame = expunge_sh->sh_frame;          active_src = expunge_sh->active_source;          source = (long) cookie; +        local         = frame->local; +        sh            = &local->self_heal;          if (op_ret == -1 && op_errno == ENOENT)                  need_expunge = 1; @@ -685,7 +696,7 @@ out:          }          AFR_STACK_DESTROY (expunge_frame); -        afr_sh_entry_expunge_entry_done (frame, this, active_src); +        sh->expunge_done (frame, this, active_src, op_ret, op_errno);          return 0;  } @@ -706,6 +717,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,          int              source = 0;          int              op_errno = 0;          char            *name = NULL; +        int             op_ret = -1;          priv = this->private;          local = frame->local; @@ -713,6 +725,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,          active_src = sh->active_source;          source = sh->source; +        sh->expunge_done = afr_sh_entry_expunge_entry_done;          name = entry->d_name; @@ -724,6 +737,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,                  gf_log (this->name, GF_LOG_TRACE,                          "skipping inspection of %s under %s",                          name, local->loc.path); +                op_ret = 0;                  goto out;          } @@ -733,6 +747,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,          expunge_frame = copy_frame (frame);          if (!expunge_frame) { +                op_errno = ENOMEM;                  goto out;          } @@ -746,6 +761,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,          ret = build_child_loc (this, &expunge_local->loc, &local->loc, name);          if (ret != 0) { +                op_errno = EINVAL;                  goto out;          } @@ -763,7 +779,7 @@ afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,          ret = 0;  out:          if (ret == -1) -                afr_sh_entry_expunge_entry_done (frame, this, active_src); +                sh->expunge_done (frame, this, active_src, op_ret, op_errno);          return 0;  } @@ -896,7 +912,8 @@ out:  int  afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this, -                                 int active_src) +                                 int active_src, int32_t op_ret, +                                 int32_t op_errno)  {          int              call_count = 0; @@ -924,6 +941,7 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,          call_frame_t    *frame = NULL;          int              active_src = 0;          int              child_index = 0; +        int32_t          impunge_ret_child = 0;          priv = this->private;          impunge_local = impunge_frame->local; @@ -931,7 +949,7 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,          frame = impunge_sh->sh_frame;          local = frame->local;          sh    = &local->self_heal; -        active_src = sh->active_source; +        active_src = impunge_sh->active_source;          child_index = (long) cookie;          if (op_ret == 0) { @@ -954,8 +972,10 @@ afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,          UNLOCK (&impunge_frame->lock);          if (call_count == 0) { +                impunge_ret_child = impunge_sh->impunge_ret_child;                  AFR_STACK_DESTROY (impunge_frame); -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, impunge_ret_child, op_ret, +                                  op_errno);          }          return 0; @@ -972,9 +992,8 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,          afr_local_t     *impunge_local = NULL;          afr_self_heal_t *impunge_sh = NULL;          int              child_index = 0; - -        struct iatt stbuf; -        int32_t     valid = 0; +        struct iatt      stbuf = {0}; +        int32_t          valid = 0;          priv          = this->private;          impunge_local = impunge_frame->local; @@ -1066,6 +1085,7 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,          int32_t          valid            = 0;          loc_t           *parent_loc       = NULL;          struct iatt      parentbuf        = {0,}; +        int32_t          impunge_ret_child = 0;          priv = this->private;          impunge_local = impunge_frame->local; @@ -1073,7 +1093,7 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,          frame = impunge_sh->sh_frame;          local = frame->local;          sh    = &local->self_heal; -        active_src = sh->active_source; +        active_src = impunge_sh->active_source;          child_index = (long) cookie; @@ -1115,6 +1135,7 @@ afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,                                     pending_array, 3 * sizeof (int32_t));          if (ret < 0) {                  GF_FREE (pending_array); +                pending_array = NULL;                  gf_log (this->name, GF_LOG_WARNING,                          "Unable to set dict value.");          } @@ -1162,8 +1183,10 @@ out:          UNLOCK (&impunge_frame->lock);          if (call_count == 0) { +                impunge_ret_child = impunge_sh->impunge_ret_child;                  AFR_STACK_DESTROY (impunge_frame); -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, impunge_ret_child, -1, +                                  op_errno);          }          return 0; @@ -1262,14 +1285,23 @@ int  afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,                                int child_index, const char *linkname)  { -        afr_private_t *priv          = NULL; -        afr_local_t   *impunge_local = NULL; -        dict_t        *dict          = NULL; -        struct iatt   *buf           = NULL; -        int            ret           = 0; +        afr_private_t   *priv          = NULL; +        afr_local_t     *impunge_local = NULL; +        dict_t          *dict          = NULL; +        struct iatt     *buf           = NULL; +        int              ret           = 0; +        call_frame_t    *frame         = NULL; +        afr_local_t     *local         = NULL; +        afr_self_heal_t *sh            = NULL; +        afr_self_heal_t *impunge_sh    = NULL; +        int32_t          impunge_ret_child = 0;          priv = this->private;          impunge_local = impunge_frame->local; +        impunge_sh    = &impunge_local->self_heal; +        frame         = impunge_sh->sh_frame; +        local         = frame->local; +        sh            = &local->self_heal;          buf = &impunge_local->cont.symlink.buf; @@ -1277,7 +1309,11 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,          if (!dict) {                  gf_log (this->name, GF_LOG_ERROR,                          "Out of memory"); -                afr_sh_entry_impunge_entry_done (impunge_frame, this, 0); +                impunge_ret_child = impunge_sh->impunge_ret_child; +                AFR_STACK_DESTROY (impunge_frame); +                sh->impunge_done (impunge_frame, this, impunge_ret_child, -1, +                                  ENOMEM); +                goto out;          }          ret = afr_set_dict_gfid (dict, buf->ia_gfid); @@ -1299,7 +1335,7 @@ afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,          if (dict)                  dict_unref (dict); - +out:          return 0;  } @@ -1318,11 +1354,16 @@ afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,          call_frame_t    *frame = NULL;          int              call_count = -1;          int              active_src = -1; +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; +        int32_t          impunge_ret_child = 0;          priv          = this->private;          impunge_local = impunge_frame->local;          impunge_sh    = &impunge_local->self_heal;          frame         = impunge_sh->sh_frame; +        local         = frame->local; +        sh            = &local->self_heal;          active_src    = impunge_sh->active_source;          child_index = (long) cookie; @@ -1348,8 +1389,10 @@ out:          UNLOCK (&impunge_frame->lock);          if (call_count == 0) { +                impunge_ret_child = impunge_sh->impunge_ret_child;                  AFR_STACK_DESTROY (impunge_frame); -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, impunge_ret_child, op_ret, +                                  op_errno);          }          return 0; @@ -1394,11 +1437,16 @@ afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cooki          call_frame_t    *frame = NULL;          int              call_count = -1;          int              active_src = -1; +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; +        int32_t          impunge_ret_child = 0;          priv          = this->private;          impunge_local = impunge_frame->local;          impunge_sh    = &impunge_local->self_heal;          frame         = impunge_sh->sh_frame; +        local         = frame->local; +        sh            = &local->self_heal;          active_src    = impunge_sh->active_source;          child_index = (long) cookie; @@ -1444,8 +1492,10 @@ out:          UNLOCK (&impunge_frame->lock);          if (call_count == 0) { +                impunge_ret_child = impunge_sh->impunge_ret_child;                  AFR_STACK_DESTROY (impunge_frame); -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, impunge_ret_child, op_ret, +                                  op_errno);          }          return 0; @@ -1489,11 +1539,16 @@ afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,          call_frame_t    *frame = NULL;          int              call_count = -1;          int              active_src = -1; +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; +        int32_t          impunge_ret_child = 0;          priv = this->private;          impunge_local = impunge_frame->local;          impunge_sh = &impunge_local->self_heal;          frame = impunge_sh->sh_frame; +        local         = frame->local; +        sh            = &local->self_heal;          active_src = impunge_sh->active_source;          child_index = (long) cookie; @@ -1520,8 +1575,10 @@ out:          UNLOCK (&impunge_frame->lock);          if (call_count == 0) { +                impunge_ret_child = impunge_sh->impunge_ret_child;                  AFR_STACK_DESTROY (impunge_frame); -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, impunge_ret_child, op_ret, +                                  op_errno);          }          return 0; @@ -1552,44 +1609,22 @@ afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,          return 0;  } -  int -afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame, -                                          void *cookie, xlator_t *this, -                                          int32_t op_ret, int32_t op_errno, -                                          inode_t *inode, struct iatt *buf, -                                          dict_t *xattr,struct iatt *postparent) +afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this, +                             int child_index, struct iatt *buf, +                             struct iatt *postparent)  { -        afr_private_t   *priv = NULL;          afr_local_t     *impunge_local = NULL;          afr_self_heal_t *impunge_sh = NULL; -        int              active_src = 0; -        int              type = 0; -        int              child_index = 0; -        call_frame_t    *frame = NULL; -        int              call_count = 0; +        afr_private_t   *priv = NULL; +        ia_type_t       type = IA_INVAL; +        int             ret = 0; +        int             active_src = 0; -        priv = this->private;          impunge_local = impunge_frame->local;          impunge_sh = &impunge_local->self_heal; -        frame = impunge_sh->sh_frame; - -        child_index = (long) cookie; - -        active_src = impunge_sh->active_source; - -        if (op_ret != 0) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "looking up %s on %s (for %s) failed (%s)", -                        impunge_local->loc.path, -                        priv->children[active_src]->name, -                        priv->children[child_index]->name, -                        strerror (op_errno)); -                goto out; -        } -          impunge_sh->parentbuf = *postparent; - +        active_src = impunge_sh->active_source;          impunge_local->cont.lookup.buf = *buf;          afr_update_loc_gfids (&impunge_local->loc, buf, postparent); @@ -1617,10 +1652,58 @@ afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame,                          "%s has unknown file type on %s: 0%o",                          impunge_local->loc.path,                          priv->children[active_src]->name, type); -                goto out; +                ret = -1;                  break;          } +        return ret; +} + +int +afr_sh_entry_impunge_recreate_lookup_cbk (call_frame_t *impunge_frame, +                                          void *cookie, xlator_t *this, +                                          int32_t op_ret, int32_t op_errno, +                                          inode_t *inode, struct iatt *buf, +                                          dict_t *xattr,struct iatt *postparent) +{ +        afr_private_t   *priv = NULL; +        afr_local_t     *impunge_local = NULL; +        afr_local_t     *local = NULL; +        afr_self_heal_t *impunge_sh = NULL; +        afr_self_heal_t *sh = NULL; +        int              active_src = 0; +        int              child_index = 0; +        call_frame_t    *frame = NULL; +        int              call_count = 0; +        int              ret = 0; +        int32_t          impunge_ret_child = 0; + +        priv = this->private; +        impunge_local = impunge_frame->local; +        impunge_sh = &impunge_local->self_heal; +        frame = impunge_sh->sh_frame; +        local = frame->local; +        sh    = &local->self_heal; + +        child_index = (long) cookie; + +        active_src = impunge_sh->active_source; + +        if (op_ret != 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "looking up %s on %s (for %s) failed (%s)", +                        impunge_local->loc.path, +                        priv->children[active_src]->name, +                        priv->children[child_index]->name, +                        strerror (op_errno)); +                goto out; +        } + +        ret = afr_sh_entry_impunge_create (impunge_frame, this, child_index, buf, +                                           postparent); +        if (ret) +                goto out; +          return 0;  out: @@ -1631,8 +1714,10 @@ out:          UNLOCK (&impunge_frame->lock);          if (call_count == 0) { +                impunge_ret_child = impunge_sh->impunge_ret_child;                  AFR_STACK_DESTROY (impunge_frame); -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, impunge_ret_child, op_ret, +                                  op_errno);          }          return 0; @@ -1680,11 +1765,16 @@ afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,          int              child_index = 0;          call_frame_t    *frame = NULL;          int              active_src = 0; +        afr_local_t     *local = NULL; +        afr_self_heal_t *sh = NULL; +        int32_t          impunge_ret_child = 0;          priv = this->private;          impunge_local = impunge_frame->local;          impunge_sh = &impunge_local->self_heal;          frame = impunge_sh->sh_frame; +        local         = frame->local; +        sh            = &local->self_heal;          child_index = (long) cookie;          active_src = impunge_sh->active_source; @@ -1730,8 +1820,10 @@ afr_sh_entry_impunge_entry_cbk (call_frame_t *impunge_frame, void *cookie,          UNLOCK (&impunge_frame->lock);          if (call_count == 0) { +                impunge_ret_child = impunge_sh->impunge_ret_child;                  AFR_STACK_DESTROY (impunge_frame); -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, impunge_ret_child, op_ret, +                                  op_errno);          }          return 0; @@ -1753,12 +1845,14 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,          int              i = 0;          int              call_count = 0;          int              op_errno = 0; +        int              op_ret = -1;          priv = this->private;          local = frame->local;          sh = &local->self_heal;          active_src = sh->active_source; +        sh->impunge_done = afr_sh_entry_impunge_entry_done;          if ((strcmp (entry->d_name, ".") == 0)              || (strcmp (entry->d_name, "..") == 0) @@ -1768,6 +1862,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,                  gf_log (this->name, GF_LOG_TRACE,                          "skipping inspection of %s under %s",                          entry->d_name, local->loc.path); +                op_ret = 0;                  goto out;          } @@ -1779,6 +1874,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,          if (!impunge_frame) {                  gf_log (this->name, GF_LOG_ERROR,                          "Out of memory."); +                op_errno = ENOMEM;                  goto out;          } @@ -1788,12 +1884,14 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,          impunge_sh = &impunge_local->self_heal;          impunge_sh->sh_frame = frame;          impunge_sh->active_source = active_src; +        impunge_sh->impunge_ret_child = active_src;          impunge_sh->impunging_entry_mode =                  st_mode_from_ia (entry->d_stat.ia_prot, entry->d_stat.ia_type);          ret = build_child_loc (this, &impunge_local->loc, &local->loc, entry->d_name);          if (ret != 0) { +                op_errno = ENOMEM;                  goto out;          } @@ -1835,7 +1933,7 @@ afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,          ret = 0;  out:          if (ret == -1) -                afr_sh_entry_impunge_entry_done (frame, this, active_src); +                sh->impunge_done (frame, this, active_src, op_ret, op_errno);          return 0;  } @@ -2225,63 +2323,6 @@ afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,          return 0;  } - - -int -afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this) -{ -        afr_local_t    *  local = NULL; -        afr_private_t  *  priv  = NULL; -        dict_t         *xattr_req = NULL; -        int ret = 0; -        int call_count = 0; -        int i = 0; -        afr_self_heal_t *sh = NULL; - -        priv  = this->private; -        local = frame->local; -        sh = &local->self_heal; - -        call_count = afr_up_children_count (priv->child_count, -                                            local->child_up); - -        local->call_count = call_count; - -        xattr_req = dict_new(); -        if (xattr_req) { -                for (i = 0; i < priv->child_count; i++) { -                        ret = dict_set_uint64 (xattr_req, -                                               priv->pending_key[i], -                                               3 * sizeof(int32_t)); -                        if (ret < 0) -                                gf_log (this->name, GF_LOG_WARNING, -                                        "%s: Unable to set dict value.", -                                        local->loc.path); -                } -        } - -        for (i = 0; i < priv->child_count; i++) -                sh->child_success[i] = -1; -        sh->success_count = 0; -        for (i = 0; i < priv->child_count; i++) { -                if (local->child_up[i]) { -                        STACK_WIND_COOKIE (frame, -                                           afr_sh_entry_lookup_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->lookup, -                                           &local->loc, xattr_req); -                        if (!--call_count) -                                break; -                } -        } - -        if (xattr_req) -                dict_unref (xattr_req); - -        return 0; -} -  int  afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)  { @@ -2302,38 +2343,14 @@ afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)                  gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "                          "for %s. Proceeding to FOP", local->loc.path); -                afr_sh_entry_lookup(frame, this); +                afr_sh_common_lookup (frame, this, &local->loc, +                                      afr_sh_entry_lookup_cbk, _gf_false);          }          return 0;  }  int -afr_sh_entry_lock (call_frame_t *frame, xlator_t *this) -{ -        afr_internal_lock_t *int_lock = NULL; -        afr_local_t         *local    = NULL; - -        local    = frame->local; -        int_lock = &local->internal_lock; - -        int_lock->transaction_lk_type = AFR_SELFHEAL_LK; -        int_lock->selfheal_lk_type    = AFR_ENTRY_SELF_HEAL_LK; - -        afr_set_lock_number (frame, this); - -        int_lock->lk_basename = NULL; -        int_lock->lk_loc      = &local->loc; -        int_lock->lock_cbk    = afr_sh_post_nonblocking_entry_cbk; - -        afr_nonblocking_entrylk (frame, this); - - -        return 0; -} - - -int  afr_self_heal_entry (call_frame_t *frame, xlator_t *this)  {          afr_local_t   *local = NULL; @@ -2344,7 +2361,8 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)          local = frame->local;          if (local->self_heal.need_entry_self_heal && priv->entry_self_heal) { -                afr_sh_entry_lock (frame, this); +                afr_sh_entrylk (frame, this, &local->loc, NULL, +                                afr_sh_post_nonblocking_entry_cbk);          } else {                  gf_log (this->name, GF_LOG_TRACE,                          "proceeding to completion on %s", diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 1214eefe25b..03b91dae300 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -604,64 +604,6 @@ afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          return 0;  } - -int -afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this) -{ -        afr_local_t     *local = NULL; -        afr_private_t   *priv = NULL; -        int              i = 0; -        int              call_count = 0; -        dict_t          *xattr_req = NULL; -        int              ret = 0; -        afr_self_heal_t *sh = NULL; - -        local = frame->local; -        priv = this->private; -        sh = &local->self_heal; - -        call_count = afr_up_children_count (priv->child_count, -                                            local->child_up); -        local->call_count = call_count; - -        xattr_req = dict_new(); - -        if (xattr_req) { -                for (i = 0; i < priv->child_count; i++) { -                        ret = dict_set_uint64 (xattr_req, -                                               priv->pending_key[i], -                                               3 * sizeof(int32_t)); -                        if (ret < 0) -                                gf_log (this->name, GF_LOG_WARNING, -                                        "Unable to set dict value."); -                } -        } - -        for (i = 0; i < priv->child_count; i++) -                sh->child_success[i] = -1; -        sh->success_count = 0; -        for (i = 0; i < priv->child_count; i++) { -                if (local->child_up[i]) { -                        gf_log (this->name, GF_LOG_TRACE, -                                "looking up %s on %s", -                                local->loc.path, priv->children[i]->name); - -                        STACK_WIND_COOKIE (frame, afr_sh_metadata_lookup_cbk, -                                           (void *) (long) i, -                                           priv->children[i], -                                           priv->children[i]->fops->lookup, -                                           &local->loc, xattr_req); -                        if (!--call_count) -                                break; -                } -        } - -        if (xattr_req) -                dict_unref (xattr_req); - -        return 0; -} -  int  afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,                                                xlator_t *this) @@ -683,7 +625,8 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,                  gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "                          "inodelks done for %s. Proceeding to FOP",                          local->loc.path); -                afr_sh_metadata_lookup (frame, this); +                afr_sh_common_lookup (frame, this, &local->loc, +                                      afr_sh_metadata_lookup_cbk, _gf_false);          }          return 0; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 00e9a1b1efb..5b211499acc 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -50,7 +50,7 @@ afr_self_heal_find_sources (xlator_t *this, afr_local_t *local, dict_t **xattr,                              afr_transaction_type transaction_type);  int -afr_self_heal (call_frame_t *frame, xlator_t *this); +afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);  gf_boolean_t  afr_is_fresh_read_child (int32_t *sources, int32_t child_count, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 8f7f54fafe0..bc85fd71d15 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -37,6 +37,17 @@  struct _pump_private; +typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this, +                                       int child, int32_t op_error, +                                       int32_t op_errno); + +typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this, +                                       int child, int32_t op_error, +                                       int32_t op_errno); +typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this); + +typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); +  typedef struct _afr_private {          gf_lock_t lock;               /* to guard access to child_count, etc */          unsigned int child_count;     /* total number of children   */ @@ -101,9 +112,12 @@ typedef struct {          /* External interface: These are variables (some optional) that             are set by whoever has triggered self-heal */ +        inode_t      *inode;          gf_boolean_t need_data_self_heal;          gf_boolean_t need_metadata_self_heal;          gf_boolean_t need_entry_self_heal; +        gf_boolean_t need_gfid_self_heal; +        gf_boolean_t need_missing_entry_self_heal;          gf_boolean_t forced_merge;        /* Is this a self-heal triggered to                                               forcibly merge the directories? */ @@ -121,20 +135,28 @@ typedef struct {          ia_type_t type;                   /* st_mode of the entry we're doing                                               self-heal on */ +        uuid_t  sh_gfid_req;                 /* gfid self-heal needs to be done +                                             with this gfid if it is not null */          /* Function to call to unwind. If self-heal is being done in the             background, this function will be called as soon as possible. */ -        int (*unwind) (call_frame_t *frame, xlator_t *this); +        int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret, +                       int32_t op_errno);          /* End of external interface members */          /* array of stat's, one for each child */          struct iatt *buf; +        struct iatt *parentbufs;          struct iatt parentbuf;          struct iatt entrybuf; +        afr_expunge_done_cbk_t expunge_done; +        afr_impunge_done_cbk_t impunge_done; +        int32_t impunge_ret_child; +          /* array of xattr's, one for each child */          dict_t **xattr; @@ -142,12 +164,19 @@ typedef struct {           */          int32_t *child_success;          int     success_count; +        /* array containing the fresh children found in the self-heal process */ +        int32_t *fresh_children; +        /* array containing the fresh children found in the parent lookup */ +        int32_t *fresh_parent_dirs;          /* array of errno's, one for each child */          int *child_errno;          int32_t **pending_matrix;          int32_t **delta_matrix; +        int32_t op_ret; +        int32_t op_errno; +          int *sources;          int source;          int active_source; @@ -165,6 +194,7 @@ typedef struct {          blksize_t block_size;          off_t file_size;          off_t offset; +        afr_post_remove_call_t post_remove_call;          loc_t parent_loc; @@ -179,6 +209,7 @@ typedef struct {          int (*completion_cbk) (call_frame_t *frame, xlator_t *this);          int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);          int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this); +        void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);          call_frame_t *sh_frame;  } afr_self_heal_t; @@ -343,6 +374,7 @@ typedef struct _afr_local {                  } statfs;                  struct { +                        uuid_t  gfid_req;                          inode_t *inode;                          struct iatt buf;                          struct iatt postparent; @@ -958,4 +990,47 @@ int32_t  afr_marker_getxattr (call_frame_t *frame, xlator_t *this,                       loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv ); +void +afr_get_fresh_children (int32_t *success_children, int32_t *sources, +                        int32_t *fresh_children, unsigned int child_count); +void +afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, +                              int32_t child_count); +void +afr_reset_children (int32_t *fresh_children, int32_t child_count); +gf_boolean_t +afr_error_more_important (int32_t old_errno, int32_t new_errno); +int +afr_errno_count (int32_t *children, int *child_errno, +                 unsigned int child_count, int32_t op_errno); +int +afr_get_children_count (int32_t *fresh_children, unsigned int child_count); +gf_boolean_t +afr_is_child_present (int32_t *success_children, int32_t child_count, +                      int32_t child); +void +afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs, +                            int32_t *success_children, +                            unsigned int child_count); +void +afr_reset_xattr (dict_t **xattr, unsigned int child_count); +gf_boolean_t +afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children, +                        unsigned int child_count, const char *path, +                        const char *xlator_name); +int +afr_gfid_missing_count (const char *xlator_name, int32_t *children, +                        struct iatt *bufs, unsigned int child_count, +                        const char *path); +void +afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path); +void +afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count); +afr_transaction_type +afr_transaction_type_get (ia_type_t ia_type); +int32_t +afr_resultant_errno_get (int32_t *children, +                         int *child_errno, unsigned int child_count); +int32_t* +afr_fresh_children_create (int32_t child_count);  #endif /* __AFR_H__ */  | 
