diff options
| -rw-r--r-- | libglusterfs/src/syncop-utils.c | 35 | ||||
| -rw-r--r-- | libglusterfs/src/syncop-utils.h | 4 | ||||
| -rw-r--r-- | tests/afr.rc | 5 | ||||
| -rw-r--r-- | tests/basic/ec/ec-background-heals.t | 4 | ||||
| -rw-r--r-- | tests/volume.rc | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 27 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-common.h | 3 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 254 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-heald.c | 46 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec.c | 10 | 
10 files changed, 308 insertions, 85 deletions
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c index 8f25db237f5..8c25dd1773a 100644 --- a/libglusterfs/src/syncop-utils.c +++ b/libglusterfs/src/syncop-utils.c @@ -591,3 +591,38 @@ out:          return ret;  } + +int +syncop_inode_find (xlator_t *this, xlator_t *subvol, +                   uuid_t gfid, inode_t **inode, +                   dict_t *xdata, dict_t **rsp_dict) +{ +        int         ret    = 0; +        loc_t       loc    = {0, }; +        struct iatt iatt   = {0, }; +	*inode =  NULL; + +        *inode = inode_find (this->itable, gfid); +        if (*inode) +                goto out; + +        loc.inode = inode_new (this->itable); +        if (!loc.inode) { +                ret = -ENOMEM; +                goto out; +        } +        gf_uuid_copy (loc.gfid, gfid); + +	ret = syncop_lookup (subvol, &loc, &iatt, NULL, xdata, rsp_dict); +        if (ret < 0) +                goto out; + +        *inode = inode_link (loc.inode, NULL, NULL, &iatt); +        if (!*inode) { +                ret = -ENOMEM; +                goto out; +        } +out: +        loc_wipe (&loc); +        return ret; +} diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/syncop-utils.h index 3968d758c6e..4761371c120 100644 --- a/libglusterfs/src/syncop-utils.h +++ b/libglusterfs/src/syncop-utils.h @@ -43,4 +43,8 @@ syncop_ftw_throttle (xlator_t *subvol, loc_t *loc, int pid, void *data,                       int (*fn) (xlator_t *subvol, gf_dirent_t *entry,                                  loc_t *parent, void *data),                       int count, int sleep_time); +int +syncop_inode_find (xlator_t *this, xlator_t *subvol, +                   uuid_t gfid, inode_t **inode, +                   dict_t *xdata, dict_t **rsp_dict);  #endif /* _SYNCOP_H */ diff --git a/tests/afr.rc b/tests/afr.rc index ed376f0b41f..bdf4075a233 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -85,11 +85,6 @@ function is_file_heal_done {  #count the number of entries marked for self-heal  #in brick $1's index -function count_sh_entries() -{ -    ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l -} -  function count_index_entries()  {      ls $1/.glusterfs/indices/xattrop | wc -l diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t index 726e60d8fea..28efc83ebc9 100644 --- a/tests/basic/ec/ec-background-heals.t +++ b/tests/basic/ec/ec-background-heals.t @@ -23,6 +23,9 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0  EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals  EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength  TEST touch $M0/a +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}2  TEST kill_brick $V0 $H0 $B0/${V0}2  echo abc > $M0/a  EXPECT 2 get_pending_heal_count $V0 #One for each active brick @@ -30,7 +33,6 @@ $CLI volume start $V0 force  EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0  #Accessing file shouldn't heal the file  EXPECT "abc" cat $M0/a -sleep 3  EXPECT 2 get_pending_heal_count $V0 #One for each active brick  TEST $CLI volume set $V0 disperse.background-heals 1  EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals diff --git a/tests/volume.rc b/tests/volume.rc index aa614c50489..c8105567a38 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -704,3 +704,8 @@ function get_hard_link_count {          local path=$1;          stat -c %h $path  } + +function count_sh_entries() +{ +    ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l +} diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 7ccac919769..b76fa6adc00 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -153,22 +153,11 @@ unlock:  inode_t *  afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)  { -	int           ret      = 0; -        uint64_t       val     = IA_INVAL; -	loc_t         loc      = {0, }; +        int          ret       = 0; +        uint64_t     val       = IA_INVAL;          dict_t       *xdata    = NULL;          dict_t       *rsp_dict = NULL; -	inode_t      *inode    = NULL; -	struct iatt   iatt     = {0, }; - -	inode = inode_find (this->itable, gfid); -	if (inode) -		goto out; - -	loc.inode = inode_new (this->itable); -	if (!loc.inode) -		goto out; -	gf_uuid_copy (loc.gfid, gfid); +        inode_t      *inode    = NULL;          xdata = dict_new ();          if (!xdata) @@ -178,7 +167,8 @@ afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)          if (ret)                  goto out; -	ret = syncop_lookup (subvol, &loc, &iatt, NULL, xdata, &rsp_dict); +	ret = syncop_inode_find (this, subvol, gfid, &inode, +                                 xdata, &rsp_dict);  	if (ret < 0)  		goto out; @@ -188,15 +178,16 @@ afr_shd_inode_find (xlator_t *this, xlator_t *subvol, uuid_t gfid)                  if (ret)                          goto out;          } - -	inode = inode_link (loc.inode, NULL, NULL, &iatt);          ret = inode_ctx_set2 (inode, subvol, 0, &val);  out: +        if (ret && inode) { +                inode_unref (inode); +                inode = NULL; +        }          if (xdata)                  dict_unref (xdata);          if (rsp_dict)                  dict_unref (rsp_dict); -	loc_wipe (&loc);  	return inode;  } diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 8e724a81380..54c1c3cbadc 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -117,4 +117,7 @@ void ec_manager(ec_fop_data_t * fop, int32_t error);  gf_boolean_t ec_is_recoverable_error (int32_t op_errno);  void ec_handle_healers_done (ec_fop_data_t *fop); +int32_t +ec_get_heal_info (xlator_t *this, loc_t *loc, dict_t **dict); +  #endif /* __EC_COMMON_H__ */ diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 14255616830..bac8337cd3d 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -1489,22 +1489,29 @@ unlock:          return ret;  } -/*Data heal*/ +/*Find direction for data heal and heal info*/  int  ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies, -                             uint64_t *versions, uint64_t *dirty, -                             uint64_t *size, unsigned char *sources, -                             unsigned char *healed_sinks) +                       uint64_t *data_versions,  uint64_t *meta_versions, +                       uint64_t *dirty, uint64_t *size, unsigned char *sources, +                       unsigned char *healed_sinks, int which)  {          uint64_t        xattr[EC_VERSION_SIZE] = {0}; -        char            version_size[64] = {0}; +        char            version_size[128] = {0};          dict_t          *version_size_db = NULL; +        uint64_t        *m_versions      = NULL;          unsigned char   *same            = NULL;          int             max_same_count   = 0;          int             source           = 0;          int             i                = 0;          int             ret              = 0; +        dict_t          *dict            = NULL; +        if (!meta_versions) { +                m_versions = alloca0 (ec->nodes * sizeof (*m_versions)); +        } else { +                m_versions = meta_versions; +        }          version_size_db = dict_new ();          if (!version_size_db) {                  ret = -ENOMEM; @@ -1516,23 +1523,31 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,                          continue;                  if (replies[i].op_ret < 0)                          continue; -                ret = ec_dict_del_array (replies[i].xattr, EC_XATTR_VERSION, +                dict = (which == EC_COMBINE_XDATA) ? replies[i].xdata : +                                                     replies[i].xattr; + +                ret = ec_dict_del_array (dict, EC_XATTR_VERSION,                                           xattr, EC_VERSION_SIZE);                  if (ret == 0) { -                        versions[i] = xattr[EC_DATA_TXN]; +                        data_versions[i] = xattr[EC_DATA_TXN]; +                        if (meta_versions) { +                                m_versions[i] = xattr[EC_METADATA_TXN]; +                        }                  }                  memset (xattr, 0, sizeof (xattr)); -                ret = ec_dict_del_array (replies[i].xattr, EC_XATTR_DIRTY, +                ret = ec_dict_del_array (dict, EC_XATTR_DIRTY,                                           xattr, EC_VERSION_SIZE);                  if (ret == 0) {                          dirty[i] = xattr[EC_DATA_TXN];                  } -                ret = ec_dict_del_number (replies[i].xattr, EC_XATTR_SIZE, +                ret = ec_dict_del_number (dict, EC_XATTR_SIZE,                                            &size[i]); -                /*Build a db of same version, size*/ +                /*Build a db of same metadata and data version and size*/                  snprintf (version_size, sizeof (version_size), -                          "%"PRIu64"-%"PRIu64, versions[i], size[i]); +                          "%"PRIu64"-%"PRIu64"-%"PRIu64, data_versions[i], +                          m_versions[i], size[i]); +                  ret = dict_get_bin (version_size_db, version_size,                                      (void **)&same);                  if (ret < 0) { @@ -1562,7 +1577,11 @@ ec_heal_data_find_direction (ec_t *ec, default_args_cbk_t *replies,                  goto out;          } else {                  snprintf (version_size, sizeof (version_size), -                          "%"PRIu64"-%"PRIu64, versions[source], size[source]); +                          "%"PRIu64"-%"PRIu64"-%"PRIu64, +                          data_versions[source], +                          m_versions[source], +                          size[source]); +                  ret = dict_get_bin (version_size_db, version_size,                                      (void **)&same);                  if (ret < 0) @@ -1621,8 +1640,9 @@ __ec_heal_data_prepare (call_frame_t *frame, ec_t *ec, fd_t *fd,                  goto out;          } -        source = ec_heal_data_find_direction (ec, replies, versions, dirty, -                                              size, sources, healed_sinks); +        source = ec_heal_data_find_direction (ec, replies, versions, NULL, +                                              dirty, size, sources, +                                              healed_sinks, EC_COMBINE_DICT);          ret = source;          if (ret < 0)                  goto out; @@ -2602,7 +2622,7 @@ out:  int32_t  ec_launch_replace_heal (ec_t *ec)  { -	int ret = -1; +        int ret = -1;          if (!ec)                  return ret; @@ -2614,3 +2634,207 @@ ec_launch_replace_heal (ec_t *ec)          }          return ret;  } + +int32_t +ec_set_heal_info(dict_t **dict_rsp, char *status) +{ +        dict_t *dict = NULL; +        int    ret   = 0; + +        dict = dict_new (); +        if (!dict) { +                ret = -ENOMEM; +                goto out; +        } +        ret = dict_set_str (dict, "heal-info", status); +        if (ret) { +                gf_msg (THIS->name, GF_LOG_WARNING, -ret, +                        EC_MSG_HEAL_FAIL, +                        "Failed to set heal-info key to " +                        "%s", status); +                dict_unref(dict); +                dict = NULL; +        } +        *dict_rsp = dict; +out: +        return ret; +} + +int32_t +ec_need_heal (ec_t *ec, default_args_cbk_t *replies, gf_boolean_t *need_heal) +{ +        uint64_t           *dirty         = NULL; +        unsigned char      *sources       = NULL; +        unsigned char      *healed_sinks  = NULL; +        uint64_t           *data_versions = NULL; +        uint64_t           *meta_versions = NULL; +        uint64_t           *size          = NULL; +        int                ret            = 0; +        int                source_count   = 0; + +        sources = alloca0(ec->nodes); +        healed_sinks = alloca0(ec->nodes); +        dirty = alloca0 (ec->nodes * sizeof (*dirty)); +        size = alloca0 (ec->nodes * sizeof (*size)); +        data_versions = alloca0 (ec->nodes * sizeof (*data_versions)); +        meta_versions = alloca0 (ec->nodes * sizeof (*meta_versions)); + +        ret = ec_heal_data_find_direction (ec, replies, data_versions, +                                           meta_versions, dirty, size, +                                           sources, healed_sinks, +                                           EC_COMBINE_XDATA); +        if (ret < 0 && ret != -EIO) { +                goto out; +        } +        source_count = EC_COUNT (sources, ec->nodes); +        if (source_count != ec->nodes) { +                *need_heal = _gf_true; +        } +        ret = source_count; +out: +        return ret; +} + +int32_t +ec_heal_inspect (call_frame_t *frame, ec_t *ec, +                 inode_t *inode, unsigned char *locked_on, +                 gf_boolean_t *need_heal) +{ +        loc_t              loc           = {0}; +        int                ret           = 0; +        dict_t             *xdata        = NULL; +        uint64_t           zero_array[2] = {0}; +        uint64_t           zero_value    = 0; +        unsigned char      *output       = NULL; +        default_args_cbk_t *replies      = NULL; + +        EC_REPLIES_ALLOC (replies, ec->nodes); +        output = alloca0 (ec->nodes); + +        loc.inode = inode_ref (inode); +        gf_uuid_copy (loc.gfid, inode->gfid); + +        xdata = dict_new (); +        if (!xdata || +            dict_set_static_bin (xdata, EC_XATTR_VERSION, zero_array, +                                 sizeof (zero_array)) || +            dict_set_static_bin (xdata, EC_XATTR_DIRTY, zero_array, +                                 sizeof (zero_array)) || +            dict_set_static_bin (xdata, EC_XATTR_SIZE, &zero_value, +                                 sizeof (zero_value))) { +                ret = -ENOMEM; +                goto out; +        } +        ret = cluster_lookup (ec->xl_list, locked_on, ec->nodes, replies, +                              output, frame, ec->xl, &loc, xdata); +        if (ret != ec->nodes) { +                ret = ec->nodes; +                *need_heal = _gf_true; +                goto out; +        } +        ret = ec_need_heal (ec, replies, need_heal); + +out: +        cluster_replies_wipe (replies, ec->nodes); +        loc_wipe (&loc); +        if (xdata) { +                dict_unref(xdata); +        } +        return ret; +} + +int32_t +ec_heal_locked_inspect (call_frame_t *frame, ec_t *ec, inode_t *inode, +                        gf_boolean_t *need_heal) +{ +        unsigned char      *locked_on  = NULL; +        unsigned char      *up_subvols = NULL; +        unsigned char      *output     = NULL; +        default_args_cbk_t *replies    = NULL; +        int                ret         = 0; + +        EC_REPLIES_ALLOC (replies, ec->nodes); +        locked_on = alloca0(ec->nodes); +        output = alloca0(ec->nodes); +        up_subvols = alloca0(ec->nodes); +        ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); + +        ret = cluster_inodelk (ec->xl_list, up_subvols, ec->nodes, +                               replies, locked_on, frame, ec->xl, +                               ec->xl->name, inode, 0, 0); +        if (ret != ec->nodes) { +                *need_heal = _gf_true; +                goto unlock; +        } +        ret = ec_heal_inspect (frame, ec, inode, +                               locked_on, need_heal); +unlock: +        cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, +                           replies, output, frame, ec->xl, +                           ec->xl->name, inode, 0, 0); +        cluster_replies_wipe (replies, ec->nodes); +        return ret; +} + +int32_t +ec_get_heal_info (xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp) +{ +        int             ret             = -ENOMEM; +        gf_boolean_t    need_heal       = _gf_false; +        call_frame_t    *frame          = NULL; +        ec_t            *ec             = NULL; +        unsigned char   *up_subvols     = NULL; +        loc_t           loc             = {0, }; + +        VALIDATE_OR_GOTO(this, out); +        GF_VALIDATE_OR_GOTO(this->name, entry_loc, out); + +        ec = this->private; +        up_subvols = alloca0(ec->nodes); +        ec_mask_to_char_array (ec->xl_up, up_subvols, ec->nodes); + +        frame = create_frame (this, this->ctx->pool); +        if (!frame) { +                goto out; +        } +        ec_owner_set(frame, frame->root); +        frame->root->uid = 0; +        frame->root->gid = 0; +        frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + +        if (loc_copy(&loc, entry_loc) != 0) { +                gf_msg (this->name, GF_LOG_ERROR, +                        ENOMEM, EC_MSG_LOC_COPY_FAIL, +                        "Failed to copy a location."); +                goto out; +        } +        if (!loc.inode) { +                ret = syncop_inode_find (this, this, loc.gfid, +                                         &loc.inode, NULL, NULL); +                if (ret < 0) +                        goto out; +        } + +        ret = ec_heal_inspect (frame, ec, loc.inode, up_subvols, +                               &need_heal); +        if (ret == ec->nodes) { +                goto set_heal; +        } +        need_heal = _gf_false; +        ret = ec_heal_locked_inspect (frame, ec, loc.inode, +                                      &need_heal); +        if (ret < 0) +                goto out; +set_heal: +        if (need_heal) { +                ret =  ec_set_heal_info (dict_rsp, "heal"); +        } else { +                ret =  ec_set_heal_info (dict_rsp, "no-heal"); +        } +out: +        if (frame) { +                STACK_DESTROY (frame->root); +        } +        loc_wipe (&loc); +        return ret; +} diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index c87f328db0f..9860f10eadd 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -126,42 +126,6 @@ unlock:          return ret;  } - -int -ec_shd_inode_find (xlator_t *this, xlator_t *subvol, -                   uuid_t gfid, inode_t **inode) -{ -        int         ret    = 0; -        loc_t       loc    = {0, }; -        struct iatt iatt   = {0, }; -	*inode =  NULL; - -        *inode = inode_find (this->itable, gfid); -        if (*inode) -                goto out; - -        loc.inode = inode_new (this->itable); -        if (!loc.inode) { -                ret = -ENOMEM; -                goto out; -        } -        gf_uuid_copy (loc.gfid, gfid); - -        ret = syncop_lookup (subvol, &loc, &iatt, NULL, NULL, NULL); -        if (ret < 0) -                goto out; - -        *inode = inode_link (loc.inode, NULL, NULL, &iatt); -        if (!*inode) { -                ret = -ENOMEM; -                goto out; -        } -out: -        loc_wipe (&loc); -        return ret; -} - -  int  ec_shd_index_inode (xlator_t *this, xlator_t *subvol, inode_t **inode)  { @@ -190,7 +154,8 @@ ec_shd_index_inode (xlator_t *this, xlator_t *subvol, inode_t **inode)          gf_msg_debug (this->name, 0, "index-dir gfid for %s: %s",                  subvol->name, uuid_utoa (index_gfid)); -        ret = ec_shd_inode_find (this, subvol, index_gfid, inode); +        ret = syncop_inode_find (this, subvol, index_gfid, +                                 inode, NULL, NULL);  out:          loc_wipe (&rootloc); @@ -250,8 +215,8 @@ ec_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,          if (ret < 0)                  goto out; -        ret = ec_shd_inode_find (healer->this, healer->this, loc.gfid, -                                  &loc.inode); +        ret = syncop_inode_find (healer->this, healer->this, loc.gfid, +                                 &loc.inode, NULL, NULL);          if (ret < 0)                  goto out; @@ -329,7 +294,8 @@ ec_shd_full_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,          if (ret < 0)                  goto out; -        ret = ec_shd_inode_find (this, this, loc.gfid, &loc.inode); +        ret = syncop_inode_find (this, this, loc.gfid, +                                 &loc.inode, NULL, NULL);          if (ret < 0)                  goto out; diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 2aff4374b82..73747f634cd 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -806,13 +806,11 @@ ec_handle_heal_commands (call_frame_t *frame, xlator_t *this, loc_t *loc,          if (!name || strcmp (name, GF_HEAL_INFO))                  return -1; -        dict_rsp = dict_new (); -        if (dict_rsp == NULL) -                goto out; +        op_errno = -ec_get_heal_info (this, loc, &dict_rsp); +        if (op_errno <= 0) { +                op_errno = op_ret = 0; +        } -        if (dict_set_str (dict_rsp, "heal-info", "heal") == 0) -                op_ret = 0; -out:          STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict_rsp, NULL);          if (dict_rsp)                  dict_unref (dict_rsp);  | 
