diff options
Diffstat (limited to 'xlators')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 64 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 9 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 8 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-mem-types.h | 1 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 88 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.c | 57 | 
6 files changed, 211 insertions, 16 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index af6345ecc2a..8b4fd5cf37b 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3001,6 +3001,8 @@ dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,   out:          return ret;  } + +  int  dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                             int op_ret, int op_errno, dict_t *xattr, @@ -3016,6 +3018,11 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          char         *next_uuid_str = NULL;          char         *saveptr       = NULL;          uuid_t        node_uuid     = {0,}; +        char         *uuid_list_copy = NULL; +        int           count          = 0; +        int           i              = 0; +        int           index          = 0; +        int           found          = 0;          VALIDATE_OR_GOTO (frame, out); @@ -3025,6 +3032,10 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          prev = cookie;          conf = this->private; +        VALIDATE_OR_GOTO (conf->defrag, out); + +        gf_msg_debug (this->name, 0, "subvol %s returned", prev->name); +          LOCK (&frame->lock);          {                  this_call_cnt = --local->call_cnt; @@ -3048,6 +3059,15 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          goto unlock;                  } +                /* As DHT will not know details of its child xlators +                 * we need to parse this twice to get the count first +                 * and allocate memory later. +                 */ +                count = 0; +                index = conf->local_subvols_cnt; + +                uuid_list_copy = gf_strdup (uuid_list); +                  for (uuid_str = strtok_r (uuid_list, " ", &saveptr);                       uuid_str;                       uuid_str = next_uuid_str) { @@ -3057,24 +3077,57 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  gf_msg (this->name, GF_LOG_ERROR, 0,                                          DHT_MSG_UUID_PARSE_ERROR,                                          "Failed to parse uuid" -                                        " failed for %s", prev->name); +                                        " for %s", prev->name);                                  local->op_ret = -1;                                  local->op_errno = EINVAL;                                  goto unlock;                          } +                        count++;                          if (gf_uuid_compare (node_uuid, conf->defrag->node_uuid)) {                                  gf_msg_debug (this->name, 0, "subvol %s does not"                                                "belong to this node",                                                prev->name);                          } else { + +                                /* handle multiple bricks of the same replica +                                 * on the same node */ +                                if (found) +                                        continue;                                  conf->local_subvols[(conf->local_subvols_cnt)++] -                                        = prev; +                                                = prev; +                                found = 1;                                  gf_msg_debug (this->name, 0, "subvol %s belongs to"                                                " this node", prev->name); -                                break;                          }                  } + +                if (!found) { +                        local->op_ret = 0; +                        goto unlock; +                } + +                conf->local_nodeuuids[index].count = count; +                conf->local_nodeuuids[index].uuids +                                 = GF_CALLOC (count, sizeof (uuid_t), 1); + +                /* The node-uuids are guaranteed to be returned in the same +                 * order as the bricks +                 * A null node-uuid is returned for a brick that is down. +                 */ + +                saveptr = NULL; +                i = 0; + +                for (uuid_str = strtok_r (uuid_list_copy, " ", &saveptr); +                     uuid_str; +                     uuid_str = next_uuid_str) { + +                        next_uuid_str = strtok_r (NULL, " ", &saveptr); +                        gf_uuid_parse (uuid_str, +                                       conf->local_nodeuuids[index].uuids[i]); +                        i++; +                }          }          local->op_ret = 0; @@ -3092,8 +3145,13 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          goto out;   unwind: + +        GF_FREE (conf->local_nodeuuids[index].uuids); +        conf->local_nodeuuids[index].uuids = NULL; +          DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, xdata);   out: +        GF_FREE (uuid_list_copy);          return 0;  } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index c8cec133960..f982bf6ac1a 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -419,6 +419,7 @@ struct dht_container {          xlator_t        *this;          loc_t           *parent_loc;          dict_t          *migrate_data; +        int             local_subvol_index;  };  typedef enum tier_mode_ { @@ -490,6 +491,12 @@ typedef struct gf_tier_conf {          char                         volname[GD_VOLUME_NAME_MAX + 1];  } gf_tier_conf_t; +typedef struct subvol_nodeuuids { +        uuid_t *uuids; +        int count; +} subvol_nodeuuid_t; + +  struct gf_defrag_info_ {          uint64_t                     total_files;          uint64_t                     total_data; @@ -540,6 +547,7 @@ struct gf_defrag_info_ {          /* lock migration flag */          gf_boolean_t                 lock_migration_enabled; +  };  typedef struct gf_defrag_info_ gf_defrag_info_t; @@ -623,6 +631,7 @@ struct dht_conf {          /*local subvol storage for rebalance*/          xlator_t       **local_subvols; +        subvol_nodeuuid_t       *local_nodeuuids;          int32_t          local_subvols_cnt;          /* diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 6f08f557730..38965298325 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -505,7 +505,6 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)                          goto out;                  inode = loc->inode; -                local->hashed_subvol = dht_subvol_get_hashed (frame->this, loc);          }          if (fd) { @@ -844,7 +843,12 @@ dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf)          conf->local_subvols = GF_CALLOC (cnt, sizeof (xlator_t *),                                          gf_dht_mt_xlator_t); -        if (!conf->local_subvols) { + +        /* FIX FIX : do this dynamically*/ +        conf->local_nodeuuids = GF_CALLOC (cnt, sizeof (subvol_nodeuuid_t), +                                           gf_dht_nodeuuids_t); + +        if (!conf->local_subvols || !conf->local_nodeuuids) {                  return -1;          } diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 3554f3f9c2d..19cccef537b 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -39,6 +39,7 @@ enum gf_dht_mem_types_ {          gf_dht_mt_fd_ctx_t,          gf_tier_mt_qfile_array_t,          gf_dht_ret_cache_t, +        gf_dht_nodeuuids_t,          gf_dht_mt_end  };  #endif diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index a5d00e37c0e..a1266502d63 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -2439,6 +2439,43 @@ gf_defrag_ctx_subvols_init (dht_dfoffset_ctx_t *offset_var, xlator_t *this) {          return 0;  } + +/* Return value + * 0 : this node does not migrate the file + * 1 : this node migrates the file + */ +int +gf_defrag_should_i_migrate (xlator_t *this, int local_subvol_index, uuid_t gfid) +{ +        int         ret               = 0; +        int         i                 = local_subvol_index; +        char       *str               = NULL; +        uint32_t    hashval           = 0; +        int32_t     index        = 0; +        dht_conf_t *conf              = NULL; +        char        buf[UUID_CANONICAL_FORM_LEN + 1] = {0, }; + +        conf = this->private; + +        /* Pure distribute */ + +        if (conf->local_nodeuuids[i].count == 1) { +                return 1; +        } + +        str = uuid_utoa_r (gfid, buf); + +        ret = dht_hash_compute (this, 0, str, &hashval); +        if (ret == 0) { +                index = (hashval % conf->local_nodeuuids[i].count); +                if (!gf_uuid_compare (conf->defrag->node_uuid, +                                      conf->local_nodeuuids[i].uuids[index])) +                        ret = 1; +        } +        return ret; +} + +  int  gf_defrag_migrate_single_file (void *opaque)  { @@ -2517,6 +2554,13 @@ gf_defrag_migrate_single_file (void *opaque)                  goto out;          } +        if (!gf_defrag_should_i_migrate (this, rebal_entry->local_subvol_index, +                                         entry->d_stat.ia_gfid)) { +                gf_msg_debug (this->name, 0, "Don't migrate %s ", +                              entry_loc.path); +                goto out; +        } +          gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);          gf_uuid_copy (entry_loc.pargfid, loc->gfid); @@ -2531,6 +2575,7 @@ gf_defrag_migrate_single_file (void *opaque)                  goto out;          } +          hashed_subvol = dht_subvol_get_hashed (this, &entry_loc);          if (!hashed_subvol) {                  gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2953,6 +2998,8 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,                          goto out;                  } +                tmp_container->local_subvol_index = i; +                  tmp_container->df_entry->d_stat = df_entry->d_stat;                  tmp_container->df_entry->d_ino  = df_entry->d_ino; @@ -4032,6 +4079,33 @@ int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)  } + +int +dht_get_local_subvols_and_nodeuuids (xlator_t *this, dht_conf_t *conf, +                                     loc_t *loc) +{ + +        dict_t                  *dict                   = NULL; +        int                      ret                    = -1; + +                /* Find local subvolumes */ +        ret = syncop_getxattr (this, loc, &dict, +                               GF_REBAL_FIND_LOCAL_SUBVOL, +                               NULL, NULL); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local " +                        "subvolume determination failed with error: %d", +                        -ret); +                ret = -1; +                goto out; +        } + +        ret = 0; +out: +        return ret; +} + +  int  gf_defrag_start_crawl (void *data)  { @@ -4056,6 +4130,7 @@ gf_defrag_start_crawl (void *data)          gf_boolean_t            is_tier_detach          = _gf_false;          call_frame_t            *statfs_frame           = NULL;          xlator_t                *old_THIS               = NULL; +        int j = 0;          this = data;          if (!this) @@ -4184,14 +4259,8 @@ gf_defrag_start_crawl (void *data)                          goto out;                  } -                /* Find local subvolumes */ -                ret = syncop_getxattr (this, &loc, &dict, -                                       GF_REBAL_FIND_LOCAL_SUBVOL, -                                       NULL, NULL); +                ret = dht_get_local_subvols_and_nodeuuids (this, conf, &loc);                  if (ret) { -                        gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local " -                                "subvolume determination failed with error: %d", -                                -ret);                          ret = -1;                          goto out;                  } @@ -4199,6 +4268,11 @@ gf_defrag_start_crawl (void *data)                  for (i = 0 ; i < conf->local_subvols_cnt; i++) {                          gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvols "                                  "are %s", conf->local_subvols[i]->name); +                        for (j = 0; j < conf->local_nodeuuids[i].count; j++) { +                                gf_msg (this->name, GF_LOG_INFO, 0, 0, +                                        "node uuids are %s", +                                  uuid_utoa(conf->local_nodeuuids[i].uuids[j])); +                        }                  }                  ret = gf_defrag_total_file_cnt (this, &loc); diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index a8aebe00f69..e4b910eb0e6 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -198,10 +198,17 @@ out:  static int  tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)  { -        int     ret            = -1; -        dict_t *dict           = NULL; -        char   *uuid_str       = NULL; -        uuid_t  node_uuid      = {0,}; +        int         ret                     = -1; +        dict_t     *dict                    = NULL; +        char       *uuid_str                = NULL; +        uuid_t      node_uuid               = {0,}; +        char       *dup_str                 = NULL; +        char       *str                     = NULL; +        char       *save_ptr                = NULL; +        int         count                   = 0; +        uint32_t    hashval                 = 0; +        int32_t     index                   = 0; +        char        buf[GF_UUID_BUF_SIZE]   = {0,};          GF_VALIDATE_OR_GOTO ("tier", this, out);          GF_VALIDATE_OR_GOTO (this->name, loc, out); @@ -215,15 +222,56 @@ tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)                  goto out;          } + +        /*  This returns multiple node-uuids now - one for each brick +         *  of the subvol. +         */ +          if (dict_get_str (dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {                  gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,                          "Failed to get node-uuid for %s", loc->path);                  goto out;          } +        dup_str = gf_strdup (uuid_str); +        str = dup_str; + +        /* How many uuids returned? +         * No need to check if one of these is that of the current node. +         */ + +        count = 1; +        while ((str = strchr (str, ' '))) { +                count++; +                str++; +        } + +        /* Only one node-uuid - pure distribute? */ +        if (count == 1) +                goto check_node; + +        uuid_utoa_r (loc->gfid, buf); +        ret = dht_hash_compute (this, 0, buf, &hashval); +        if (ret == 0) { +                index = (hashval % count); +        } + +        count = 0; +        str = dup_str; +        while ((uuid_str = strtok_r (str, " ", &save_ptr))) { +                if (count == index) +                        break; +                count++; +                str = NULL; +        } + + +check_node: +          if (gf_uuid_parse (uuid_str, node_uuid)) {                  gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,                          "uuid_parse failed for %s", loc->path); +                ret = -1;                  goto out;          } @@ -239,6 +287,7 @@ out:          if (dict)                  dict_unref(dict); +        GF_FREE (dup_str);          return ret;  }  | 
