diff options
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 64 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 9 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 8 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 88 | ||||
-rw-r--r-- | xlators/cluster/dht/src/tier.c | 57 |
6 files changed, 211 insertions, 16 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index af6345ecc2a..8b4fd5cf37b 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3001,6 +3001,8 @@ dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this, out: return ret; } + + int dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, @@ -3016,6 +3018,11 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, char *next_uuid_str = NULL; char *saveptr = NULL; uuid_t node_uuid = {0,}; + char *uuid_list_copy = NULL; + int count = 0; + int i = 0; + int index = 0; + int found = 0; VALIDATE_OR_GOTO (frame, out); @@ -3025,6 +3032,10 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, prev = cookie; conf = this->private; + VALIDATE_OR_GOTO (conf->defrag, out); + + gf_msg_debug (this->name, 0, "subvol %s returned", prev->name); + LOCK (&frame->lock); { this_call_cnt = --local->call_cnt; @@ -3048,6 +3059,15 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unlock; } + /* As DHT will not know details of its child xlators + * we need to parse this twice to get the count first + * and allocate memory later. + */ + count = 0; + index = conf->local_subvols_cnt; + + uuid_list_copy = gf_strdup (uuid_list); + for (uuid_str = strtok_r (uuid_list, " ", &saveptr); uuid_str; uuid_str = next_uuid_str) { @@ -3057,24 +3077,57 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR, "Failed to parse uuid" - " failed for %s", prev->name); + " for %s", prev->name); local->op_ret = -1; local->op_errno = EINVAL; goto unlock; } + count++; if (gf_uuid_compare (node_uuid, conf->defrag->node_uuid)) { gf_msg_debug (this->name, 0, "subvol %s does not" "belong to this node", prev->name); } else { + + /* handle multiple bricks of the same replica + * on the same node */ + if (found) + continue; conf->local_subvols[(conf->local_subvols_cnt)++] - = prev; + = prev; + found = 1; gf_msg_debug (this->name, 0, "subvol %s belongs to" " this node", prev->name); - break; } } + + if (!found) { + local->op_ret = 0; + goto unlock; + } + + conf->local_nodeuuids[index].count = count; + conf->local_nodeuuids[index].uuids + = GF_CALLOC (count, sizeof (uuid_t), 1); + + /* The node-uuids are guaranteed to be returned in the same + * order as the bricks + * A null node-uuid is returned for a brick that is down. + */ + + saveptr = NULL; + i = 0; + + for (uuid_str = strtok_r (uuid_list_copy, " ", &saveptr); + uuid_str; + uuid_str = next_uuid_str) { + + next_uuid_str = strtok_r (NULL, " ", &saveptr); + gf_uuid_parse (uuid_str, + conf->local_nodeuuids[index].uuids[i]); + i++; + } } local->op_ret = 0; @@ -3092,8 +3145,13 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; unwind: + + GF_FREE (conf->local_nodeuuids[index].uuids); + conf->local_nodeuuids[index].uuids = NULL; + DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, xdata); out: + GF_FREE (uuid_list_copy); return 0; } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index c8cec133960..f982bf6ac1a 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -419,6 +419,7 @@ struct dht_container { xlator_t *this; loc_t *parent_loc; dict_t *migrate_data; + int local_subvol_index; }; typedef enum tier_mode_ { @@ -490,6 +491,12 @@ typedef struct gf_tier_conf { char volname[GD_VOLUME_NAME_MAX + 1]; } gf_tier_conf_t; +typedef struct subvol_nodeuuids { + uuid_t *uuids; + int count; +} subvol_nodeuuid_t; + + struct gf_defrag_info_ { uint64_t total_files; uint64_t total_data; @@ -540,6 +547,7 @@ struct gf_defrag_info_ { /* lock migration flag */ gf_boolean_t lock_migration_enabled; + }; typedef struct gf_defrag_info_ gf_defrag_info_t; @@ -623,6 +631,7 @@ struct dht_conf { /*local subvol storage for rebalance*/ xlator_t **local_subvols; + subvol_nodeuuid_t *local_nodeuuids; int32_t local_subvols_cnt; /* diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 6f08f557730..38965298325 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -505,7 +505,6 @@ dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop) goto out; inode = loc->inode; - local->hashed_subvol = dht_subvol_get_hashed (frame->this, loc); } if (fd) { @@ -844,7 +843,12 @@ dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf) conf->local_subvols = GF_CALLOC (cnt, sizeof (xlator_t *), gf_dht_mt_xlator_t); - if (!conf->local_subvols) { + + /* FIX FIX : do this dynamically*/ + conf->local_nodeuuids = GF_CALLOC (cnt, sizeof (subvol_nodeuuid_t), + gf_dht_nodeuuids_t); + + if (!conf->local_subvols || !conf->local_nodeuuids) { return -1; } diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 3554f3f9c2d..19cccef537b 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -39,6 +39,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_fd_ctx_t, gf_tier_mt_qfile_array_t, gf_dht_ret_cache_t, + gf_dht_nodeuuids_t, gf_dht_mt_end }; #endif diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index a5d00e37c0e..a1266502d63 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -2439,6 +2439,43 @@ gf_defrag_ctx_subvols_init (dht_dfoffset_ctx_t *offset_var, xlator_t *this) { return 0; } + +/* Return value + * 0 : this node does not migrate the file + * 1 : this node migrates the file + */ +int +gf_defrag_should_i_migrate (xlator_t *this, int local_subvol_index, uuid_t gfid) +{ + int ret = 0; + int i = local_subvol_index; + char *str = NULL; + uint32_t hashval = 0; + int32_t index = 0; + dht_conf_t *conf = NULL; + char buf[UUID_CANONICAL_FORM_LEN + 1] = {0, }; + + conf = this->private; + + /* Pure distribute */ + + if (conf->local_nodeuuids[i].count == 1) { + return 1; + } + + str = uuid_utoa_r (gfid, buf); + + ret = dht_hash_compute (this, 0, str, &hashval); + if (ret == 0) { + index = (hashval % conf->local_nodeuuids[i].count); + if (!gf_uuid_compare (conf->defrag->node_uuid, + conf->local_nodeuuids[i].uuids[index])) + ret = 1; + } + return ret; +} + + int gf_defrag_migrate_single_file (void *opaque) { @@ -2517,6 +2554,13 @@ gf_defrag_migrate_single_file (void *opaque) goto out; } + if (!gf_defrag_should_i_migrate (this, rebal_entry->local_subvol_index, + entry->d_stat.ia_gfid)) { + gf_msg_debug (this->name, 0, "Don't migrate %s ", + entry_loc.path); + goto out; + } + gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid); gf_uuid_copy (entry_loc.pargfid, loc->gfid); @@ -2531,6 +2575,7 @@ gf_defrag_migrate_single_file (void *opaque) goto out; } + hashed_subvol = dht_subvol_get_hashed (this, &entry_loc); if (!hashed_subvol) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2953,6 +2998,8 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container, goto out; } + tmp_container->local_subvol_index = i; + tmp_container->df_entry->d_stat = df_entry->d_stat; tmp_container->df_entry->d_ino = df_entry->d_ino; @@ -4032,6 +4079,33 @@ int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc) } + +int +dht_get_local_subvols_and_nodeuuids (xlator_t *this, dht_conf_t *conf, + loc_t *loc) +{ + + dict_t *dict = NULL; + int ret = -1; + + /* Find local subvolumes */ + ret = syncop_getxattr (this, loc, &dict, + GF_REBAL_FIND_LOCAL_SUBVOL, + NULL, NULL); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local " + "subvolume determination failed with error: %d", + -ret); + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + + int gf_defrag_start_crawl (void *data) { @@ -4056,6 +4130,7 @@ gf_defrag_start_crawl (void *data) gf_boolean_t is_tier_detach = _gf_false; call_frame_t *statfs_frame = NULL; xlator_t *old_THIS = NULL; + int j = 0; this = data; if (!this) @@ -4184,14 +4259,8 @@ gf_defrag_start_crawl (void *data) goto out; } - /* Find local subvolumes */ - ret = syncop_getxattr (this, &loc, &dict, - GF_REBAL_FIND_LOCAL_SUBVOL, - NULL, NULL); + ret = dht_get_local_subvols_and_nodeuuids (this, conf, &loc); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local " - "subvolume determination failed with error: %d", - -ret); ret = -1; goto out; } @@ -4199,6 +4268,11 @@ gf_defrag_start_crawl (void *data) for (i = 0 ; i < conf->local_subvols_cnt; i++) { gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvols " "are %s", conf->local_subvols[i]->name); + for (j = 0; j < conf->local_nodeuuids[i].count; j++) { + gf_msg (this->name, GF_LOG_INFO, 0, 0, + "node uuids are %s", + uuid_utoa(conf->local_nodeuuids[i].uuids[j])); + } } ret = gf_defrag_total_file_cnt (this, &loc); diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index a8aebe00f69..e4b910eb0e6 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -198,10 +198,17 @@ out: static int tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) { - int ret = -1; - dict_t *dict = NULL; - char *uuid_str = NULL; - uuid_t node_uuid = {0,}; + int ret = -1; + dict_t *dict = NULL; + char *uuid_str = NULL; + uuid_t node_uuid = {0,}; + char *dup_str = NULL; + char *str = NULL; + char *save_ptr = NULL; + int count = 0; + uint32_t hashval = 0; + int32_t index = 0; + char buf[GF_UUID_BUF_SIZE] = {0,}; GF_VALIDATE_OR_GOTO ("tier", this, out); GF_VALIDATE_OR_GOTO (this->name, loc, out); @@ -215,15 +222,56 @@ tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) goto out; } + + /* This returns multiple node-uuids now - one for each brick + * of the subvol. + */ + if (dict_get_str (dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, "Failed to get node-uuid for %s", loc->path); goto out; } + dup_str = gf_strdup (uuid_str); + str = dup_str; + + /* How many uuids returned? + * No need to check if one of these is that of the current node. + */ + + count = 1; + while ((str = strchr (str, ' '))) { + count++; + str++; + } + + /* Only one node-uuid - pure distribute? */ + if (count == 1) + goto check_node; + + uuid_utoa_r (loc->gfid, buf); + ret = dht_hash_compute (this, 0, buf, &hashval); + if (ret == 0) { + index = (hashval % count); + } + + count = 0; + str = dup_str; + while ((uuid_str = strtok_r (str, " ", &save_ptr))) { + if (count == index) + break; + count++; + str = NULL; + } + + +check_node: + if (gf_uuid_parse (uuid_str, node_uuid)) { gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, "uuid_parse failed for %s", loc->path); + ret = -1; goto out; } @@ -239,6 +287,7 @@ out: if (dict) dict_unref(dict); + GF_FREE (dup_str); return ret; } |