summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r--xlators/cluster/dht/src/dht-common.c64
-rw-r--r--xlators/cluster/dht/src/dht-common.h9
-rw-r--r--xlators/cluster/dht/src/dht-helper.c7
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h1
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c97
-rw-r--r--xlators/cluster/dht/src/tier.c57
6 files changed, 215 insertions, 20 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index fe0dc0d3455..22061ef5682 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -2998,6 +2998,8 @@ dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this,
out:
return ret;
}
+
+
int
dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xattr,
@@ -3013,6 +3015,11 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
char *next_uuid_str = NULL;
char *saveptr = NULL;
uuid_t node_uuid = {0,};
+ char *uuid_list_copy = NULL;
+ int count = 0;
+ int i = 0;
+ int index = 0;
+ int found = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -3022,6 +3029,10 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
conf = this->private;
+ VALIDATE_OR_GOTO (conf->defrag, out);
+
+ gf_msg_debug (this->name, 0, "subvol %s returned", prev->name);
+
LOCK (&frame->lock);
{
this_call_cnt = --local->call_cnt;
@@ -3045,6 +3056,15 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto unlock;
}
+ /* As DHT will not know details of its child xlators
+ * we need to parse this twice to get the count first
+ * and allocate memory later.
+ */
+ count = 0;
+ index = conf->local_subvols_cnt;
+
+ uuid_list_copy = gf_strdup (uuid_list);
+
for (uuid_str = strtok_r (uuid_list, " ", &saveptr);
uuid_str;
uuid_str = next_uuid_str) {
@@ -3054,24 +3074,57 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_UUID_PARSE_ERROR,
"Failed to parse uuid"
- " failed for %s", prev->name);
+ " for %s", prev->name);
local->op_ret = -1;
local->op_errno = EINVAL;
goto unlock;
}
+ count++;
if (gf_uuid_compare (node_uuid, conf->defrag->node_uuid)) {
gf_msg_debug (this->name, 0, "subvol %s does not"
"belong to this node",
prev->name);
} else {
+
+ /* handle multiple bricks of the same replica
+ * on the same node */
+ if (found)
+ continue;
conf->local_subvols[(conf->local_subvols_cnt)++]
- = prev;
+ = prev;
+ found = 1;
gf_msg_debug (this->name, 0, "subvol %s belongs to"
" this node", prev->name);
- break;
}
}
+
+ if (!found) {
+ local->op_ret = 0;
+ goto unlock;
+ }
+
+ conf->local_nodeuuids[index].count = count;
+ conf->local_nodeuuids[index].uuids
+ = GF_CALLOC (count, sizeof (uuid_t), 1);
+
+ /* The node-uuids are guaranteed to be returned in the same
+ * order as the bricks
+ * A null node-uuid is returned for a brick that is down.
+ */
+
+ saveptr = NULL;
+ i = 0;
+
+ for (uuid_str = strtok_r (uuid_list_copy, " ", &saveptr);
+ uuid_str;
+ uuid_str = next_uuid_str) {
+
+ next_uuid_str = strtok_r (NULL, " ", &saveptr);
+ gf_uuid_parse (uuid_str,
+ conf->local_nodeuuids[index].uuids[i]);
+ i++;
+ }
}
local->op_ret = 0;
@@ -3089,8 +3142,13 @@ dht_find_local_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
unwind:
+
+ GF_FREE (conf->local_nodeuuids[index].uuids);
+ conf->local_nodeuuids[index].uuids = NULL;
+
DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, xdata);
out:
+ GF_FREE (uuid_list_copy);
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index ffe53d99ce1..c74beb26d96 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -373,6 +373,7 @@ struct dht_container {
xlator_t *this;
loc_t *parent_loc;
dict_t *migrate_data;
+ int local_subvol_index;
};
typedef enum tier_mode_ {
@@ -444,6 +445,12 @@ typedef struct gf_tier_conf {
char volname[GD_VOLUME_NAME_MAX + 1];
} gf_tier_conf_t;
+typedef struct subvol_nodeuuids { /* Node-uuids reported for one local subvol.
+        * dht_find_local_subvol_cbk fills conf->local_nodeuuids[] at the same
+        * index at which it stores the subvol in conf->local_subvols[]. */
+ uuid_t *uuids; /* array of `count` uuids, stored in the order the tokens
+        * appear in the subvol's node-uuid reply */
+ int count; /* number of uuid tokens parsed from that reply */
+} subvol_nodeuuid_t;
+
+
struct gf_defrag_info_ {
uint64_t total_files;
uint64_t total_data;
@@ -494,6 +501,7 @@ struct gf_defrag_info_ {
/* lock migration flag */
gf_boolean_t lock_migration_enabled;
+
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -577,6 +585,7 @@ struct dht_conf {
/*local subvol storage for rebalance*/
xlator_t **local_subvols;
+ subvol_nodeuuid_t *local_nodeuuids;
int32_t local_subvols_cnt;
/*
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 81d1dffa0af..8e7989822cd 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -1000,7 +1000,12 @@ dht_init_local_subvolumes (xlator_t *this, dht_conf_t *conf)
conf->local_subvols = GF_CALLOC (cnt, sizeof (xlator_t *),
gf_dht_mt_xlator_t);
- if (!conf->local_subvols) {
+
+ /* FIX FIX : do this dynamically*/
+ conf->local_nodeuuids = GF_CALLOC (cnt, sizeof (subvol_nodeuuid_t),
+ gf_dht_nodeuuids_t);
+
+ if (!conf->local_subvols || !conf->local_nodeuuids) {
return -1;
}
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 3554f3f9c2d..19cccef537b 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -39,6 +39,7 @@ enum gf_dht_mem_types_ {
gf_dht_mt_fd_ctx_t,
gf_tier_mt_qfile_array_t,
gf_dht_ret_cache_t,
+ gf_dht_nodeuuids_t,
gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 0c83c15a4dc..90e45b1c293 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2094,6 +2094,43 @@ gf_defrag_ctx_subvols_init (dht_dfoffset_ctx_t *offset_var, xlator_t *this) {
return 0;
}
+
+/*
+ * gf_defrag_should_i_migrate - decide whether this node migrates a file.
+ *
+ * The gfid is hashed and the hash, modulo the number of node-uuids recorded
+ * for the local subvol, selects one entry of that subvol's uuid list. This
+ * node migrates the file only if the selected uuid is its own
+ * (conf->defrag->node_uuid).
+ *
+ * @this:               dht xlator; this->private is the dht_conf_t.
+ * @local_subvol_index: index into conf->local_nodeuuids[].
+ * @gfid:               gfid of the file being considered.
+ *
+ * Return value
+ * 0 : this node does not migrate the file
+ * 1 : this node migrates the file
+ *
+ * 1 is also returned when no selection can be made: the subvol has at most
+ * one recorded node-uuid, the uuid array is missing, or the hash computation
+ * fails. The hash-failure case keeps the previous caller-visible behaviour
+ * (the raw non-zero error was returned and the caller only tests for
+ * non-zero) while honouring the 0/1 contract above.
+ */
+int
+gf_defrag_should_i_migrate (xlator_t *this, int local_subvol_index, uuid_t gfid)
+{
+        int                ret     = 0;
+        uint32_t           hashval = 0;
+        int32_t            index   = 0;
+        dht_conf_t        *conf    = NULL;
+        subvol_nodeuuid_t *entry   = NULL;
+        char               buf[UUID_CANONICAL_FORM_LEN + 1] = {0, };
+
+        conf = this->private;
+        entry = &conf->local_nodeuuids[local_subvol_index];
+
+        /* Pure distribute (exactly one node-uuid). The same branch also
+         * guards the modulo below against a zero/negative count and the
+         * array access against a uuids array that was never allocated.
+         */
+        if (entry->count <= 1 || !entry->uuids) {
+                return 1;
+        }
+
+        ret = dht_hash_compute (this, 0, uuid_utoa_r (gfid, buf), &hashval);
+        if (ret != 0) {
+                /* Cannot pick an owner; do not leak the raw error code. */
+                return 1;
+        }
+
+        /* count > 1 here, so the modulo is well defined and in range. */
+        index = (int32_t) (hashval % (uint32_t) entry->count);
+
+        /* gf_uuid_compare() returns 0 when the two uuids are equal. */
+        if (!gf_uuid_compare (conf->defrag->node_uuid, entry->uuids[index])) {
+                return 1;
+        }
+
+        return 0;
+}
+
+
int
gf_defrag_migrate_single_file (void *opaque)
{
@@ -2165,6 +2202,13 @@ gf_defrag_migrate_single_file (void *opaque)
goto out;
}
+ if (!gf_defrag_should_i_migrate (this, rebal_entry->local_subvol_index,
+ entry->d_stat.ia_gfid)) {
+ gf_msg_debug (this->name, 0, "Don't migrate %s ",
+ entry_loc.path);
+ goto out;
+ }
+
gf_uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
gf_uuid_copy (entry_loc.pargfid, loc->gfid);
@@ -2179,6 +2223,7 @@ gf_defrag_migrate_single_file (void *opaque)
goto out;
}
+
inode = inode_link (entry_loc.inode, entry_loc.parent, entry->d_name, &iatt);
inode_unref (entry_loc.inode);
/* use the inode returned by inode_link */
@@ -2659,6 +2704,8 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
goto out;
}
+ tmp_container->local_subvol_index = i;
+
tmp_container->df_entry->d_stat = df_entry->d_stat;
tmp_container->df_entry->d_ino = df_entry->d_ino;
@@ -3740,6 +3787,33 @@ int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc)
}
+
+/*
+ * dht_get_local_subvols_and_nodeuuids - issue the GF_REBAL_FIND_LOCAL_SUBVOL
+ * getxattr on @loc so that the local subvolume / node-uuid tables in the
+ * dht configuration get populated.
+ *
+ * @this: dht xlator on which the getxattr is issued.
+ * @conf: dht configuration (not read here; kept for the caller's interface).
+ * @loc:  location the getxattr is sent for.
+ *
+ * Returns 0 on success, -1 if the getxattr fails (the failure is logged).
+ */
+int
+dht_get_local_subvols_and_nodeuuids (xlator_t *this, dht_conf_t *conf,
+                                     loc_t *loc)
+{
+        dict_t *dict = NULL;
+        int     ret  = -1;
+
+        /* Find local subvolumes */
+        ret = syncop_getxattr (this, loc, &dict,
+                               GF_REBAL_FIND_LOCAL_SUBVOL,
+                               NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local "
+                        "subvolume determination failed with error: %d",
+                        -ret);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* The returned dict is not used here; drop the reference so it
+         * is not leaked on either path.
+         */
+        if (dict)
+                dict_unref (dict);
+
+        return ret;
+}
+
+
int
gf_defrag_start_crawl (void *data)
{
@@ -3757,12 +3831,12 @@ gf_defrag_start_crawl (void *data)
glusterfs_ctx_t *ctx = NULL;
dht_methods_t *methods = NULL;
int i = 0;
- int thread_index = 0;
- int err = 0;
- int thread_spawn_count = 0;
+ int thread_index = 0;
+ int err = 0;
+ int thread_spawn_count = 0;
pthread_t *tid = NULL;
- gf_boolean_t is_tier_detach = _gf_false;
-
+ gf_boolean_t is_tier_detach = _gf_false;
+ int j = 0;
this = data;
if (!this)
@@ -3876,14 +3950,8 @@ gf_defrag_start_crawl (void *data)
goto out;
}
- /* Find local subvolumes */
- ret = syncop_getxattr (this, &loc, &dict,
- GF_REBAL_FIND_LOCAL_SUBVOL,
- NULL, NULL);
+ ret = dht_get_local_subvols_and_nodeuuids (this, conf, &loc);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local "
- "subvolume determination failed with error: %d",
- -ret);
ret = -1;
goto out;
}
@@ -3891,6 +3959,11 @@ gf_defrag_start_crawl (void *data)
for (i = 0 ; i < conf->local_subvols_cnt; i++) {
gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvols "
"are %s", conf->local_subvols[i]->name);
+ for (j = 0; j < conf->local_nodeuuids[i].count; j++) {
+ gf_msg (this->name, GF_LOG_INFO, 0, 0,
+ "node uuids are %s",
+ uuid_utoa(conf->local_nodeuuids[i].uuids[j]));
+ }
}
ret = gf_defrag_total_file_cnt (this, &loc);
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 41032743c04..4cbcc81f311 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -198,10 +198,17 @@ out:
static int
tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
{
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_str = NULL;
- uuid_t node_uuid = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *uuid_str = NULL;
+ uuid_t node_uuid = {0,};
+ char *dup_str = NULL;
+ char *str = NULL;
+ char *save_ptr = NULL;
+ int count = 0;
+ uint32_t hashval = 0;
+ int32_t index = 0;
+ char buf[GF_UUID_BUF_SIZE] = {0,};
GF_VALIDATE_OR_GOTO ("tier", this, out);
GF_VALIDATE_OR_GOTO (this->name, loc, out);
@@ -215,15 +222,56 @@ tier_check_same_node (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag)
goto out;
}
+
+ /* This returns multiple node-uuids now - one for each brick
+ * of the subvol.
+ */
+
if (dict_get_str (dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) {
gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
"Failed to get node-uuid for %s", loc->path);
goto out;
}
+ dup_str = gf_strdup (uuid_str);
+ str = dup_str;
+
+ /* How many uuids returned?
+ * No need to check if one of these is that of the current node.
+ */
+
+ count = 1;
+ while ((str = strchr (str, ' '))) {
+ count++;
+ str++;
+ }
+
+ /* Only one node-uuid - pure distribute? */
+ if (count == 1)
+ goto check_node;
+
+ uuid_utoa_r (loc->gfid, buf);
+ ret = dht_hash_compute (this, 0, buf, &hashval);
+ if (ret == 0) {
+ index = (hashval % count);
+ }
+
+ count = 0;
+ str = dup_str;
+ while ((uuid_str = strtok_r (str, " ", &save_ptr))) {
+ if (count == index)
+ break;
+ count++;
+ str = NULL;
+ }
+
+
+check_node:
+
if (gf_uuid_parse (uuid_str, node_uuid)) {
gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
"uuid_parse failed for %s", loc->path);
+ ret = -1;
goto out;
}
@@ -239,6 +287,7 @@ out:
if (dict)
dict_unref(dict);
+ GF_FREE (dup_str);
return ret;
}