summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2020-09-02 09:30:45 +0530
committerAmar Tumballi <amar@kadalu.io>2020-09-03 07:23:40 +0530
commit718f030b3a30af43a65588d90b8cd0553c9b78cf (patch)
treeccdca967c11b80e4ed283714c23a7d95ec15eb82 /xlators
parent54d29a47dd5bd807b93ace02b7a6887d8cf26aad (diff)
Revert "dht: optimize rebalance crawl path"
Based on the discussion on the issue, it is decided that it is better to not have this implementation of the feature. This reverts commit 3af9443c770837abe4f54db399623380ab9767a7. Change-Id: I4e3bf18fc376cdb0cf29f1d98a915deca17c3496 Updates: #1422 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/dht/src/dht-common.c114
-rw-r--r--xlators/cluster/dht/src/dht-common.h11
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c400
-rw-r--r--xlators/cluster/dht/src/dht-shared.c4
4 files changed, 5 insertions, 524 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index c564a078da7..d874415699c 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -11461,117 +11461,3 @@ dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
/* Returning the first xlator error as all xlators have errors */
return layout->list[0].err;
}
-
-/* Get brick paths from all the local subvols and store for use.
- *
- * TODO: Make sure newly added brick is not picked for migration.
- * Otherwise there will be no rebalance as directory entries won't be present
- * on a newly added brick */
-int
-dht_get_brick_paths(xlator_t *this, dht_conf_t *conf, loc_t *loc)
-{
- dict_t *dict = NULL;
- gf_defrag_info_t *defrag = conf->defrag;
- char *key = NULL;
- char *tmp = NULL;
- char *str = NULL;
- char *token;
- char *saveptr = NULL;
- int i = 1;
- int j = 0;
- int ret = 0;
-
- key = gf_strdup("glusterfs.pathinfo");
- if (!key) {
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
- "failed to allocate "
- "memory");
- ret = -1;
- goto out;
- }
-
- defrag->local_brick_paths = GF_CALLOC(conf->local_subvols_cnt,
- sizeof(*defrag->local_brick_paths),
- gf_common_mt_pointer);
-
- for (j = 0; j < conf->local_subvols_cnt; j++) {
- ret = syncop_getxattr(conf->local_subvols[j], loc, &dict, key, NULL,
- NULL);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_WARNING, 0, 0,
- "failed to get path,"
- " errno %d",
- ret);
- /* TODO: We need not break out from here and can resume operation.
- * We need a place holder in gf_defrag_info_t to mark which
- * local_brick_paths we are working on. Right now, we blindly
- * take defrag->local_brick_path[0]. This can be dynamic based on
- * need */
- goto out;
- }
-
- str = NULL;
- ret = dict_get_str(dict, key, &str);
- if (ret != 0) {
- gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "dict get failed for :%s",
- key);
- goto out;
- }
- if (str == NULL) {
- gf_msg(this->name, GF_LOG_ERROR, 0, 0, "key:%s not found", key);
- ret = -1;
- goto out;
- }
-
- if (!defrag->is_pure_distribute) {
- tmp = strstr(str, "REPLICATE");
- if (tmp) {
- defrag->is_pure_distribute = _gf_false;
- break;
- }
-
- /*TODO: fetching glusterfs.pathinfo on erasure volume is failing.
- *Function the old way till we get it resolved */
- tmp = strstr(str, "ERASURE");
- if (tmp) {
- defrag->is_pure_distribute = _gf_false;
- break;
- }
-
- defrag->is_pure_distribute = _gf_true;
- }
-
- saveptr = NULL;
-
- for (token = strtok_r(str, ":", &saveptr), i = 1; token;) {
- token = strtok_r(NULL, ":", &saveptr);
- i++;
- if (i == 3) {
- token = strtok_r(token, ">", &saveptr);
- break;
- } else {
- continue;
- }
- }
-
- defrag->local_brick_paths[j] = gf_strdup(token);
- }
-
-out:
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_INFO, 0, 0,
- "failed to get brick path. "
- "Will operate old way");
- for (j = 0; j < conf->local_subvols_cnt; j++) {
- GF_FREE(defrag->local_brick_paths[j]);
- }
- defrag->is_pure_distribute = _gf_false;
- }
-
- if (defrag->is_pure_distribute) {
- gf_msg(this->name, GF_LOG_INFO, 0, 0, "volume type : pure distribute");
- }
-
- GF_FREE(key);
- return ret;
-}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index e3fb434e516..07bbe204997 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -598,15 +598,6 @@ struct gf_defrag_info_ {
gf_boolean_t stats;
/* lock migration flag */
gf_boolean_t lock_migration_enabled;
-
- /* local system crawl */
- char **local_brick_paths;
-
- /* whether the volume is pure distribute */
- gf_boolean_t is_pure_distribute;
-
- /*TODO: Introduce a glusterd option to tune this behaviour*/
- gf_boolean_t operate_dist;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -1491,6 +1482,4 @@ dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
int
dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
-int
-dht_get_brick_paths(xlator_t *this, dht_conf_t *conf, loc_t *loc);
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 000fc7ef926..358e1c6d533 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -14,7 +14,6 @@
#include <signal.h>
#include <glusterfs/events.h>
#include "glusterfs/compat-errno.h" // for ENODATA on BSD
-#include <string.h>
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
@@ -4051,368 +4050,6 @@ out:
}
int
-gf_defrag_fix_layout_puredist(xlator_t *this, gf_defrag_info_t *defrag,
- loc_t *loc, dict_t *fix_layout,
- dict_t *migrate_data)
-{
- int ret = -1;
- loc_t entry_loc = {
- 0,
- };
- fd_t *fd = NULL;
- inode_t *linked_inode = NULL, *inode = NULL;
- dht_conf_t *conf = NULL;
- int should_commit_hash = 1;
- int perrno = 0;
- /* absolute brick path length */
- int brick_len = 0;
- /* dir path length (relative to gluster mount) */
- int dir_len = 0;
- /* absolute dir path length */
- int total_len = 0;
- struct dirent *entry = NULL;
- struct dirent scratch[2] = {{
- 0,
- }};
- DIR *dirp = NULL;
- int full_entry_length = 0;
- int entry_len = 0;
- char full_entry_path[4096] = {
- 0,
- };
- char full_dir_path[4096] = {
- 0,
- };
- ssize_t size = 0;
- uuid_t tmp_gfid;
- struct stat tmpbuf = {
- 0,
- };
- struct iatt iatt = {
- 0,
- };
-
- struct stat lstatbuf = {
- 0,
- };
- struct iatt stbuf = {
- 0,
- };
-
- conf = this->private;
- if (!conf) {
- ret = -1;
- goto out;
- }
-
- /*
- * Since the primary target for the following lookup is to figure out if the
- * entry still exists, going to do a direct stat call rather than going
- * through the whole gluster stack. There are some benefits of doing gluster
- * lookup, but this is redundant since we have done already one gluster
- * lookup in the parent function.
- *
- * Randomly selecting the first local subvol to read, since it is expected
- * that the directory structure is present in all the subvols identically
- */
-
- brick_len = strlen(defrag->local_brick_paths[0]);
- /* discarding the first "/" */
- dir_len = strlen(loc->path) - 1;
- /* Extra two: one for "/" at the end and one more for '\0'*/
- total_len = brick_len + dir_len + 2;
-
- snprintf(full_dir_path, total_len, "%s%s/", defrag->local_brick_paths[0],
- loc->path + 1);
-
- ret = sys_lstat(full_dir_path, &tmpbuf);
- if (ret == -1) {
- gf_log(this->name, GF_LOG_ERROR,
- "[absolutepath %s] directory "
- "not found, path %s error %d",
- full_dir_path, loc->path, errno);
- goto out;
- }
-
- dirp = sys_opendir(full_dir_path);
- if (!dirp) {
- ret = -1;
- gf_msg(this->name, GF_LOG_ERROR, errno, 0, "failed to open dir : %s",
- loc->path);
- if (conf->decommission_subvols_cnt) {
- defrag->total_failures++;
- }
- goto out;
- }
-
- while ((entry = sys_readdir(dirp, scratch)) != NULL) {
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
- if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..") ||
- !strcmp(entry->d_name, ".glusterfs"))
- continue;
-
- /* TODO: Need to add a check for _DIRENT_HAVE_D_TYPE flag to fall back
- to stat in case d_type is not defined */
- if (entry->d_type != DT_DIR) {
- continue;
- }
-
- entry_len = strlen(entry->d_name);
- full_entry_length = total_len + entry_len + 1; /* one more for "/"*/
-
- snprintf(full_entry_path, full_entry_length, "%s%s/", full_dir_path,
- entry->d_name);
-
- size = sys_lgetxattr(full_entry_path, GFID_XATTR_KEY, tmp_gfid, 16);
- if (size != 16) {
- gf_log(this->name, GF_LOG_ERROR, "gfid not found, path %s",
- full_entry_path);
- continue;
- }
-
- loc_wipe(&entry_loc);
-
- ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
- if (ret) {
- gf_log(this->name, GF_LOG_ERROR,
- "Child loc"
- " build failed for entry: %s",
- entry->d_name);
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
-
- goto out;
- } else {
- should_commit_hash = 0;
-
- continue;
- }
- }
-
- if (gf_uuid_is_null(tmp_gfid)) {
- gf_log(this->name, GF_LOG_ERROR,
- "%s/%s"
- " gfid not present",
- loc->path, entry->d_name);
- continue;
- }
-
- gf_uuid_copy(entry_loc.gfid, tmp_gfid);
-
- /*In case the gfid stored in the inode by inode_link
- *and the gfid obtained in the lookup differs, then
- *client3_3_lookup_cbk will return ESTALE and proper
- *error will be captured.
- */
- memset(&lstatbuf, 0, sizeof(struct stat));
- ret = sys_lstat(full_entry_path, &lstatbuf);
- if (ret == -1) {
- gf_msg(this->name, GF_LOG_ERROR, errno, 0, "lstat failed for %s",
- entry->d_name);
- }
-
- memset(&stbuf, 0, sizeof(struct iatt));
- iatt_from_stat(&stbuf, &lstatbuf);
- gf_uuid_copy(stbuf.ia_gfid, entry_loc.gfid);
- linked_inode = inode_link(entry_loc.inode, loc->inode, entry->d_name,
- &stbuf);
-
- inode = entry_loc.inode;
- entry_loc.inode = linked_inode;
- inode_unref(inode);
-
- if (gf_uuid_is_null(loc->gfid)) {
- gf_log(this->name, GF_LOG_ERROR,
- "%s/%s"
- " gfid not present",
- loc->path, entry->d_name);
- continue;
- }
-
- gf_uuid_copy(entry_loc.pargfid, loc->gfid);
-
- ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
- if (ret) {
- if (-ret == ENOENT || -ret == ESTALE) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
- "Dir:%s renamed or removed. "
- "Skipping",
- loc->path);
- ret = 0;
- if (conf->decommission_subvols_cnt) {
- defrag->total_failures++;
- }
- continue;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s",
- entry_loc.path);
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- ret = -1;
- goto out;
- } else {
- should_commit_hash = 0;
- continue;
- }
- }
- }
-
- /* A return value of 2 means, either process_dir or
- * lookup of a dir failed. Hence, don't commit hash
- * for the current directory*/
-
- ret = gf_defrag_fix_layout_puredist(this, defrag, &entry_loc,
- fix_layout, migrate_data);
-
- if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
- goto out;
- }
-
- if (ret && ret != 2) {
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
- "Fix layout failed for %s", entry_loc.path);
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
-
- goto out;
- } else {
- /* Let's not commit-hash if
- * gf_defrag_fix_layout failed*/
- continue;
- }
- }
- }
-
- ret = sys_closedir(dirp);
- if (ret) {
- gf_msg_debug(this->name, 0,
- "Failed to close dir %s. Reason :"
- " %s",
- full_dir_path, strerror(errno));
- ret = 0;
- }
-
- dirp = NULL;
-
- /* A directory layout is fixed only after its subdirs are healed to
- * any newly added bricks. If the layout is fixed before subdirs are
- * healed, the newly added brick will get a non-null layout.
- * Any subdirs which hash to that layout will no longer show up
- * in a directory listing until they are healed.
- */
-
- ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
-
- /* In case of a race where the directory is deleted just before
- * layout setxattr, the errors are updated in the layout structure.
- * We can use this information to make a decision whether the directory
- * is deleted entirely.
- */
- if (ret == 0) {
- ret = dht_dir_layout_error_check(this, loc->inode);
- ret = -ret;
- }
-
- if (ret) {
- if (-ret == ENOENT || -ret == ESTALE) {
- gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
- "Setxattr failed. Dir %s "
- "renamed or removed",
- loc->path);
- if (conf->decommission_subvols_cnt) {
- defrag->total_failures++;
- }
- ret = 0;
- goto out;
- } else {
- gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
- "Setxattr failed for %s", loc->path);
-
- defrag->total_failures++;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- ret = -1;
- goto out;
- }
- }
- }
-
- if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
- (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
- ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
-
- if (ret && (ret != 2)) {
- if (perrno == ENOENT || perrno == ESTALE) {
- ret = 0;
- goto out;
- } else {
- defrag->total_failures++;
-
- gf_msg(this->name, GF_LOG_ERROR, 0,
- DHT_MSG_DEFRAG_PROCESS_DIR_FAILED,
- "gf_defrag_process_dir failed for "
- "directory: %s",
- loc->path);
-
- if (conf->decommission_in_progress) {
- goto out;
- }
-
- should_commit_hash = 0;
- }
- } else if (ret == 2) {
- should_commit_hash = 0;
- }
- }
-
- gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
-
- if (should_commit_hash &&
- gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
- defrag->total_failures++;
-
- gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
- "Settle hash failed for %s", loc->path);
-
- ret = -1;
-
- if (conf->decommission_in_progress) {
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- goto out;
- }
- }
-
- ret = 0;
-out:
- loc_wipe(&entry_loc);
-
- if (fd)
- fd_unref(fd);
-
- if (ret == 0 && should_commit_hash == 0) {
- ret = 2;
- }
-
- if (dirp) {
- sys_closedir(dirp);
- }
-
- return ret;
-}
-
-int
dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
loc_t *loc)
{
@@ -4762,7 +4399,6 @@ gf_defrag_start_crawl(void *data)
pthread_t *tid = NULL;
pthread_t filecnt_thread;
gf_boolean_t fc_thread_started = _gf_false;
- int i = 0;
this = data;
if (!this)
@@ -4898,12 +4534,6 @@ gf_defrag_start_crawl(void *data)
goto out;
}
- ret = dht_get_brick_paths(this, conf, &loc);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING, "could not get brick path");
- ret = 0;
- }
-
/* Initialise the structures required for parallel migration */
ret = gf_defrag_parallel_migration_init(this, defrag, &tid,
&thread_index);
@@ -4921,23 +4551,11 @@ gf_defrag_start_crawl(void *data)
}
}
- /* TODO: Need to introduce a flag to safely operate in the old way */
- if (defrag->operate_dist && defrag->is_pure_distribute) {
- ret = gf_defrag_fix_layout_puredist(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
- } else {
- ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout,
- migrate_data);
- if (ret && ret != 2) {
- defrag->total_failures++;
- ret = -1;
- goto out;
- }
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret && ret != 2) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
}
if (ret != 2 &&
@@ -4983,14 +4601,6 @@ out:
}
UNLOCK(&defrag->lock);
- for (i = 0; i < conf->local_subvols_cnt; i++) {
- if (defrag->local_brick_paths[i]) {
- GF_FREE(defrag->local_brick_paths[i]);
- }
- }
-
- GF_FREE(defrag->local_brick_paths);
-
GF_FREE(defrag);
conf->defrag = NULL;
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index e9b5a773d3c..96caf40d0b1 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -700,10 +700,6 @@ dht_init(xlator_t *this)
pthread_cond_init(&defrag->fc_wakeup_cond, 0);
defrag->global_error = 0;
-
- defrag->is_pure_distribute = _gf_false;
-
- defrag->operate_dist = _gf_true;
}
conf->use_fallocate = 1;