diff options
author | Amar Tumballi <amar@gluster.com> | 2011-09-09 09:42:51 +0530 |
---|---|---|
committer | Vijay Bellur <vijay@gluster.com> | 2011-09-13 02:10:12 -0700 |
commit | 25daa42911d2ff697880ee29c591cac5f2abebed (patch) | |
tree | 9555284c052e1e205909e91f578a8b46b522ec56 /xlators/cluster/dht | |
parent | 17e57f27c714c94dd5d9fa91650f83d069f2f4e4 (diff) |
support for de-commissioning a node using 'remove-brick'
to achieve this, we now create the volume file with the
'decommissioned-bricks' option in the distribute volume, then just
perform the rebalance set of operations (with the 'force' flag set).
from now on, the 'remove-brick' operation (with the 'start' option) tries
to migrate data from the removed bricks to the existing bricks.
'remove-brick' also supports options similar to those of 'replace-brick':
* (no options) -> works as 'force'; keeps the current behavior
of remove-brick, i.e., no data migration, only volume changes.
* start (starts remove-brick with the data-migration/draining process,
which takes care of migrating the data and, once complete,
commits the changes to the volume file)
* pause (stops data migration, but keeps the volume file intact with
whatever extra options are set)
* abort (stop data-migration, and fall back to old configuration)
* commit (if the volume is stopped, commits the changes to the volume file)
* force (stops the data-migration and commits the changes to
volume file)
Change-Id: I3952bcfbe604a0952e68b6accace7014d5e401d3
BUG: 1952
Reviewed-on: http://review.gluster.com/118
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 62 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 14 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht.c | 62 |
5 files changed, 141 insertions, 6 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 6f8594e30..e221e10ab 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -1690,6 +1690,46 @@ dht_common_setxattr_cbk (call_frame_t *frame, void *cookie, } int +dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr) +{ + int i = -1; + int ret = -1; + char *value = NULL; + dht_local_t *local = NULL; + dht_conf_t *conf = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + + local = frame->local; + prev = cookie; + conf = this->private; + + if (op_ret == -1) + goto out; + + + ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value); + if (ret) + goto out; + + if (!strcmp (value, local->key)) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->subvolumes[i] == prev->this) + conf->decommissioned_bricks[i] = prev->this; + } + } + +out: + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) { + DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP); + } + return 0; + +} + +int dht_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, int flags) { @@ -1771,6 +1811,28 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, } + tmp = dict_get (xattr, "decommission-brick"); + if (tmp) { + /* This operation should happen only on '/' */ + if (__is_root_gfid (loc->inode->gfid) != 0) { + op_errno = ENOTSUP; + goto err; + } + + memcpy (value, tmp->data, ((tmp->len < 4095) ? 
tmp->len : 4095)); + local->key = gf_strdup (value); + local->call_cnt = conf->subvolume_cnt; + + for (i = 0 ; i < conf->subvolume_cnt; i++) { + /* Get the pathinfo, and then compare */ + STACK_WIND (frame, dht_checking_pathinfo_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->getxattr, + loc, GF_XATTR_PATHINFO_KEY); + } + return 0; + } + tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY); if (tmp) { gf_log (this->name, GF_LOG_INFO, diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index ab1b82af2..3545c0f99 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -202,6 +202,9 @@ struct dht_conf { uint32_t dir_spread_cnt; struct syncenv *env; /* The env pointer to the rebalance synctask */ + + /* to keep track of nodes which are decomissioned */ + xlator_t **decommissioned_bricks; }; typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 99abe023b..d8138067e 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -579,6 +579,12 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) return -1; } + conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *), + gf_dht_mt_xlator_t); + if (!conf->decommissioned_bricks) { + return -1; + } + return 0; } diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 882e0209e..1c881be39 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -460,8 +460,22 @@ static inline int dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout) { int i = 0; + int j = 0; int err = 0; int count = 0; + dht_conf_t *conf = NULL; + + /* Gets in use only for replace-brick, remove-brick */ + conf = this->private; + for (i = 0; i < layout->cnt; i++) { + for (j = 0; j < conf->subvolume_cnt; j++) { + if (conf->decommissioned_bricks[j] && + 
conf->decommissioned_bricks[j] == layout->list[i].xlator) { + layout->list[i].err = -EINVAL; + break; + } + } + } for (i = 0; i < layout->cnt; i++) { err = layout->list[i].err; diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 87a575654..d9499a407 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -255,6 +255,47 @@ out: int +dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf, + const char *bricks) +{ + int i = 0; + int ret = -1; + char *tmpstr = NULL; + char *dup_brick = NULL; + char *node = NULL; + + if (!conf || !bricks) + goto out; + + dup_brick = gf_strdup (bricks); + node = strtok_r (dup_brick, ",", &tmpstr); + while (node) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!strcmp (conf->subvolumes[i]->name, node)) { + conf->decommissioned_bricks[i] = + conf->subvolumes[i]; + gf_log (this->name, GF_LOG_INFO, + "decommissioning subvolume %s", + conf->subvolumes[i]->name); + break; + } + } + if (i == conf->subvolume_cnt) { + /* Wrong node given. 
*/ + goto out; + } + node = strtok_r (NULL, ",", &tmpstr); + } + + ret = 0; +out: + if (dup_brick) + GF_FREE (dup_brick); + + return ret; +} + +int reconfigure (xlator_t *this, dict_t *options) { dht_conf_t *conf = NULL; @@ -299,6 +340,12 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, options, uint32, out); + if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) { + ret = dht_parse_decommissioned_bricks (this, conf, temp_str); + if (ret == -1) + goto out; + } + ret = 0; out: return ret; @@ -360,14 +407,14 @@ init (xlator_t *this) goto err; } - ret = dht_layouts_init (this, conf); - if (ret == -1) { - goto err; + if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) { + ret = dht_parse_decommissioned_bricks (this, conf, temp_str); + if (ret == -1) + goto err; } - conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t), - gf_dht_mt_dht_du_t); - if (!conf->du_stats) { + ret = dht_layouts_init (this, conf); + if (ret == -1) { goto err; } @@ -501,5 +548,8 @@ struct volume_options options[] = { { .key = {"directory-layout-spread"}, .type = GF_OPTION_TYPE_INT, }, + { .key = {"decommissioned-bricks"}, + .type = GF_OPTION_TYPE_ANY, + }, { .key = {NULL} }, }; |