summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Amar Tumballi <amar@gluster.com> 2011-09-09 09:42:51 +0530
committer Vijay Bellur <vijay@gluster.com> 2011-09-13 02:10:12 -0700
commit 25daa42911d2ff697880ee29c591cac5f2abebed (patch)
tree 9555284c052e1e205909e91f578a8b46b522ec56
parent 17e57f27c714c94dd5d9fa91650f83d069f2f4e4 (diff)
support for de-commissioning a node using 'remove-brick'
To achieve this, we now create the volume file with the 'decommissioned-nodes' option in the distribute volume, and then perform the rebalance set of operations (with the 'force' flag set).

From now on, the 'remove-brick' operation (with the 'start' option) tries to migrate data from the removed bricks to the existing bricks.

'remove-brick' also supports options similar to those of replace-brick:

* (no options) -> works as 'force'; keeps the current behavior of remove-brick, i.e., no data migration, just volume changes.
* start (starts remove-brick with the data-migration/draining process, which takes care of migrating data and, once complete, commits the changes to the volume file)
* pause (stops data migration, but keeps the volume file intact with whatever extra options are set)
* abort (stops data migration and falls back to the old configuration)
* commit (if the volume is stopped, commits the changes to the volume file)
* force (stops the data migration and commits the changes to the volume file)

Change-Id: I3952bcfbe604a0952e68b6accace7014d5e401d3
BUG: 1952
Reviewed-on: http://review.gluster.com/118
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r-- cli/src/cli-cmd-parser.c 51
-rw-r--r-- cli/src/cli-cmd-volume.c 18
-rw-r--r-- cli/src/cli-rpc-ops.c 160
-rw-r--r-- cli/src/cli.h 2
-rw-r--r-- rpc/xdr/src/cli1-xdr.c 22
-rw-r--r-- rpc/xdr/src/cli1-xdr.h 29
-rw-r--r-- rpc/xdr/src/cli1-xdr.x 25
-rw-r--r-- xlators/cluster/dht/src/dht-common.c 62
-rw-r--r-- xlators/cluster/dht/src/dht-common.h 3
-rw-r--r-- xlators/cluster/dht/src/dht-helper.c 6
-rw-r--r-- xlators/cluster/dht/src/dht-selfheal.c 14
-rw-r--r-- xlators/cluster/dht/src/dht.c 62
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-brick-ops.c 369
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.c 11
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-rebalance.c 46
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-replace-brick.c 2
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-store.c 7
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-store.h 1
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c 2
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volgen.c 104
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd.h 34
-rw-r--r-- xlators/storage/posix/src/posix.c 2
22 files changed, 855 insertions, 177 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 3e68b2cce..8ea6581af 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -753,7 +753,7 @@ out:
int32_t
cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
- dict_t **options)
+ dict_t **options, int *question)
{
dict_t *dict = NULL;
char *volname = NULL;
@@ -765,6 +765,10 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
int32_t j = 0;
char *tmp_brick = NULL;
char *tmp_brick1 = NULL;
+ char *opwords[] = { "start", "commit", "pause", "abort", "status",
+ "force", NULL };
+ char *w = NULL;
+ int32_t command = GF_OP_CMD_NONE;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -782,19 +786,53 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
GF_ASSERT (volname);
ret = dict_set_str (dict, "volname", volname);
-
if (ret)
goto out;
+ w = str_getunamb (words[wordcount - 1], opwords);
+ if (!w) {
+ /* Should be default 'force' */
+ command = GF_OP_CMD_COMMIT_FORCE;
+ if (question)
+ *question = 1;
+ } else {
+ /* handled this option */
+ wordcount--;
+ if (!strcmp ("start", w)) {
+ command = GF_OP_CMD_START;
+ } else if (!strcmp ("commit", w)) {
+ command = GF_OP_CMD_COMMIT;
+ if (question)
+ *question = 1;
+ } else if (!strcmp ("pause", w)) {
+ command = GF_OP_CMD_PAUSE;
+ } else if (!strcmp ("abort", w)) {
+ command = GF_OP_CMD_ABORT;
+ } else if (!strcmp ("status", w)) {
+ command = GF_OP_CMD_STATUS;
+ } else if (!strcmp ("force", w)) {
+ command = GF_OP_CMD_COMMIT_FORCE;
+ if (question)
+ *question = 1;
+ } else {
+ GF_ASSERT (!"opword mismatch");
+ ret = -1;
+ goto out;
+ }
+ }
+
if (wordcount < 4) {
ret = -1;
goto out;
}
- brick_index = 3;
-
+ ret = dict_set_int32 (dict, "command", command);
if (ret)
- goto out;
+ gf_log ("cli", GF_LOG_INFO, "failed to set 'command' %d",
+ command);
+
+
+ brick_index = 3;
tmp_index = brick_index;
tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char);
@@ -805,7 +843,7 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
ret = -1;
goto out;
}
-
+
tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char);
if (!tmp_brick1) {
@@ -850,7 +888,6 @@ cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
}
ret = dict_set_int32 (dict, "count", brick_count);
-
if (ret)
goto out;
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 68c5ef578..16dc32328 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -800,6 +800,7 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
gf_answer_t answer = GF_ANSWER_NO;
int sent = 0;
int parse_error = 0;
+ int need_question = 0;
const char *question = "Removing brick(s) can result in data loss. "
"Do you want to Continue?";
@@ -808,7 +809,8 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
if (!frame)
goto out;
- ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options);
+ ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options,
+ &need_question);
if (ret) {
cli_usage_out (word->pattern);
@@ -816,11 +818,13 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
goto out;
}
- answer = cli_cmd_get_confirmation (state, question);
-
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
+ if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
+ /* we need to ask question only in case of 'commit or force' */
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
}
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
@@ -1304,7 +1308,7 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_add_brick_cbk,
"add brick to volume <VOLNAME>"},
- { "volume remove-brick <VOLNAME> <BRICK> ...",
+ { "volume remove-brick <VOLNAME> <BRICK> ... {start|pause|abort|status|commit|force}",
cli_cmd_volume_remove_brick_cbk,
"remove brick from volume <VOLNAME>"},
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 3d6ce25ef..d7a5988f2 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -856,23 +856,36 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
"rebalance process");
goto done;
}
- if (rsp.op_errno == 0)
+
+ switch (rsp.op_errno) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
status = "not started";
- if (rsp.op_errno == 1)
+ break;
+ case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED:
status = "step 1: layout fix in progress";
- if (rsp.op_errno == 2)
+ break;
+ case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED:
status = "step 2: data migration in progress";
- if (rsp.op_errno == 3)
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
status = "stopped";
- if (rsp.op_errno == 4)
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
status = "completed";
- if (rsp.op_errno == 5)
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
status = "failed";
- if (rsp.op_errno == 6)
+ break;
+ case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
status = "step 1: layout fix complete";
- if (rsp.op_errno == 7)
+ break;
+ case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
status = "step 2: data migration complete";
-
+ break;
+ case GF_DEFRAG_STATUS_PAUSED:
+ status = "paused";
+ break;
+ }
if (rsp.files && (rsp.op_errno == 1)) {
cli_out ("rebalance %s: fixed layout %"PRId64,
status, rsp.files);
@@ -1064,6 +1077,87 @@ out:
return ret;
}
+int
+gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf2_cli_defrag_vol_rsp rsp = {0,};
+ char *status = "unknown";
+ int ret = 0;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp,
+ (xdrproc_t)xdr_gf2_cli_defrag_vol_rsp);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_ERROR, "error");
+ goto out;
+ }
+
+ ret = rsp.op_ret;
+ if (rsp.op_ret == -1) {
+ if (strcmp (rsp.op_errstr, ""))
+ cli_out ("%s", rsp.op_errstr);
+ else
+ cli_out ("failed to get the status of "
+ "remove-brick process");
+ goto out;
+ }
+
+ switch (rsp.op_errno) {
+ case GF_DEFRAG_STATUS_NOT_STARTED:
+ status = "not started";
+ break;
+ case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED:
+ case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED:
+ case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
+ status = "in progress";
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ status = "stopped";
+ break;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
+ status = "completed";
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ status = "failed";
+ break;
+ case GF_DEFRAG_STATUS_PAUSED:
+ status = "paused";
+ break;
+ }
+
+ if (rsp.files && (rsp.op_errno == 1)) {
+ cli_out ("remove-brick %s: fixed layout %"PRId64,
+ status, rsp.files);
+ goto out;
+ }
+ if (rsp.files && (rsp.op_errno == 6)) {
+ cli_out ("remove-brick %s: fixed layout %"PRId64,
+ status, rsp.files);
+ goto out;
+ }
+ if (rsp.files) {
+ cli_out ("remove-brick %s: decommissioned %"PRId64
+ " files of size %"PRId64, status,
+ rsp.files, rsp.size);
+ goto out;
+ }
+
+ cli_out ("remove-brick %s", status);
+
+out:
+ if (rsp.op_errstr)
+ free (rsp.op_errstr); //malloced by xdr
+ if (rsp.volname)
+ free (rsp.volname); //malloced by xdr
+ cli_cmd_broadcast_response (ret);
+ return ret;
+}
+
int
gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
@@ -2160,8 +2254,11 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
gf1_cli_remove_brick_req req = {0,};
+ gf1_cli_defrag_vol_req status_req = {0,};
int ret = 0;
- dict_t *dict = NULL;
+ dict_t *dict = NULL;
+ int32_t command = 0;
+ char *volname = NULL;
if (!frame || !this || !data) {
ret = -1;
@@ -2170,30 +2267,45 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &req.volname);
-
+ ret = dict_get_str (dict, "volname", &volname);
if (ret)
goto out;
ret = dict_get_int32 (dict, "count", &req.count);
-
if (ret)
goto out;
- ret = dict_allocate_and_serialize (dict,
- &req.bricks.bricks_val,
- (size_t *)&req.bricks.bricks_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
+ ret = dict_get_int32 (dict, "command", &command);
+ if (ret)
goto out;
- }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REMOVE_BRICK, NULL,
- this, gf_cli3_1_remove_brick_cbk,
- (xdrproc_t) xdr_gf1_cli_remove_brick_req);
+ if (command != GF_OP_CMD_STATUS) {
+ req.volname = volname;
+
+ ret = dict_allocate_and_serialize (dict,
+ &req.bricks.bricks_val,
+ (size_t *)&req.bricks.bricks_len);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
+ GLUSTER_CLI_REMOVE_BRICK, NULL,
+ this, gf_cli3_1_remove_brick_cbk,
+ (xdrproc_t) xdr_gf1_cli_remove_brick_req);
+ } else {
+ /* Need rebalance status to e sent :-) */
+ status_req.volname = volname;
+ status_req.cmd = GF_DEFRAG_CMD_STATUS;
+ ret = cli_cmd_submit (&status_req, frame, cli_rpc_prog,
+ GLUSTER_CLI_DEFRAG_VOLUME, NULL,
+ this, gf_cli3_remove_brick_status_cbk,
+ (xdrproc_t) xdr_gf1_cli_defrag_vol_req);
+
+ }
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
diff --git a/cli/src/cli.h b/cli/src/cli.h
index d3e1fc21b..1e0d69cd8 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -212,7 +212,7 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
int32_t
cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
- dict_t **options);
+ dict_t **options, int *question);
int32_t
cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c
index 250efc935..9030d3067 100644
--- a/rpc/xdr/src/cli1-xdr.c
+++ b/rpc/xdr/src/cli1-xdr.c
@@ -38,6 +38,17 @@ xdr_gf_cli_defrag_type (XDR *xdrs, gf_cli_defrag_type *objp)
}
bool_t
+xdr_gf_defrag_status_t (XDR *xdrs, gf_defrag_status_t *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
xdr_gf1_cluster_type (XDR *xdrs, gf1_cluster_type *objp)
{
register int32_t *buf;
@@ -60,6 +71,17 @@ xdr_gf1_cli_replace_op (XDR *xdrs, gf1_cli_replace_op *objp)
}
bool_t
+xdr_gf1_op_commands (XDR *xdrs, gf1_op_commands *objp)
+{
+ register int32_t *buf;
+ buf = NULL;
+
+ if (!xdr_enum (xdrs, (enum_t *) objp))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
xdr_gf_quota_type (XDR *xdrs, gf_quota_type *objp)
{
register int32_t *buf;
diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h
index 0d606e79f..d502c30b4 100644
--- a/rpc/xdr/src/cli1-xdr.h
+++ b/rpc/xdr/src/cli1-xdr.h
@@ -42,9 +42,23 @@ enum gf_cli_defrag_type {
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
GF_DEFRAG_CMD_START_MIGRATE_DATA = 1 + 4,
GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE = 1 + 5,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 6,
};
typedef enum gf_cli_defrag_type gf_cli_defrag_type;
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED = 0,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED = 1,
+ GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED = 2,
+ GF_DEFRAG_STATUS_STOPPED = 3,
+ GF_DEFRAG_STATUS_COMPLETE = 4,
+ GF_DEFRAG_STATUS_FAILED = 5,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE = 6,
+ GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE = 7,
+ GF_DEFRAG_STATUS_PAUSED = 8,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
enum gf1_cluster_type {
GF_CLUSTER_TYPE_NONE = 0,
GF_CLUSTER_TYPE_STRIPE = 0 + 1,
@@ -64,6 +78,17 @@ enum gf1_cli_replace_op {
};
typedef enum gf1_cli_replace_op gf1_cli_replace_op;
+enum gf1_op_commands {
+ GF_OP_CMD_NONE = 0,
+ GF_OP_CMD_START = 0 + 1,
+ GF_OP_CMD_COMMIT = 0 + 2,
+ GF_OP_CMD_PAUSE = 0 + 3,
+ GF_OP_CMD_ABORT = 0 + 4,
+ GF_OP_CMD_STATUS = 0 + 5,
+ GF_OP_CMD_COMMIT_FORCE = 0 + 6,
+};
+typedef enum gf1_op_commands gf1_op_commands;
+
enum gf_quota_type {
GF_QUOTA_OPTION_TYPE_NONE = 0,
GF_QUOTA_OPTION_TYPE_ENABLE = 0 + 1,
@@ -593,8 +618,10 @@ typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp;
#if defined(__STDC__) || defined(__cplusplus)
extern bool_t xdr_gf_cli_defrag_type (XDR *, gf_cli_defrag_type*);
+extern bool_t xdr_gf_defrag_status_t (XDR *, gf_defrag_status_t*);
extern bool_t xdr_gf1_cluster_type (XDR *, gf1_cluster_type*);
extern bool_t xdr_gf1_cli_replace_op (XDR *, gf1_cli_replace_op*);
+extern bool_t xdr_gf1_op_commands (XDR *, gf1_op_commands*);
extern bool_t xdr_gf_quota_type (XDR *, gf_quota_type*);
extern bool_t xdr_gf1_cli_friends_list (XDR *, gf1_cli_friends_list*);
extern bool_t xdr_gf1_cli_get_volume (XDR *, gf1_cli_get_volume*);
@@ -663,8 +690,10 @@ extern bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*);
#else /* K&R C */
extern bool_t xdr_gf_cli_defrag_type ();
+extern bool_t xdr_gf_defrag_status_t ();
extern bool_t xdr_gf1_cluster_type ();
extern bool_t xdr_gf1_cli_replace_op ();
+extern bool_t xdr_gf1_op_commands ();
extern bool_t xdr_gf_quota_type ();
extern bool_t xdr_gf1_cli_friends_list ();
extern bool_t xdr_gf1_cli_get_volume ();
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 9fc9f02d2..ff2f09af3 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -4,7 +4,20 @@
GF_DEFRAG_CMD_STATUS,
GF_DEFRAG_CMD_START_LAYOUT_FIX,
GF_DEFRAG_CMD_START_MIGRATE_DATA,
- GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE
+ GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE,
+ GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */
+} ;
+
+ enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE,
+ GF_DEFRAG_STATUS_PAUSED
} ;
enum gf1_cluster_type {
@@ -24,6 +37,16 @@
GF_REPLACE_OP_COMMIT_FORCE
} ;
+ enum gf1_op_commands {
+ GF_OP_CMD_NONE = 0,
+ GF_OP_CMD_START,
+ GF_OP_CMD_COMMIT,
+ GF_OP_CMD_PAUSE,
+ GF_OP_CMD_ABORT,
+ GF_OP_CMD_STATUS,
+ GF_OP_CMD_COMMIT_FORCE
+} ;
+
enum gf_quota_type {
GF_QUOTA_OPTION_TYPE_NONE = 0,
GF_QUOTA_OPTION_TYPE_ENABLE,
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 6f8594e30..e221e10ab 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1690,6 +1690,46 @@ dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
}
int
+dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr)
+{
+ int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret == -1)
+ goto out;
+
+
+ ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (ret)
+ goto out;
+
+ if (!strcmp (value, local->key)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev->this)
+ conf->decommissioned_bricks[i] = prev->this;
+ }
+ }
+
+out:
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
+ DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP);
+ }
+ return 0;
+
+}
+
+int
dht_setxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *xattr, int flags)
{
@@ -1771,6 +1811,28 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
}
+ tmp = dict_get (xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (__is_root_gfid (loc->inode->gfid) != 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
+ local->key = gf_strdup (value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0 ; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND (frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr,
+ loc, GF_XATTR_PATHINFO_KEY);
+ }
+ return 0;
+ }
+
tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
if (tmp) {
gf_log (this->name, GF_LOG_INFO,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index ab1b82af2..3545c0f99 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -202,6 +202,9 @@ struct dht_conf {
uint32_t dir_spread_cnt;
struct syncenv *env; /* The env pointer to the rebalance synctask */
+
+ /* to keep track of nodes which are decomissioned */
+ xlator_t **decommissioned_bricks;
};
typedef struct dht_conf dht_conf_t;
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 99abe023b..d8138067e 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -579,6 +579,12 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
return -1;
}
+ conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->decommissioned_bricks) {
+ return -1;
+ }
+
return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 882e0209e..1c881be39 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -460,8 +460,22 @@ static inline int
dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
{
int i = 0;
+ int j = 0;
int err = 0;
int count = 0;
+ dht_conf_t *conf = NULL;
+
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = -EINVAL;
+ break;
+ }
+ }
+ }
for (i = 0; i < layout->cnt; i++) {
err = layout->list[i].err;
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 87a575654..d9499a407 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -255,6 +255,47 @@ out:
int
+dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup (bricks);
+ node = strtok_r (dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp (conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] =
+ conf->subvolumes[i];
+ gf_log (this->name, GF_LOG_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r (NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+out:
+ if (dup_brick)
+ GF_FREE (dup_brick);
+
+ return ret;
+}
+
+int
reconfigure (xlator_t *this, dict_t *options)
{
dht_conf_t *conf = NULL;
@@ -299,6 +340,12 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
options, uint32, out);
+ if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ }
+
ret = 0;
out:
return ret;
@@ -360,14 +407,14 @@ init (xlator_t *this)
goto err;
}
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
+ if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
+ if (ret == -1)
+ goto err;
}
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
+ ret = dht_layouts_init (this, conf);
+ if (ret == -1) {
goto err;
}
@@ -501,5 +548,8 @@ struct volume_options options[] = {
{ .key = {"directory-layout-spread"},
.type = GF_OPTION_TYPE_INT,
},
+ { .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 8b3a03b6f..8832c69ed 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -312,7 +312,8 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
strcpy (vol_type, "distribute");
/* Do not allow remove-brick if the volume is plain stripe */
- if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && (volinfo->brick_count == volinfo->sub_count)) {
+ if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
+ (volinfo->brick_count == volinfo->sub_count)) {
snprintf (err_str, 2048, "Removing brick from a plain stripe is not allowed");
gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
ret = -1;
@@ -321,8 +322,8 @@ glusterd_handle_remove_brick (rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than the replica count
or stripe count */
- if (((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || (volinfo->type == GF_CLUSTER_TYPE_STRIPE))
- && !(volinfo->brick_count <= volinfo->sub_count)) {
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ !(volinfo->brick_count <= volinfo->sub_count)) {
if (volinfo->sub_count && (count % volinfo->sub_count != 0)) {
snprintf (err_str, 2048, "Remove brick incorrect"
" brick count of %d for %s %d",
@@ -512,16 +513,20 @@ out:
int
-glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick)
+glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
+ int force, int *need_migrate)
{
-
glusterd_brickinfo_t *brickinfo = NULL;
char *dup_brick = NULL;
- int32_t ret = -1;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
GF_ASSERT (volinfo);
GF_ASSERT (brick);
+ priv = THIS->private;
+ GF_ASSERT (priv);
+
dup_brick = gf_strdup (brick);
if (!dup_brick)
goto out;
@@ -534,15 +539,26 @@ glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick)
if (ret)
goto out;
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_stop (volinfo, brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to stop "
- "glusterfs, ret: %d", ret);
- goto out;
+ if (!uuid_compare (brickinfo->uuid, priv->uuid)) {
+ /* Only if the brick is in this glusterd, do the rebalance */
+ if (need_migrate)
+ *need_migrate = 1;
+ }
+
+ if (force) {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_brick_stop (volinfo, brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop "
+ "glusterfs, ret: %d", ret);
+ goto out;
+ }
}
+ glusterd_delete_brick (volinfo, brickinfo);
+ goto out;
}
- glusterd_delete_brick (volinfo, brickinfo);
+
+ brickinfo->decommissioned = 1;
out:
if (dup_brick)
GF_FREE (dup_brick);
@@ -700,17 +716,18 @@ out:
}
int
-glusterd_op_stage_remove_brick (dict_t *dict)
+glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- dict_t *ctx = NULL;
- char *errstr = NULL;
- int32_t brick_count = 0;
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+ int32_t brick_count = 0;
+ char msg[2048] = {0,};
+ int32_t flag = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
ret = dict_get_str (dict, "volname", &volname);
-
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
goto out;
@@ -723,25 +740,64 @@ glusterd_op_stage_remove_brick (dict_t *dict)
goto out;
}
- if (glusterd_is_defrag_on(volinfo)) {
- ctx = glusterd_op_get_ctx ();
- errstr = gf_strdup("Rebalance is in progress. Please retry"
- " after completion");
- if (!errstr) {
- ret = -1;
+ ret = dict_get_int32 (dict, "command", &flag);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ goto out;
+ }
+ cmd = flag;
+
+ ret = -1;
+ switch (cmd) {
+ case GF_OP_CMD_NONE:
+ errstr = gf_strdup ("no remove-brick command issued");
+ goto out;
+
+ case GF_OP_CMD_STATUS:
+ ret = 0;
+ goto out;
+
+ case GF_OP_CMD_START:
+ {
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf (msg, sizeof (msg), "Volume %s needs to be started "
+ "before remove-brick (you can use 'force' or "
+ "'commit' to override this behavior)",
+ volinfo->volname);
+ errstr = gf_strdup (msg);
+ gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
- gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
- ret = dict_set_dynstr (ctx, "errstr", errstr);
- if (ret) {
- GF_FREE (errstr);
- gf_log ("", GF_LOG_DEBUG,
- "failed to set errstr ctx");
+ if (glusterd_is_defrag_on(volinfo)) {
+ errstr = gf_strdup("Rebalance is in progress. Please retry"
+ " after completion");
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
goto out;
}
+ break;
+ }
- ret = -1;
- goto out;
+ case GF_OP_CMD_PAUSE:
+ case GF_OP_CMD_ABORT:
+ {
+ if (!volinfo->decommission_in_progress) {
+ errstr = gf_strdup("remove-brick is not in progress");
+ gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ break;
+ }
+
+ case GF_OP_CMD_COMMIT:
+ if (volinfo->decommission_in_progress) {
+ errstr = gf_strdup ("use 'force' option as migration "
+ "is in progress");
+ goto out;
+ }
+ break;
+
+ case GF_OP_CMD_COMMIT_FORCE:
+ break;
}
ret = dict_get_int32 (dict, "count", &brick_count);
@@ -750,41 +806,96 @@ glusterd_op_stage_remove_brick (dict_t *dict)
goto out;
}
+ ret = 0;
if (volinfo->brick_count == brick_count) {
- ctx = glusterd_op_get_ctx ();
- if (!ctx) {
- gf_log ("", GF_LOG_ERROR,
- "Operation Context is not present");
- ret = -1;
- goto out;
- }
errstr = gf_strdup ("Deleting all the bricks of the "
"volume is not allowed");
- if (!errstr) {
- gf_log ("", GF_LOG_ERROR, "Out of memory");
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynstr (ctx, "errstr", errstr);
- if (ret) {
- GF_FREE (errstr);
- gf_log ("", GF_LOG_DEBUG,
- "failed to set pump status in ctx");
- goto out;
- }
-
ret = -1;
goto out;
}
out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret && errstr) {
+ if (op_errstr)
+ *op_errstr = errstr;
+ }
return ret;
}
int
+glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status)
+{
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+
+ switch (status) {
+ case GF_DEFRAG_STATUS_PAUSED:
+ case GF_DEFRAG_STATUS_FAILED:
+ /* No changes required in the volume file.
+ everything should remain as is */
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ /* Fall back to the old volume file */
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ }
+ break;
+
+ case GF_DEFRAG_STATUS_COMPLETE:
+ /* Done with the task, you can remove the brick from the
+ volume file */
+ list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ if (!brickinfo->decommissioned)
+ continue;
+ gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s",
+ brickinfo->path);
+ brickinfo->decommissioned = 0;
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_brick_stop (volinfo, brickinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to stop glusterfs (%d)", ret);
+ }
+ }
+ glusterd_delete_brick (volinfo, brickinfo);
+ }
+ break;
+
+ default:
+ GF_ASSERT (!"cbk function called with wrong status");
+ break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to write volume files (%d)", ret);
+
+ ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to store volume info (%d)", ret);
+
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_check_generate_start_nfs ();
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Unable to start nfs process (%d)", ret);
+ }
+
+ volinfo->decommission_in_progress = 0;
+ return 0;
+}
+
+
+int
glusterd_op_add_brick (dict_t *dict, char **op_errstr)
{
int ret = 0;
@@ -848,15 +959,20 @@ out:
}
int
-glusterd_op_remove_brick (dict_t *dict)
+glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *brick = NULL;
- int32_t count = 0;
- int32_t i = 1;
- char key[256] = {0,};
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t flag = 0;
+ char err_str[4096] = {0,};
+ int need_rebalance = 0;
+ int force = 0;
+ gf1_op_commands cmd = 0;
ret = dict_get_str (dict, "volname", &volname);
@@ -866,12 +982,99 @@ glusterd_op_remove_brick (dict_t *dict)
}
ret = glusterd_volinfo_find (volname, &volinfo);
-
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
goto out;
}
+ ret = dict_get_int32 (dict, "command", &flag);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ goto out;
+ }
+ cmd = flag;
+
+ ret = -1;
+ switch (cmd) {
+ case GF_OP_CMD_NONE:
+ goto out;
+
+ case GF_OP_CMD_STATUS:
+ ret = 0;
+ goto out;
+
+ case GF_OP_CMD_PAUSE:
+ {
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->defrag == (void *)1)
+ volinfo->defrag = NULL;
+
+ if (volinfo->defrag) {
+ LOCK (&volinfo->defrag->lock);
+
+ volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;
+
+ UNLOCK (&volinfo->defrag->lock);
+ }
+ }
+
+ /* rebalance '_cbk()' will take care of volume file updates */
+ ret = 0;
+ goto out;
+ }
+
+ case GF_OP_CMD_ABORT:
+ {
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->defrag == (void *)1)
+ volinfo->defrag = NULL;
+
+ if (volinfo->defrag) {
+ LOCK (&volinfo->defrag->lock);
+
+ volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+ UNLOCK (&volinfo->defrag->lock);
+ }
+ }
+
+ /* rebalance '_cbk()' will take care of volume file updates */
+ ret = 0;
+ goto out;
+ }
+
+ case GF_OP_CMD_START:
+ force = 0;
+ break;
+
+ case GF_OP_CMD_COMMIT:
+ force = 1;
+ break;
+
+ case GF_OP_CMD_COMMIT_FORCE:
+
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->defrag == (void *)1)
+ volinfo->defrag = NULL;
+
+ if (volinfo->defrag) {
+ LOCK (&volinfo->defrag->lock);
+ /* Fake 'rebalance-complete' so the graph change
+ happens right away */
+ volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+
+ UNLOCK (&volinfo->defrag->lock);
+ }
+ ret = 0;
+ /* Graph change happens in rebalance _cbk function,
+ no need to do anything here */
+ goto out;
+ }
+
+ force = 1;
+ break;
+ }
+
ret = dict_get_int32 (dict, "count", &count);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to get count");
@@ -887,26 +1090,46 @@ glusterd_op_remove_brick (dict_t *dict)
goto out;
}
- ret = glusterd_op_perform_remove_brick (volinfo, brick);
+ ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
+ (i == 1) ? &need_rebalance : NULL);
if (ret)
goto out;
i++;
}
ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret)
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles");
goto out;
-
- volinfo->defrag_status = 0;
+ }
ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
-
- if (ret)
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo");
goto out;
+ }
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_check_generate_start_nfs ();
+ volinfo->defrag_status = 0;
+ if (!force && need_rebalance) {
+ /* perform the rebalance operations */
+ ret = glusterd_handle_defrag_start (volinfo, err_str, 4096,
+ GF_DEFRAG_CMD_START_FORCE,
+ glusterd_remove_brick_migrate_cbk);
+ if (!ret)
+ volinfo->decommission_in_progress = 1;
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to start the rebalance");
+ }
+ } else {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_check_generate_start_nfs ();
+ }
out:
+ if (ret && err_str[0] && op_errstr)
+ *op_errstr = gf_strdup (err_str);
+
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 3b30fb080..53556984a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2297,7 +2297,7 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
break;
case GD_OP_REMOVE_BRICK:
- ret = glusterd_op_stage_remove_brick (dict);
+ ret = glusterd_op_stage_remove_brick (dict, op_errstr);
break;
case GD_OP_LOG_FILENAME:
@@ -2387,7 +2387,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
break;
case GD_OP_REMOVE_BRICK:
- ret = glusterd_op_remove_brick (dict);
+ ret = glusterd_op_remove_brick (dict, op_errstr);
break;
case GD_OP_LOG_FILENAME:
@@ -2565,6 +2565,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr)
int32_t i = 1;
char key[256] = {0,};
glusterd_pending_node_t *pending_node = NULL;
+ int32_t force = 0;
ret = dict_get_str (dict, "volname", &volname);
@@ -2586,6 +2587,12 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr)
goto out;
}
+ ret = dict_get_int32 (dict, "force", &force);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_INFO, "force flag is not set");
+ ret = 0;
+ goto out;
+ }
while ( i <= count) {
snprintf (key, 256, "brick%d", i);
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index a013d0adf..defaf947f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -46,6 +46,7 @@
#include "cli1-xdr.h"
#include "xdr-generic.h"
+/* return values - 0: success, +ve: stopped, -ve: failure */
int
gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
{
@@ -66,7 +67,8 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
if (!fd)
goto out;
- if (defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) {
+ if ((defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) ||
+ (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)) {
strcpy (force_string, "force");
} else {
strcpy (force_string, "not-force");
@@ -105,9 +107,11 @@ gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
}
UNLOCK (&defrag->lock);
- if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) {
+ if (volinfo->defrag_status !=
+ GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) {
+ /* Status is no longer 'migrate-data started': it is now one of stopped, paused, complete (forced commit), etc. */
closedir (fd);
- ret = -1;
+ ret = 1;
goto out;
}
}
@@ -144,6 +148,7 @@ out:
return ret;
}
+/* return values - 0: success, +ve: stopped, -ve: failure */
int
gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir)
{
@@ -187,9 +192,11 @@ gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir)
break;
}
- if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) {
+ if (volinfo->defrag_status !=
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED) {
+ /* Status is no longer 'layout-fix started': it is now one of stopped, paused, complete (forced commit), etc. */
closedir (fd);
- ret = -1;
+ ret = 1;
goto out;
}
}
@@ -210,6 +217,7 @@ glusterd_defrag_start (void *data)
int ret = -1;
struct stat stbuf = {0,};
+ THIS = volinfo->xl;
defrag = volinfo->defrag;
if (!defrag)
goto out;
@@ -240,8 +248,10 @@ glusterd_defrag_start (void *data)
/* Step 1: Fix layout of all the directories */
ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount);
+ if (ret < 0)
+ volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ /* in both the 'stopped' and the 'failure' cases, goto out */
if (ret) {
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
goto out;
}
@@ -257,8 +267,10 @@ glusterd_defrag_start (void *data)
/* Step 2: Iterate over directories to move data */
ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount);
+ if (ret < 0)
+ volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ /* in both the 'stopped' and the 'failure' cases, goto out */
if (ret) {
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
goto out;
}
@@ -267,7 +279,8 @@ glusterd_defrag_start (void *data)
}
/* Completed whole process */
- if (defrag->cmd == GF_DEFRAG_CMD_START)
+ if ((defrag->cmd == GF_DEFRAG_CMD_START) ||
+ (defrag->cmd == GF_DEFRAG_CMD_START_FORCE))
volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
volinfo->rebalance_files = defrag->total_files;
@@ -281,9 +294,13 @@ out:
ret = runcmd ("umount", "-l", defrag->mount, NULL);
LOCK_DESTROY (&defrag->lock);
+
+ if (defrag->cbk_fn) {
+ defrag->cbk_fn (volinfo, volinfo->defrag_status);
+ }
+
GF_FREE (defrag);
}
-
return NULL;
}
@@ -332,7 +349,7 @@ glusterd_defrag_stop (glusterd_volinfo_t *volinfo, u_quad_t *files,
LOCK (&volinfo->defrag->lock);
{
- volinfo->defrag_status = GF_DEFRAG_STATUS_STOPED;
+ volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
*files = volinfo->defrag->total_files;
*size = volinfo->defrag->total_data;
}
@@ -497,7 +514,7 @@ out:
int
glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
- size_t len, int cmd)
+ size_t len, int cmd, defrag_cbk_fn_t cbk)
{
int ret = -1;
glusterd_defrag_info_t *defrag = NULL;
@@ -552,6 +569,9 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED;
+ if (cbk)
+ defrag->cbk_fn = cbk;
+
ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start,
volinfo);
if (ret) {
@@ -635,7 +655,7 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req)
case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
{
ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
- cli_req.cmd);
+ cli_req.cmd, NULL);
rsp.op_ret = ret;
break;
}
@@ -845,7 +865,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
case GF_DEFRAG_CMD_START_MIGRATE_DATA:
case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
- cmd);
+ cmd, NULL);
break;
case GF_DEFRAG_CMD_STOP:
ret = glusterd_defrag_stop (volinfo, &files, &size,
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 060d40bed..4a4289910 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -1486,7 +1486,7 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo,
volinfo->brick_count++;
- ret = glusterd_op_perform_remove_brick (volinfo, old_brick);
+ ret = glusterd_op_perform_remove_brick (volinfo, old_brick, 1, NULL);
if (ret)
goto out;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 53fdcf484..dab075db0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -321,6 +321,10 @@ glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo)
ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
value);
+ snprintf (value, sizeof(value), "%d", brickinfo->decommissioned);
+ ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ value);
+
out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
@@ -1637,6 +1641,9 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
pmap = pmap_registry_get (THIS);
if (pmap->last_alloc <= brickinfo->rdma_port)
pmap->last_alloc = brickinfo->rdma_port + 1;
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
+ gf_string2int (value, &brickinfo->decommissioned);
} else {
gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
key);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 3ca232a9a..61bda195d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -63,6 +63,7 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port"
#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port"
+#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned"
#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 8694f7536..5b247b6a9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -548,6 +548,8 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo)
goto out;
}
+ new_volinfo->xl = THIS;
+
*volinfo = new_volinfo;
ret = 0;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index cb198dfb9..d0533b1fc 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1811,10 +1811,15 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
char **cluster_args = NULL;
int i = 0;
int j = 0;
- int ret = 0;
+ int ret = -1;
xlator_t *xl = NULL;
xlator_t *txl = NULL;
xlator_t *trav = NULL;
+ int removed_bricks = 0;
+ int index_of_removed_brick = 0;
+ char *removed_bricklist = NULL;
+ char volume_name[1024] = {0,};
+ int idx = 0;
volname = volinfo->volname;
dict = volinfo->dict;
@@ -1824,7 +1829,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
gf_log ("", GF_LOG_ERROR,
"volume inconsistency: brick count is 0");
- return -1;
+ goto out;
}
if (volinfo->sub_count && volinfo->sub_count < volinfo->brick_count &&
volinfo->brick_count % volinfo->sub_count != 0) {
@@ -1834,7 +1839,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
"number of bricks per cluster (%d) in a multi-cluster "
"setup",
volinfo->brick_count, volinfo->sub_count);
- return -1;
+ goto out;
}
get_transport_type (volinfo, set_dict, transt, _gf_false);
@@ -1844,19 +1849,32 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
i = 0;
list_for_each_entry (brick, &volinfo->bricks, brick_list) {
+ ret = -1;
xl = volgen_graph_add_nolink (graph, "protocol/client",
"%s-client-%d", volname, i);
if (!xl)
- return -1;
+ goto out;
ret = xlator_set_option (xl, "remote-host", brick->hostname);
if (ret)
- return -1;
+ goto out;
ret = xlator_set_option (xl, "remote-subvolume", brick->path);
if (ret)
- return -1;
+ goto out;
ret = xlator_set_option (xl, "transport-type", transt);
if (ret)
- return -1;
+ goto out;
+ if (brick->decommissioned) {
+ if (!removed_bricklist) {
+ removed_bricklist = GF_CALLOC (16 * GF_UNIT_KB,
+ 1, gf_common_mt_char);
+ index_of_removed_brick = i;
+ }
+ if (removed_bricks)
+ strcat (removed_bricklist, ",");
+ snprintf (volume_name, 1024, "%s-client-%d", volname, i);
+ strcat (removed_bricklist, volume_name);
+ removed_bricks++;
+ }
i++;
}
@@ -1866,7 +1884,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
"differs from brick count (%d)", i,
volinfo->brick_count);
- return -1;
+ goto out;
}
sub_count = volinfo->sub_count;
@@ -1880,15 +1898,18 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
break;
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
/* Replicate after the clients, then stripe */
- if (volinfo->replica_count == 0)
- return -1;
+ if (volinfo->replica_count == 0) {
+ ret = -1;
+ goto out;
+ }
sub_count = volinfo->replica_count;
cluster_args = replicate_args;
break;
default:
gf_log ("", GF_LOG_ERROR, "volume inconsistency: "
"unrecognized clustering type");
- return -1;
+ ret = -1;
+ goto out;
}
i = 0;
@@ -1901,14 +1922,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
cluster_args[0],
cluster_args[1],
volname, j);
- if (!xl)
- return -1;
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
j++;
}
ret = volgen_xlator_link (xl, trav);
if (ret)
- return -1;
+ goto out;
if (trav == txl)
break;
@@ -1928,14 +1951,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
cluster_args[0],
cluster_args[1],
volname, j);
- if (!xl)
- return -1;
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
j++;
}
ret = volgen_xlator_link (xl, trav);
if (ret)
- return -1;
+ goto out;
if (trav == txl)
break;
@@ -1953,8 +1978,10 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
if (dist_count > 1) {
xl = volgen_graph_add_nolink (graph, "cluster/distribute",
"%s-dht", volname);
- if (!xl)
- return -1;
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
trav = xl;
for (i = 0; i < dist_count; i++)
@@ -1962,28 +1989,50 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
for (; trav != xl; trav = trav->prev) {
ret = volgen_xlator_link (xl, trav);
if (ret)
- return -1;
+ goto out;
+ }
+
+ if (removed_bricks) {
+ if (volinfo->sub_count) {
+ idx = index_of_removed_brick / volinfo->sub_count;
+ if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) {
+ snprintf (volume_name, 1024, "%s-replicate-%d",
+ volname, idx);
+ strcpy (removed_bricklist, volume_name);
+ } else if (volinfo->type != GF_CLUSTER_TYPE_NONE) {
+ snprintf (volume_name, 1024, "%s-stripe-%d ",
+ volname, idx);
+ strcpy (removed_bricklist, volume_name);
+ }
+ }
+ ret = xlator_set_option (xl, "decommissioned-bricks",
+ removed_bricklist);
+ if (ret)
+ goto out;
}
}
ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);
if (ret == -1)
- return -1;
+ goto out;
+
if (ret) {
xl = volgen_graph_add (graph, "features/quota", volname);
- if (!xl)
- return -1;
+ if (!xl) {
+ ret = -1;
+ goto out;
+ }
}
ret = volgen_graph_set_options_generic (graph, set_dict, volname,
&perfxl_option_handler);
if (ret)
- return -1;
+ goto out;
xl = volgen_graph_add_as (graph, "debug/io-stats", volname);
if (!xl)
- return -1;
+ goto out;
ret = volgen_graph_set_options_generic (graph, set_dict, "client",
&loglevel_option_handler);
@@ -1991,6 +2040,11 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
if (!ret)
ret = volgen_graph_set_options_generic (graph, set_dict, "client",
&sys_loglevel_option_handler);
+
+out:
+ if (removed_bricklist)
+ GF_FREE (removed_bricklist);
+
return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 86eeaeb1c..c8fa82819 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -45,7 +45,7 @@
#include "glusterd1-xdr.h"
#include "protocol-common.h"
#include "glusterd-pmap.h"
-
+#include "cli1-xdr.h"
#define GLUSTERD_MAX_VOLUME_NAME 1000
#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
@@ -132,6 +132,7 @@ struct glusterd_brickinfo {
gf_brick_status_t status;
struct rpc_clnt *rpc;
gf_timer_t *timer;
+ int decommissioned;
};
typedef struct glusterd_brickinfo glusterd_brickinfo_t;
@@ -142,16 +143,11 @@ struct gf_defrag_brickinfo_ {
int size;
};
-typedef enum gf_defrag_status_ {
- GF_DEFRAG_STATUS_NOT_STARTED,
- GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
- GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED,
- GF_DEFRAG_STATUS_STOPED,
- GF_DEFRAG_STATUS_COMPLETE,
- GF_DEFRAG_STATUS_FAILED,
- GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
- GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE,
-} gf_defrag_status_t;
+struct glusterd_volinfo_;
+typedef struct glusterd_volinfo_ glusterd_volinfo_t;
+
+typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status);
struct glusterd_defrag_info_ {
uint64_t total_files;
@@ -163,6 +159,8 @@ struct glusterd_defrag_info_ {
char mount[1024];
char databuf[131072];
struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */
+
+ defrag_cbk_fn_t cbk_fn;
};
@@ -219,9 +217,10 @@ struct glusterd_volinfo_ {
char *logdir;
dict_t *gsync_slaves;
-};
-typedef struct glusterd_volinfo_ glusterd_volinfo_t;
+ int decommission_in_progress;
+ xlator_t *xl;
+};
typedef struct glusterd_pending_node_ {
void *node;
@@ -540,6 +539,8 @@ int glusterd_handle_cli_start_volume (rpcsvc_request_t *req);
int glusterd_handle_cli_stop_volume (rpcsvc_request_t *req);
int glusterd_handle_cli_delete_volume (rpcsvc_request_t *req);
+int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk);
/* op-sm functions */
int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr);
@@ -565,9 +566,9 @@ int glusterd_op_stop_volume (dict_t *dict);
int glusterd_op_delete_volume (dict_t *dict);
int glusterd_op_add_brick (dict_t *dict, char **op_errstr);
-int glusterd_op_remove_brick (dict_t *dict);
+int glusterd_op_remove_brick (dict_t *dict, char **op_errstr);
int glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr);
-int glusterd_op_stage_remove_brick (dict_t *dict);
+int glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr);
int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr);
int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
@@ -575,7 +576,8 @@ int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict);
/* misc */
void glusterd_do_replace_brick (void *data);
-int glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick);
+int glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
+ int force, int *need_migrate);
int glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags);
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 09818ce89..a8b7b67a4 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -2545,7 +2545,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
}
goto done;
}
- if (loc->inode && IA_ISREG (loc->inode->ia_type) && name &&
+ if (loc->inode && name &&
(strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {
snprintf (host_buf, 1024, "<POSIX:%s:%s>", priv->hostname,
real_path);