summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorshishirng <shishirng@gluster.com>2012-01-18 15:29:15 +0530
committerVijay Bellur <vijay@gluster.com>2012-02-19 01:31:19 -0800
commit7ba1e1ed45cee56ef51b9c04df99c976546d5d04 (patch)
treed3e4121729d51852a120ba5f067aa8a64f39b624
parent061d70e8195d082043b071118333b7e3173fa3ec (diff)
cluster/dht: Rebalance will be a new glusterfs process
rebalance will not use any maintainance clients. It is replaced by syncops, with the volfile. Brickop (communication between glusterd<->glusterfs process) is used for status and stop commands. Dept-first traversal of dir is maintained, but data is migrated as and when encounterd. fix-layout (dir) do Complete migrate-data of dir fix-layout (subdir) done Rebalance state is saved in the vol file, for restart-ability. A disconnect event and pidfile state determine the defrag-status Signed-off-by: shishirng <shishirng@gluster.com> Change-Id: Iec6c80c84bbb2142d840242c28db3d5f5be94d01 BUG: 763844 Reviewed-on: http://review.gluster.com/2540 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Amar Tumballi <amarts@redhat.com>
-rw-r--r--cli/src/cli-cmd-volume.c30
-rw-r--r--cli/src/cli-rpc-ops.c71
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c79
-rw-r--r--libglusterfs/src/glusterfs.h1
-rw-r--r--rpc/rpc-lib/src/protocol-common.h1
-rw-r--r--rpc/xdr/src/cli1-xdr.h16
-rw-r--r--rpc/xdr/src/cli1-xdr.x10
-rw-r--r--xlators/cluster/dht/src/dht-common.c67
-rw-r--r--xlators/cluster/dht/src/dht-common.h47
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h1
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c606
-rw-r--r--xlators/cluster/dht/src/dht.c46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c6
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c80
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c601
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c9
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c12
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c54
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h29
23 files changed, 1260 insertions, 532 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 6bea948e968..6ab1515e302 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -731,7 +731,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!dict)
goto out;
- if (!((wordcount == 4) || (wordcount == 5) || (wordcount == 6))) {
+ if (!((wordcount == 4) || (wordcount == 5))) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
@@ -741,7 +741,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
index = 3;
} else {
if (strcmp (words[3], "fix-layout") &&
- strcmp (words[3], "migrate-data")) {
+ strcmp (words[3], "start")) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
@@ -750,7 +750,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
}
if (strcmp (words[index], "start") && strcmp (words[index], "stop") &&
- strcmp (words[index], "status")) {
+ strcmp (words[index], "status") && strcmp (words[index], "force")) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
@@ -766,27 +766,19 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
if (wordcount == 5) {
- ret = dict_set_str (dict, "start-type", (char *)words[3]);
- if (ret)
- goto out;
- ret = dict_set_str (dict, "command", (char *)words[4]);
- if (ret)
- goto out;
- }
-
- /* 'force' option is valid only for the 'migrate-data' key */
- if (wordcount == 6) {
- if (strcmp (words[3], "migrate-data") ||
- strcmp (words[4], "start") ||
- strcmp (words[5], "force")) {
+ if ((strcmp (words[3], "fix-layout") ||
+ strcmp (words[4], "start")) &&
+ (strcmp (words[3], "start") ||
+ strcmp (words[4], "force"))) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
}
- ret = dict_set_str (dict, "start-type", "migrate-data-force");
+
+ ret = dict_set_str (dict, "option", (char *)words[4]);
if (ret)
goto out;
- ret = dict_set_str (dict, "command", (char *)words[4]);
+ ret = dict_set_str (dict, "command", (char *)words[3]);
if (ret)
goto out;
}
@@ -1776,7 +1768,7 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_remove_brick_cbk,
"remove brick from volume <VOLNAME>"},
- { "volume rebalance <VOLNAME> [fix-layout|migrate-data] {start|stop|status} [force]",
+ { "volume rebalance <VOLNAME> [fix-layout] {start|stop|status} [force]",
cli_cmd_volume_defrag_cbk,
"rebalance operations"},
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 6ed380bffa8..d1888415cef 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -1052,9 +1052,9 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
volname);
} else {
snprintf (msg, sizeof (msg),
- "stopped rebalance process of volume %s \n"
- "(after rebalancing %"PRId64" files totaling"
- " %"PRId64" bytes)", volname, files, size);
+ "Stopped rebalance process on volume %s \n"
+ "(after rebalancing %"PRId64" bytes - "
+ "%"PRId64" files)", volname, size, files);
}
goto done;
}
@@ -1065,7 +1065,7 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
"%s", rsp.op_errstr);
else
snprintf (msg, sizeof (msg),
- "failed to get the status of "
+ "Failed to get the status of "
"rebalance process");
goto done;
}
@@ -1074,11 +1074,8 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
case GF_DEFRAG_STATUS_NOT_STARTED:
status = "not started";
break;
- case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED:
- status = "step 1: layout fix in progress";
- break;
- case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED:
- status = "step 2: data migration in progress";
+ case GF_DEFRAG_STATUS_STARTED:
+ status = "in progress";
break;
case GF_DEFRAG_STATUS_STOPPED:
status = "stopped";
@@ -1089,38 +1086,17 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
case GF_DEFRAG_STATUS_FAILED:
status = "failed";
break;
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- status = "step 1: layout fix complete";
- break;
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
- status = "step 2: data migration complete";
- break;
- case GF_DEFRAG_STATUS_PAUSED:
- status = "paused";
- break;
}
- if (files && (rsp.op_errno == 1)) {
- snprintf (msg, sizeof (msg),
- "rebalance %s: fixed layout %"PRId64,
- status, files);
- goto done;
- }
- if (files && (rsp.op_errno == 6)) {
- snprintf (msg, sizeof (msg),
- "rebalance %s: fixed layout %"PRId64,
- status, files);
- goto done;
- }
- if (files) {
- snprintf (msg, sizeof (msg),
- "rebalance %s: rebalanced %"PRId64
+ if (files || size || lookup) {
+ snprintf (msg, sizeof(msg),
+ "Rebalance %s: rebalanced %"PRId64
" files of size %"PRId64" (total files"
" scanned %"PRId64")", status,
files, size, lookup);
goto done;
}
- snprintf (msg, sizeof (msg), "rebalance %s", status);
+ snprintf (msg, sizeof (msg), "Rebalance %s", status);
goto done;
}
@@ -1129,7 +1105,7 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
else
snprintf (msg, sizeof (msg),
- "starting rebalance on volume %s has been %s",
+ "Starting rebalance on volume %s has been %s",
volname, (rsp.op_ret) ? "unsuccessful":
"successful");
@@ -1398,24 +1374,18 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
case GF_DEFRAG_STATUS_NOT_STARTED:
status = "not started";
break;
- case GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
+ case GF_DEFRAG_STATUS_STARTED:
status = "in progress";
break;
case GF_DEFRAG_STATUS_STOPPED:
status = "stopped";
break;
case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
status = "completed";
break;
case GF_DEFRAG_STATUS_FAILED:
status = "failed";
break;
- case GF_DEFRAG_STATUS_PAUSED:
- status = "paused";
- break;
}
if (rsp.dict.dict_len) {
@@ -2479,20 +2449,19 @@ gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,
if (strcmp (cmd_str, "start") == 0) {
cmd = GF_DEFRAG_CMD_START;
- ret = dict_get_str (dict, "start-type", &cmd_str);
+ ret = dict_get_str (dict, "option", &cmd_str);
if (!ret) {
- if (strcmp (cmd_str, "fix-layout") == 0) {
- cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
- }
- if (strcmp (cmd_str, "migrate-data") == 0) {
- cmd = GF_DEFRAG_CMD_START_MIGRATE_DATA;
- }
- if (strcmp (cmd_str, "migrate-data-force") == 0) {
- cmd = GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE;
+ if (strcmp (cmd_str, "force") == 0) {
+ cmd = GF_DEFRAG_CMD_START_FORCE;
}
}
goto done;
}
+
+ if (strcmp (cmd_str, "fix-layout") == 0) {
+ cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
+ goto done;
+ }
if (strcmp (cmd_str, "stop") == 0) {
cmd = GF_DEFRAG_CMD_STOP;
goto done;
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index e2b658a98a2..08f8a05f9de 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -736,6 +736,79 @@ out:
return ret;
}
+
+int
+glusterfs_handle_defrag (rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT (req);
+ this = THIS;
+ GF_ASSERT (this);
+
+ ctx = glusterfs_ctx_get ();
+ GF_ASSERT (ctx);
+
+ active = ctx->active;
+ any = active->first;
+ if (!xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (xlator_req.input.input_val,
+ xlator_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ xlator = xlator_search_by_name (any, xlator_req.name);
+ if (!xlator) {
+ snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+ xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new ();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xlator->notify (xlator, GF_EVENT_VOLUME_DEFRAG, dict, output);
+
+ ret = glusterfs_translator_info_response_send (req, ret,
+ msg, output);
+out:
+ if (dict)
+ dict_unref (dict);
+ if (xlator_req.input.input_val)
+ free (xlator_req.input.input_val); // malloced by xdr
+ if (output)
+ dict_unref (output);
+ if (xlator_req.name)
+ free (xlator_req.name); //malloced by xdr
+
+ return ret;
+
+}
int
glusterfs_handle_brick_status (rpcsvc_request_t *req)
{
@@ -887,6 +960,9 @@ glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
case GLUSTERD_BRICK_STATUS:
ret = glusterfs_handle_brick_status (req);
break;
+ case GLUSTERD_BRICK_XLATOR_DEFRAG:
+ ret = glusterfs_handle_defrag (req);
+ break;
default:
break;
}
@@ -943,7 +1019,8 @@ rpcsvc_actor_t glusterfs_actors[] = {
[GLUSTERD_BRICK_TERMINATE] = { "TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
[GLUSTERD_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL, 0},
[GLUSTERD_BRICK_XLATOR_HEAL] = { "TRANSLATOR HEAL", GLUSTERD_BRICK_XLATOR_HEAL, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0}
+ [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0},
+ [GLUSTERD_BRICK_XLATOR_DEFRAG] = { "TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_rpc_msg, NULL, NULL, 0}
};
struct rpcsvc_program glusterfs_mop_prog = {
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index ccfdc11dc17..ef9cdfe8591 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -383,6 +383,7 @@ typedef enum {
GF_EVENT_TRANSLATOR_INFO,
GF_EVENT_TRIGGER_HEAL,
GF_EVENT_AUTH_FAILED,
+ GF_EVENT_VOLUME_DEFRAG,
GF_EVENT_MAXVAL,
} glusterfs_event_t;
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 6ef4cb702c3..874f46e0b70 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -184,6 +184,7 @@ enum glusterd_brick_procnum {
GLUSTERD_BRICK_XLATOR_HEAL,
GLUSTERD_BRICK_STATUS,
GLUSTERD_BRICK_OP,
+ GLUSTERD_BRICK_XLATOR_DEFRAG,
GLUSTERD_BRICK_MAXVALUE,
};
diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h
index d136ec255b8..903b6ff724f 100644
--- a/rpc/xdr/src/cli1-xdr.h
+++ b/rpc/xdr/src/cli1-xdr.h
@@ -47,22 +47,16 @@ enum gf_cli_defrag_type {
GF_DEFRAG_CMD_STOP = 1 + 1,
GF_DEFRAG_CMD_STATUS = 1 + 2,
GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
- GF_DEFRAG_CMD_START_MIGRATE_DATA = 1 + 4,
- GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE = 1 + 5,
- GF_DEFRAG_CMD_START_FORCE = 1 + 6,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
};
typedef enum gf_cli_defrag_type gf_cli_defrag_type;
enum gf_defrag_status_t {
GF_DEFRAG_STATUS_NOT_STARTED = 0,
- GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED = 1,
- GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED = 2,
- GF_DEFRAG_STATUS_STOPPED = 3,
- GF_DEFRAG_STATUS_COMPLETE = 4,
- GF_DEFRAG_STATUS_FAILED = 5,
- GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE = 6,
- GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE = 7,
- GF_DEFRAG_STATUS_PAUSED = 8,
+ GF_DEFRAG_STATUS_STARTED = 1,
+ GF_DEFRAG_STATUS_STOPPED = 2,
+ GF_DEFRAG_STATUS_COMPLETE = 3,
+ GF_DEFRAG_STATUS_FAILED = 4,
};
typedef enum gf_defrag_status_t gf_defrag_status_t;
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 5f491c7b47e..f45712ce0c1 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -3,21 +3,15 @@
GF_DEFRAG_CMD_STOP,
GF_DEFRAG_CMD_STATUS,
GF_DEFRAG_CMD_START_LAYOUT_FIX,
- GF_DEFRAG_CMD_START_MIGRATE_DATA,
- GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE,
GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */
} ;
enum gf_defrag_status_t {
GF_DEFRAG_STATUS_NOT_STARTED,
- GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
- GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
GF_DEFRAG_STATUS_STOPPED,
GF_DEFRAG_STATUS_COMPLETE,
- GF_DEFRAG_STATUS_FAILED,
- GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
- GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE,
- GF_DEFRAG_STATUS_PAUSED
+ GF_DEFRAG_STATUS_FAILED
} ;
enum gf1_cluster_type {
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 29b3dca83e5..360a432cd5f 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -4296,16 +4296,22 @@ dht_forget (xlator_t *this, inode_t *inode)
int
dht_notify (xlator_t *this, int event, void *data, ...)
{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ struct timeval time = {0,};
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
conf = this->private;
if (!conf)
@@ -4418,6 +4424,36 @@ dht_notify (xlator_t *this, int event, void *data, ...)
UNLOCK (&conf->subvolume_lock);
break;
+ case GF_EVENT_VOLUME_DEFRAG:
+ {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dict_get_int32 (dict, "rebalance-command",
+ (int32_t*)&cmd);
+ if (ret)
+ return ret;
+ LOCK (&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if (cmd == GF_DEFRAG_CMD_STATUS)
+ gf_defrag_status_get (defrag, output);
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ gf_defrag_stop (defrag, output);
+ }
+unlock:
+ UNLOCK (&defrag->lock);
+ return 0;
+ break;
+ }
+
default:
propagate = 1;
break;
@@ -4433,8 +4469,19 @@ dht_notify (xlator_t *this, int event, void *data, ...)
/* if all subvols have reported status, no need to hide anything
or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
+ if (have_heard_from_all) {
propagate = 1;
+ if (conf->defrag) {
+ ret = pthread_create (&conf->defrag->th, NULL,
+ gf_defrag_start, this);
+ if (ret) {
+ conf->defrag = NULL;
+ GF_FREE (conf->defrag);
+ kill (getpid(), SIGTERM);
+ }
+ }
+ }
+
if (!had_heard_from_all && have_heard_from_all) {
/* This is the first event which completes aggregation
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 749abe5380d..d97ef9f5853 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -175,6 +175,43 @@ struct dht_du {
};
typedef struct dht_du dht_du_t;
+enum gf_defrag_type {
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+};
+typedef enum gf_defrag_type gf_defrag_type;
+
+enum gf_defrag_status_t {
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+};
+typedef enum gf_defrag_status_t gf_defrag_status_t;
+
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ gf_lock_t lock;
+ int cmd;
+ pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ inode_t *root_inode;
+
+};
+
+typedef struct gf_defrag_info_ gf_defrag_info_t;
+
struct dht_conf {
gf_lock_t subvolume_lock;
int subvolume_cnt;
@@ -208,6 +245,9 @@ struct dht_conf {
/* to keep track of nodes which are decomissioned */
xlator_t **decommissioned_bricks;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
};
typedef struct dht_conf dht_conf_t;
@@ -608,6 +648,13 @@ int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
struct iatt *postparent);
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+void*
+gf_defrag_start (void *this);
#endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 21fb5a7ca20..a12ed153499 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -37,6 +37,7 @@ enum gf_dht_mem_types_ {
gf_switch_mt_switch_struct,
gf_dht_mt_subvol_time,
gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index dfd6f3b6ecc..46fc8773eff 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -669,7 +669,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
loc->path, from->name, strerror (errno));
}
- if (uuid_compare (empty_iatt.ia_gfid, loc->inode->gfid) == 0) {
+ if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
/* take out the source from namespace */
ret = syncop_unlink (from, loc);
if (ret) {
@@ -805,3 +805,607 @@ dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
frame, frame);
return ret;
}
+
+int
+gf_listener_stop (void)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ ctx = glusterfs_ctx_get ();
+ GF_ASSERT (ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = unlink (cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
+ }
+ }
+
+ if (ret) {
+ this = THIS;
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
+ "socket %s, error: %s", cmd_args->sock_file,
+ strerror (errno));
+ }
+ return ret;
+}
+
+void
+dht_build_root_inode (xlator_t *this, inode_t **inode)
+{
+ inode_table_t *itable = NULL;
+ uuid_t root_gfid = {0, };
+
+ itable = inode_table_new (0, this);
+ if (!itable)
+ return;
+
+ root_gfid[15] = 1;
+ *inode = inode_find (itable, root_gfid);
+}
+
+void
+dht_build_root_loc (inode_t *inode, loc_t *loc)
+{
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset (loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
+}
+
+
+/* return values: 1 -> error, bug ignore and continue
+ 0 -> proceed
+ -1 -> error, handle it */
+int32_t
+gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+{
+ /* if errno is not ENOSPC or ENOTCONN, we can still continue
+ with rebalance process */
+ if ((errno != ENOSPC) || (errno != ENOTCONN))
+ return 1;
+
+ if (errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ if (errno == ENOSPC) {
+ /* rebalance process itself failed, may be
+ remote brick went down, or write failed due to
+ disk full etc etc.. */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ return -1;
+ }
+
+ return 0;
+}
+
+/* We do a depth first traversal of directories. But before we move into
+ * subdirs, we complete the data migration of those directories whose layouts
+ * have been fixed
+ */
+
+int
+gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ dict_t *dict = NULL;
+ struct iatt iatt = {0,};
+ int32_t op_errno = 0;
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+ if ((ret < 0) || (ret && (errno == ENOENT)))
+ break;
+
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list))
+ break;
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ defrag->num_files_lookedup++;
+ if (entry->d_stat.ia_nlink > 1)
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ " gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ /* if distribute is present, it will honor this key.
+ * -1 is returned if distribute is not present or file
+ * doesn't have a link-file. If file has link-file, the
+ * path of link-file will be the value, and also that
+ * guarantees that file has to be mostly migrated */
+
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_LINKINFO_KEY);
+ if (ret < 0) {
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, migrate_data,
+ 0);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "setxattr "
+ "failed for %s", entry_loc.path);
+
+ if (ret == -1) {
+ op_errno = errno;
+ ret = gf_defrag_handle_migrate_error (op_errno,
+ defrag);
+
+ if (!ret)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "setxattr on %s failed: %s",
+ entry_loc.path,
+ strerror (op_errno));
+ else if (ret == 1)
+ continue;
+ else if (ret == -1)
+ goto out;
+ }
+
+ LOCK (&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK (&defrag->lock);
+ }
+
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+ return ret;
+
+}
+
+
+int
+gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
+{
+ int ret = -1;
+ loc_t entry_loc = {0,};
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ dict_t *dict = NULL;
+ off_t offset = 0;
+ struct iatt iatt = {0,};
+
+ ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
+ loc->path);
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX)
+ gf_defrag_migrate_data (this, defrag, loc, migrate_data);
+
+ gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+
+ fd = fd_create (loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir (this, loc, fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
+ loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD (&entries.list);
+ while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
+ &entries)) != 0)
+ {
+ if ((ret < 0) || (ret && (errno == ENOENT)))
+ break;
+ free_entries = _gf_true;
+
+ if (list_empty (&entries.list))
+ break;
+ list_for_each_entry_safe (entry, tmp, &entries.list, list) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
+ goto out;
+ }
+
+ offset = entry->d_off;
+
+ if (!strcmp (entry->d_name, ".") ||
+ !strcmp (entry->d_name, ".."))
+ continue;
+
+ if (!IA_ISDIR (entry->d_stat.ia_type))
+ continue;
+
+ loc_wipe (&entry_loc);
+ ret =dht_build_child_loc (this, &entry_loc, loc,
+ entry->d_name);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Child loc"
+ " build failed");
+ goto out;
+ }
+
+ if (uuid_is_null (entry->d_stat.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ "gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ entry_loc.inode->ia_type = entry->d_stat.ia_type;
+
+ uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (uuid_is_null (loc->gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "%s/%s"
+ "gfid not present", loc->path,
+ entry->d_name);
+ continue;
+ }
+
+ uuid_copy (entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
+ NULL, NULL);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "%s"
+ " lookup failed", entry_loc.path);
+ continue;
+ }
+
+ ret = syncop_setxattr (this, &entry_loc, fix_layout,
+ 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Setxattr "
+ "failed for %s", entry_loc.path);
+ defrag->defrag_status =
+ GF_DEFRAG_STATUS_FAILED;
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
+ fix_layout, migrate_data);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Fix layout "
+ "failed for %s", entry_loc.path);
+ goto out;
+ }
+
+ }
+ gf_dirent_free (&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD (&entries.list);
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free (&entries);
+
+ loc_wipe (&entry_loc);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd)
+ fd_unref (fd);
+
+ return ret;
+
+}
+
+
+int
+gf_defrag_start_crawl (void *data)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = -1;
+ loc_t loc = {0,};
+ struct iatt iatt = {0,};
+ struct iatt parent = {0,};
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+
+ this = data;
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ dht_build_root_inode (this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc (defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+ goto out;
+ }
+
+ fix_layout = dict_new ();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
+ goto out;
+ }
+
+ ret = syncop_setxattr (this, &loc, fix_layout, 0);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
+ loc.path);
+ goto out;
+ }
+
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ migrate_data = dict_new ();
+ if (!migrate_data) {
+ ret = -1;
+ goto out;
+ }
+ if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data", "force");
+ else
+ ret = dict_set_str (migrate_data,
+ "distribute.migrate-data",
+ "non-force");
+ if (ret)
+ goto out;
+ }
+ ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
+ migrate_data);
+
+out:
+ LOCK (&defrag->lock);
+ {
+ gf_defrag_status_get (defrag, NULL);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (defrag)
+ GF_FREE (defrag);
+
+ return ret;
+}
+
+
+static int
+gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop();
+
+ GF_FREE (data);
+ STACK_DESTROY (sync_frame->root);
+ kill (getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start (void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
+ gf_defrag_done, frame, this);
+
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Could not create"
+ " task for rebalance");
+out:
+ return NULL;
+}
+
+int
+gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64 (dict, "files", files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set file count");
+
+ ret = dict_set_uint64 (dict, "size", size);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set size of xfer");
+
+ ret = dict_set_uint64 (dict, "lookups", lookup);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set lookedup file count");
+
+ ret = dict_set_int32 (dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set status");
+log:
+ gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
+ PRIu64", lookups: %"PRIu64, files, size, lookup);
+
+
+out:
+ return 0;
+}
+
+int
+gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+{
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ GF_ASSERT (defrag);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+
+ gf_defrag_status_get (defrag, output);
+ ret = 0;
+out:
+ gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 18fee7cd3c6..816bf868e88 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -182,11 +182,20 @@ out:
int
notify (xlator_t *this, int event, void *data, ...)
{
- int ret = -1;
+ int ret = -1;
+ va_list ap;
+ dict_t *output = NULL;
GF_VALIDATE_OR_GOTO ("dht", this, out);
- ret = dht_notify (this, event, data);
+
+ if (!data)
+ goto out;
+
+ va_start (ap, data);
+ output = va_arg (ap, dict_t*);
+
+ ret = dht_notify (this, event, data, output);
out:
return ret;
@@ -343,10 +352,13 @@ out:
int
init (xlator_t *this)
{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -366,6 +378,24 @@ init (xlator_t *this)
goto err;
}
+ ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
+ gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO (this->name, defrag, err);
+
+ LOCK_INIT (&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ defrag->cmd = cmd;
+
+ conf->defrag = defrag;
+ }
+
+
conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
/* If option is not "auto", other options _should_ be boolean */
@@ -550,5 +580,9 @@ struct volume_options options[] = {
{ .key = {"decommissioned-bricks"},
.type = GF_OPTION_TYPE_ANY,
},
+ { .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+
{ .key = {NULL} },
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index c170972ce83..fc9c9cf0a93 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -31,6 +31,7 @@
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
#include "run.h"
+#include <sys/signal.h>
/* misc */
@@ -1384,8 +1385,6 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
switch (volinfo->defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
volinfo->defrag_status = 0;
default:
break;
@@ -1420,6 +1419,9 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
int32_t replica_count = 0;
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t *tmp = NULL;
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX];
+
ret = dict_get_str (dict, "volname", &volname);
@@ -1456,7 +1458,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
if (volinfo->defrag) {
LOCK (&volinfo->defrag->lock);
- volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;
+ //volinfo->defrag_status = GF_DEFRAG_STATUS_PAUSED;
UNLOCK (&volinfo->defrag->lock);
}
@@ -1470,13 +1472,14 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
case GF_OP_CMD_ABORT:
{
if (volinfo->decommission_in_progress) {
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
+ priv = THIS->private;
+ if (!priv)
+ return ret;
- volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ glusterd_service_stop ("rebalance", pidfile, SIGTERM, 1);
- UNLOCK (&volinfo->defrag->lock);
- }
}
/* Fall back to the old volume file */
@@ -1577,8 +1580,6 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
switch (volinfo->defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- case GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE:
- case GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE:
volinfo->defrag_status = 0;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index b80164e8d0e..b06dd28cf3f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -346,6 +346,11 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ snprintf (key, 256, "volume%d.rebalance", count);
+ ret = dict_set_int32 (volumes, key, volinfo->defrag_cmd);
+ if (ret)
+ goto out;
+
list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
char brick[1024] = {0,};
snprintf (key, 256, "volume%d.brick%d", count, i);
@@ -2046,6 +2051,7 @@ glusterd_rpc_create (struct rpc_clnt **rpc,
GF_ASSERT (this);
GF_ASSERT (options);
+
new_rpc = rpc_clnt_new (options, this->ctx, this->name);
if (!new_rpc)
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 4a0561d1e6e..2a4bf82ee69 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -150,6 +150,8 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
{
int ret = -1;
gd1_mgmt_brick_op_req *brick_req = NULL;
+ char *volname = NULL;
+ char name[1024] = {0,};
GF_ASSERT (op < GD_OP_MAX);
GF_ASSERT (op > GD_OP_NONE);
@@ -204,6 +206,21 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
brick_req->name = "";
}
break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ brick_req = GF_CALLOC (1, sizeof (*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req)
+ goto out;
+
+ brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ snprintf (name, 1024, "%s-dht",volname);
+ brick_req->name = gf_strdup (name);
+
+ break;
default:
goto out;
break;
@@ -1617,6 +1634,7 @@ glusterd_op_build_payload (dict_t **req)
case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
{
dict_t *dict = ctx;
dict_copy (dict, req_dict);
@@ -2173,6 +2191,7 @@ glusterd_need_brick_op (glusterd_op_t op)
switch (op) {
case GD_OP_PROFILE_VOLUME:
case GD_OP_STATUS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
ret = _gf_true;
break;
default:
@@ -2368,6 +2387,7 @@ glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
break;
case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
ret = glusterd_op_stage_rebalance (dict, op_errstr);
break;
@@ -2464,6 +2484,7 @@ glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
break;
case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);
break;
@@ -2613,6 +2634,10 @@ glusterd_handle_brick_rsp (glusterd_brickinfo_t *brickinfo,
op_ctx, op_errstr);
break;
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ dict_copy (rsp_dict, op_ctx);
+ break;
+
default:
break;
}
@@ -2754,6 +2779,7 @@ glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr)
priv = this->private;
GF_ASSERT (priv);
+
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
@@ -2963,6 +2989,56 @@ out:
}
+
+static int
+glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[2048] = {0,};
+ glusterd_pending_node_t *pending_node = NULL;
+
+ this = THIS;
+ GF_ASSERT (this);
+
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "Volume %s does not exist",
+ volname);
+
+ *op_errstr = gf_strdup (msg);
+ gf_log ("", GF_LOG_ERROR, "%s", msg);
+ goto out;
+ }
+ pending_node = GF_CALLOC (1, sizeof (*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = volinfo;
+ pending_node->type = GD_NODE_REBALANCE;
+ list_add_tail (&pending_node->list,
+ &opinfo.pending_bricks);
+ pending_node = NULL;
+ }
+
+out:
+ return ret;
+}
+
+
+
+
static int
glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr)
{
@@ -3196,6 +3272,9 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr)
ret = glusterd_bricks_select_status_volume (dict, op_errstr);
break;
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr);
+ break;
default:
break;
}
@@ -3758,6 +3837,7 @@ glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
dict_unref (ctx);
break;
default:
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index e5a907eaa46..0fe8d3899fe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -46,363 +46,35 @@
#include "cli1-xdr.h"
#include "xdr-generic.h"
-/* return values - 0: success, +ve: stopped, -ve: failure */
-int
-gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir)
-{
- int ret = -1;
- DIR *fd = NULL;
- glusterd_defrag_info_t *defrag = NULL;
- struct dirent *entry = NULL;
- struct stat stbuf = {0,};
- char full_path[PATH_MAX] = {0,};
- char linkinfo[PATH_MAX] = {0,};
- char force_string[64] = {0,};
-
- if (!volinfo->defrag)
- goto out;
-
- defrag = volinfo->defrag;
-
- fd = opendir (dir);
- if (!fd)
- goto out;
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)) {
- strcpy (force_string, "force");
- } else {
- strcpy (force_string, "not-force");
- }
-
- while ((entry = readdir (fd))) {
- if (!entry)
- break;
-
- /* We have to honor 'stop' (or 'pause'|'commit') as early
- as possible */
- if (volinfo->defrag_status !=
- GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) {
- /* It can be one of 'stopped|paused|commit' etc */
- closedir (fd);
- ret = 1;
- goto out;
- }
-
- if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
- continue;
-
- snprintf (full_path, PATH_MAX, "%s/%s", dir, entry->d_name);
-
- ret = lstat (full_path, &stbuf);
- if (ret == -1)
- continue;
-
- if (S_ISDIR (stbuf.st_mode))
- continue;
-
- defrag->num_files_lookedup += 1;
-
- /* TODO: bring in feature to support hardlink rebalance */
- if (stbuf.st_nlink > 1)
- continue;
-
- /* if distribute is present, it will honor this key.
- -1 is returned if distribute is not present or file doesn't
- have a link-file. If file has link-file, the path of
- link-file will be the value, and also that guarantees
- that file has to be mostly migrated */
- ret = sys_lgetxattr (full_path, GF_XATTR_LINKINFO_KEY,
- &linkinfo, PATH_MAX);
- if (ret <= 0)
- continue;
-
- ret = sys_lsetxattr (full_path, "distribute.migrate-data",
- force_string, strlen (force_string), 0);
-
- /* if errno is not ENOSPC or ENOTCONN, we can still continue
- with rebalance process */
- if ((ret == -1) && ((errno != ENOSPC) ||
- (errno != ENOTCONN)))
- continue;
-
- if ((ret == -1) && (errno == ENOTCONN)) {
- /* Most probably mount point went missing (mostly due
- to a brick down), say rebalance failure to user,
- let him restart it if everything is fine */
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- break;
- }
-
- if ((ret == -1) && (errno == ENOSPC)) {
- /* rebalance process itself failed, may be
- remote brick went down, or write failed due to
- disk full etc etc.. */
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- break;
- }
-
- LOCK (&defrag->lock);
- {
- defrag->total_files += 1;
- defrag->total_data += stbuf.st_size;
- }
- UNLOCK (&defrag->lock);
- }
- closedir (fd);
-
- fd = opendir (dir);
- if (!fd)
- goto out;
- while ((entry = readdir (fd))) {
- if (!entry)
- break;
-
- /* We have to honor 'stop' (or 'pause'|'commit') as early
- as possible */
- if (volinfo->defrag_status !=
- GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED) {
- /* It can be one of 'stopped|paused|commit' etc */
- closedir (fd);
- ret = 1;
- goto out;
- }
-
- if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
- continue;
-
- snprintf (full_path, 1024, "%s/%s", dir, entry->d_name);
-
- ret = lstat (full_path, &stbuf);
- if (ret == -1)
- continue;
-
- if (!S_ISDIR (stbuf.st_mode))
- continue;
-
- ret = gf_glusterd_rebalance_move_data (volinfo, full_path);
- if (ret)
- break;
- }
- closedir (fd);
-
- if (!entry)
- ret = 0;
-out:
- return ret;
-}
+int32_t
+glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe);
-/* return values - 0: success, +ve: stopped, -ve: failure */
int
-gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir)
+glusterd_defrag_update_state (glusterd_volinfo_t *volinfo,
+ glusterd_defrag_info_t *defrag)
{
- int ret = -1;
- char full_path[1024] = {0,};
- struct stat stbuf = {0,};
- DIR *fd = NULL;
- struct dirent *entry = NULL;
+ int ret = -1;
+ int cmd = 0;
- if (!volinfo->defrag)
- goto out;
-
- fd = opendir (dir);
- if (!fd)
- goto out;
-
- while ((entry = readdir (fd))) {
- if (!entry)
- break;
-
- /* We have to honor 'stop' (or 'pause'|'commit') as early
- as possible */
- if (volinfo->defrag_status !=
- GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED) {
- /* It can be one of 'stopped|paused|commit' etc */
- closedir (fd);
- ret = 1;
- goto out;
- }
-
- if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, ".."))
- continue;
-
- snprintf (full_path, 1024, "%s/%s", dir, entry->d_name);
-
- ret = lstat (full_path, &stbuf);
- if (ret == -1)
- continue;
-
- if (S_ISDIR (stbuf.st_mode)) {
- /* Fix the layout of the directory */
- /* TODO: isn't error code not important ? */
- sys_lsetxattr (full_path, "distribute.fix.layout",
- "yes", 3, 0);
-
- volinfo->defrag->total_files += 1;
-
- /* Traverse into subdirectory */
- ret = gf_glusterd_rebalance_fix_layout (volinfo,
- full_path);
- if (ret)
- break;
- }
- }
- closedir (fd);
-
- if (!entry)
- ret = 0;
-
-out:
- return ret;
-}
-
-void *
-glusterd_defrag_start (void *data)
-{
- glusterd_volinfo_t *volinfo = data;
- glusterd_defrag_info_t *defrag = NULL;
- int ret = -1;
- struct stat stbuf = {0,};
-
- THIS = volinfo->xl;
- defrag = volinfo->defrag;
- if (!defrag)
- goto out;
-
- sleep (1);
- ret = lstat (defrag->mount, &stbuf);
- if ((ret == -1) && (errno == ENOTCONN)) {
- /* Wait for some more time before starting rebalance */
- sleep (2);
- ret = lstat (defrag->mount, &stbuf);
- if (ret == -1) {
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- volinfo->rebalance_files = 0;
- volinfo->rebalance_data = 0;
- volinfo->lookedup_files = 0;
- goto out;
- }
- }
-
- /* Fix the root ('/') first */
- sys_lsetxattr (defrag->mount, "distribute.fix.layout",
- "yes", 3, 0);
-
- if ((defrag->cmd == GF_DEFRAG_CMD_START) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX)) {
- /* root's layout got fixed */
- defrag->total_files = 1;
-
- /* Step 1: Fix layout of all the directories */
- ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount);
- if (ret < 0)
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- /* in both 'stopped' or 'failure' cases goto out */
- if (ret) {
- goto out;
- }
-
- /* Completed first step */
- volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE;
- }
-
- if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
- /* It was used by number of layout fixes on directories */
- defrag->total_files = 0;
-
- volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED;
-
- /* Step 2: Iterate over directories to move data */
- ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount);
- if (ret < 0)
- volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
- /* in both 'stopped' or 'failure' cases goto out */
- if (ret) {
- goto out;
- }
-
- /* Completed second step */
- volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_COMPLETE;
- }
-
- /* Completed whole process */
- if ((defrag->cmd == GF_DEFRAG_CMD_START) ||
- (defrag->cmd == GF_DEFRAG_CMD_START_FORCE))
- volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
-
- volinfo->rebalance_files = defrag->total_files;
- volinfo->rebalance_data = defrag->total_data;
- volinfo->lookedup_files = defrag->num_files_lookedup;
-out:
- volinfo->defrag = NULL;
- if (defrag) {
- gf_log ("rebalance", GF_LOG_INFO, "rebalance on %s complete",
- defrag->mount);
-
- ret = runcmd ("umount", "-l", defrag->mount, NULL);
- LOCK_DESTROY (&defrag->lock);
-
- if (defrag->cbk_fn) {
- defrag->cbk_fn (volinfo, volinfo->defrag_status);
- }
-
- GF_FREE (defrag);
- }
- return NULL;
-}
-
-int
-glusterd_defrag_stop_validate (glusterd_volinfo_t *volinfo,
- char *op_errstr, size_t len)
-{
- int ret = -1;
- if (glusterd_is_defrag_on (volinfo) == 0) {
- snprintf (op_errstr, len, "Rebalance on %s is either Completed "
- "or not yet started", volinfo->volname);
- goto out;
- }
- ret = 0;
-out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
-
-int
-glusterd_defrag_stop (glusterd_volinfo_t *volinfo, u_quad_t *files,
- u_quad_t *size, char *op_errstr, size_t len)
-{
- /* TODO: set a variable 'stop_defrag' here, it should be checked
- in defrag loop */
- int ret = -1;
GF_ASSERT (volinfo);
- GF_ASSERT (files);
- GF_ASSERT (size);
- GF_ASSERT (op_errstr);
-
- if (!volinfo) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT (defrag);
- ret = glusterd_defrag_stop_validate (volinfo, op_errstr, len);
- if (ret) {
- /* rebalance may be happening on other nodes */
- ret = 0;
- goto out;
- }
-
- ret = 0;
if (volinfo->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
goto out;
}
- LOCK (&volinfo->defrag->lock);
+ LOCK (&defrag->lock);
{
- volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
- *files = volinfo->defrag->total_files;
- *size = volinfo->defrag->total_data;
+ cmd = defrag->cmd;
+ if ((cmd == GF_DEFRAG_CMD_START) || (cmd ==
+ GF_DEFRAG_CMD_START_FORCE) || (cmd ==
+ GF_DEFRAG_CMD_START_LAYOUT_FIX))
+ volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ else if (cmd == GF_DEFRAG_CMD_STOP)
+ volinfo->defrag_status = GF_DEFRAG_STATUS_STOPPED;
}
- UNLOCK (&volinfo->defrag->lock);
+ UNLOCK (&defrag->lock);
ret = 0;
out:
@@ -475,10 +147,9 @@ glusterd_rebalance_cmd_attempted_log (int cmd, char *volname)
gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
"volume start layout fix on %s", volname);
break;
- case GF_DEFRAG_CMD_START_MIGRATE_DATA:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
+ case GF_DEFRAG_CMD_START_FORCE:
gf_cmd_log ("Volume rebalance"," on volname: %s "
- "cmd: start data migrate attempted",
+ "cmd: start data force attempted",
volname);
gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
"volume start migrate data on %s", volname);
@@ -539,6 +210,97 @@ out:
return ret;
}
+int32_t
+glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ char pidfile[PATH_MAX];
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ if (!priv)
+ return 0;
+
+ volinfo = mydata;
+ if (!volinfo)
+ return 0;
+
+ defrag = volinfo->defrag;
+ if (!defrag)
+ return 0;
+
+ if ((event == RPC_CLNT_DISCONNECT) && defrag->connected)
+ volinfo->defrag = NULL;
+
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ {
+ if (defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 1;
+ }
+ UNLOCK (&defrag->lock);
+
+ gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT",
+ rpc->conn.trans->name);
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT:
+ {
+ if (!defrag->connected)
+ return 0;
+
+ LOCK (&defrag->lock);
+ {
+ defrag->connected = 0;
+ }
+ UNLOCK (&defrag->lock);
+
+ if (!glusterd_is_service_running (pidfile, NULL)) {
+ glusterd_defrag_update_state (volinfo, defrag);
+ } else {
+ volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ }
+
+ /* Success or failure, Reset cmd in volinfo */
+
+ volinfo->defrag_cmd = 0;
+
+ glusterd_store_volinfo (volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+
+ if (defrag->rpc) {
+ rpc_clnt_unref (defrag->rpc);
+ defrag->rpc = NULL;
+ }
+ if (defrag->cbk_fn)
+ defrag->cbk_fn (volinfo, volinfo->defrag_status);
+
+ if (defrag)
+ GF_FREE (defrag);
+ gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT",
+ rpc->conn.trans->name);
+ break;
+ }
+ default:
+ gf_log ("", GF_LOG_TRACE,
+ "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
int
glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
size_t len, int cmd, defrag_cbk_fn_t cbk)
@@ -547,6 +309,11 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
glusterd_defrag_info_t *defrag = NULL;
runner_t runner = {0,};
glusterd_conf_t *priv = NULL;
+ char defrag_path[PATH_MAX];
+ struct stat buf = {0,};
+ char sockfile[PATH_MAX] = {0,};
+ char pidfile[PATH_MAX] = {0,};
+ dict_t *options = NULL;
priv = THIS->private;
@@ -567,18 +334,29 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
defrag->cmd = cmd;
LOCK_INIT (&defrag->lock);
- snprintf (defrag->mount, 1024, "%s/mount/%s",
- priv->workdir, volinfo->volname);
- /* Create a directory, mount glusterfs over it, start glusterfs-defrag */
- runinit (&runner);
- runner_add_args (&runner, "mkdir", "-p", defrag->mount, NULL);
- ret = runner_run_reuse (&runner);
- if (ret) {
- runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
+
+ volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ volinfo->defrag_cmd = cmd;
+ glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+
+ GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv);
+ ret = stat (defrag_path, &buf);
+ if (ret && (errno == ENOENT)) {
+ runinit (&runner);
+ runner_add_args (&runner, "mkdir", "-p", defrag_path, NULL);
+ ret = runner_run_reuse (&runner);
+ if (ret) {
+ runner_log (&runner, "glusterd", GF_LOG_DEBUG,
+ "command failed");
+ runner_end (&runner);
+ goto out;
+ }
runner_end (&runner);
- goto out;
}
- runner_end (&runner);
+
+ GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
+ GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv);
runinit (&runner);
runner_add_args (&runner, SBIN_DIR"/glusterfs",
@@ -586,34 +364,37 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
"--xlator-option", "*dht.use-readdirp=yes",
"--xlator-option", "*dht.lookup-unhashed=yes",
"--xlator-option", "*dht.assert-no-child-down=yes",
- defrag->mount, NULL);
+ NULL);
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
+ runner_add_arg (&runner, "--socket-file");
+ runner_argprintf (&runner, "%s",sockfile);
+ runner_add_arg (&runner, "--pid-file");
+ runner_argprintf (&runner, "%s",pidfile);
+
ret = runner_run_reuse (&runner);
if (ret) {
runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
runner_end (&runner);
goto out;
}
- runner_end (&runner);
- volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED;
- if ((cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA) ||
- (cmd == GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE)) {
- volinfo->defrag_status = GF_DEFRAG_STATUS_MIGRATE_DATA_STARTED;
+ ret = rpc_clnt_transport_unix_options_build (&options, sockfile);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
+ goto out;
+ }
+
+ ret = glusterd_rpc_create (&defrag->rpc, options,
+ glusterd_defrag_notify, volinfo);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
+ goto out;
}
if (cbk)
defrag->cbk_fn = cbk;
- ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start,
- volinfo);
- if (ret) {
- runinit (&runner);
- runner_add_args (&runner, "umount", "-l", defrag->mount, NULL);
- ret = runner_run_reuse (&runner);
- if (ret)
- runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
- runner_end (&runner);
- }
out:
gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
@@ -712,7 +493,13 @@ glusterd_handle_defrag_volume (rpcsvc_request_t *req)
if (ret)
goto out;
- ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict);
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STOP)) {
+ ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
+ dict);
+ }
+ else
+ ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict);
out:
@@ -762,8 +549,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
+ case GF_DEFRAG_CMD_START_FORCE:
ret = glusterd_defrag_start_validate (volinfo,
msg, sizeof (msg));
if (ret) {
@@ -771,6 +557,33 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
"start validate failed");
goto out;
}
+ break;
+ case GF_DEFRAG_CMD_STATUS:
+ ret = glusterd_is_defrag_on (volinfo);
+ if (!ret) {
+ ret = -1;
+ if (volinfo->defrag_status ==
+ GF_DEFRAG_STATUS_COMPLETE) {
+ snprintf (msg, sizeof (msg), "Rebalance "
+ "completed!");
+ goto out;
+ }
+ snprintf (msg, sizeof(msg), "Rebalance is not running"
+ " on volume %s", volname);
+ goto out;
+ }
+ break;
+
+ case GF_DEFRAG_CMD_STOP:
+ ret = glusterd_is_defrag_on (volinfo);
+ if (!ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "rebalance is not running");
+ ret = -1;
+ snprintf (msg, sizeof(msg), "Rebalance is not running"
+ " on volume %s", volname);
+ goto out;
+ }
default:
break;
}
@@ -792,11 +605,8 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
int32_t cmd = 0;
char msg[2048] = {0};
glusterd_volinfo_t *volinfo = NULL;
- uint64_t files = 0;
- uint64_t size = 0;
void *node_uuid = NULL;
glusterd_conf_t *priv = NULL;
- dict_t *tmp_dict = NULL;
priv = THIS->private;
@@ -839,39 +649,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA:
- case GF_DEFRAG_CMD_START_MIGRATE_DATA_FORCE:
+ case GF_DEFRAG_CMD_START_FORCE:
ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
cmd, NULL);
break;
- case GF_DEFRAG_CMD_STOP:
- ret = glusterd_defrag_stop (volinfo, &files, &size,
- msg, sizeof (msg));
- if (!ret && rsp_dict) {
- ret = dict_set_uint64 (rsp_dict, "files", files);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set file count");
-
- ret = dict_set_uint64 (rsp_dict, "size", size);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set xfer size");
-
- /* Don't want to propagate errors from dict_set() */
- ret = 0;
- }
- break;
+ case GF_DEFRAG_CMD_STOP:
case GF_DEFRAG_CMD_STATUS:
-
- if (rsp_dict)
- tmp_dict = rsp_dict;
-
- /* On source node, there will be no 'rsp_dict' */
- if (!tmp_dict)
- tmp_dict = glusterd_op_get_ctx (GD_OP_REBALANCE);
-
- ret = glusterd_defrag_status_get (volinfo, tmp_dict);
break;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 2cf17b3f730..537496f0835 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -82,6 +82,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
break;
}
case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
{
if (ctx) {
ret = dict_get_int32 (ctx, "status", &status);
@@ -1058,7 +1059,8 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)
GF_ASSERT (rsp_dict);
op = glusterd_op_get_op ();
- GF_ASSERT (GD_OP_REBALANCE == op);
+ GF_ASSERT ((GD_OP_REBALANCE == op) ||
+ (GD_OP_DEFRAG_BRICK_VOLUME == op));
ctx_dict = glusterd_op_get_ctx (op);
@@ -1224,10 +1226,7 @@ glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
break;
case GD_OP_REBALANCE:
- ret = glusterd_volume_rebalance_use_rsp_dict (dict);
- if (ret)
- goto out;
-
+ case GD_OP_DEFRAG_BRICK_VOLUME:
break;
default:
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 4fe8f71cb5b..18d60d0a428 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -622,6 +622,15 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
if (ret)
goto out;
+ if (volinfo->defrag_cmd == GF_DEFRAG_CMD_STATUS)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->defrag_cmd);
+ ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG,
+ buf);
+ if (ret)
+ goto out;
+
out:
if (ret)
gf_log ("", GF_LOG_ERROR, "Unable to write volume values"
@@ -1860,6 +1869,9 @@ glusterd_store_retrieve_volume (char *volname)
}
gf_log ("", GF_LOG_DEBUG, "Parsed as "GEOREP" "
" slave:key=%s,value:%s", key, value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
+ strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
+ volinfo->defrag_cmd = atoi (value);
}
else {
exists = glusterd_check_option_exists (key, NULL);
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index f1413955bed..f55fb8c2e48 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -59,6 +59,7 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status"
#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src"
#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status"
#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"
#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 42924a5f68a..9ec9e16f18d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -2016,6 +2016,15 @@ glusterd_import_volinfo (dict_t *vols, int count,
goto out;
}
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.rebalance", count);
+ ret = dict_get_uint32 (vols, key, &new_volinfo->defrag_cmd);
+ if (ret) {
+ snprintf (msg, sizeof (msg), "%s missing in payload for %s",
+ key, volname);
+ goto out;
+ }
+
uuid_parse (volume_id_str, new_volinfo->volume_id);
memset (key, 0, sizeof (key));
@@ -2438,6 +2447,7 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)
struct rpc_clnt *rpc = NULL;
glusterd_brickinfo_t *brickinfo = NULL;
nodesrv_t *shd = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out);
GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out);
@@ -2449,6 +2459,11 @@ glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node)
shd = pending_node->node;
rpc = shd->rpc;
+ } else if (pending_node->type == GD_NODE_REBALANCE) {
+ volinfo = pending_node->node;
+ if (volinfo->defrag)
+ rpc = volinfo->defrag->rpc;
+
} else {
GF_ASSERT (0);
}
@@ -4811,3 +4826,42 @@ glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,
snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol",
path, volinfo->volname);
}
+
+int
+glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk)
+{
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX];
+ int ret = -1;
+ pid_t pid;
+
+ priv = THIS->private;
+ if (!priv)
+ return ret;
+
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+
+ if (!glusterd_is_service_running (pidfile, &pid)) {
+ glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd,
+ cbk);
+ }
+
+ return ret;
+}
+
+int
+glusterd_restart_rebalance (glusterd_conf_t *conf)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ char op_errstr[256];
+
+ list_for_each_entry (volinfo, &conf->volumes, vol_list) {
+ if (!volinfo->defrag_cmd)
+ continue;
+ glusterd_volume_defrag_restart (volinfo, op_errstr, 256,
+ volinfo->defrag_cmd, NULL);
+ }
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 6f9a5e14d5c..e52b25e3199 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -386,4 +386,6 @@ glusterd_chk_peers_connected_befriended (uuid_t skip_uuid);
void
glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo,
gf_transport_type type);
+int
+glusterd_restart_rebalance (glusterd_conf_t *conf);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index e9f3bd05577..28e80310ec2 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -999,6 +999,8 @@ init (xlator_t *this)
ret = glusterd_restart_gsyncds (conf);
if (ret)
goto out;
+
+ glusterd_restart_rebalance (conf);
ret = 0;
out:
if (ret < 0) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 60dbe61e04a..e200f49d1db 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -79,6 +79,7 @@ typedef enum glusterd_op_ {
GD_OP_STATEDUMP_VOLUME,
GD_OP_LIST_VOLUME,
GD_OP_CLEARLOCKS_VOLUME,
+ GD_OP_DEFRAG_BRICK_VOLUME,
GD_OP_MAX,
} glusterd_op_t;
@@ -164,6 +165,9 @@ struct glusterd_defrag_info_ {
gf_lock_t lock;
int cmd;
pthread_t th;
+ gf_defrag_status_t defrag_status;
+ struct rpc_clnt * rpc;
+ uint32_t connected;
char mount[1024];
char databuf[131072];
struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */
@@ -210,6 +214,7 @@ struct glusterd_volinfo_ {
uint64_t rebalance_data;
uint64_t lookedup_files;
glusterd_defrag_info_t *defrag;
+ gf_cli_defrag_type defrag_cmd;
/* Replace brick status */
gf_rb_status_t rb_status;
@@ -235,7 +240,8 @@ struct glusterd_volinfo_ {
typedef enum gd_node_type_ {
GD_NODE_NONE,
GD_NODE_BRICK,
- GD_NODE_SHD
+ GD_NODE_SHD,
+ GD_NODE_REBALANCE,
} gd_node_type;
typedef struct glusterd_pending_node_ {
@@ -315,6 +321,27 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args);
STACK_DESTROY (frame->root);\
} while (0)
+#define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) do { \
+ char vol_path[PATH_MAX]; \
+ GLUSTERD_GET_VOLUME_DIR(vol_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/rebalance",vol_path); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo, priv) do { \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path, \
+ uuid_utoa(priv->uuid)); \
+ } while (0)
+
+#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do { \
+ char defrag_path[PATH_MAX]; \
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \
+ snprintf (path, PATH_MAX, "%s/%s.pid", defrag_path, \
+ uuid_utoa(priv->uuid)); \
+ } while (0)
+
+
int32_t
glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo,
char **new_brick);