summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorshishirng <shishirng@gluster.com>2012-03-06 18:55:37 +0530
committerVijay Bellur <vijay@gluster.com>2012-03-07 21:14:27 -0800
commit33e9f9da8546dc57ecf6b3705f6b6474150ec78c (patch)
tree9455c2dcd9f49e7ff8b50fdcc3fb6d8b0534c975
parentf6a779ffc5b515163995dc2d240c3271cc6bceeb (diff)
glusterd/rebalance: Bring in support for parallel rebalance
This patch enables rebalance processes to be started on all nodes across which the volume is spread (1 process per node). The node-uuid xattr identifies which node takes ownership of the task to migrate the file. The model employed is push (src pushes to dst). Change-Id: Ieacd46a6216cf6ded841bbaebd10cfaea51c16d6 BUG: 763844 Signed-off-by: shishirng <shishirng@gluster.com> Reviewed-on: http://review.gluster.com/2873 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Amar Tumballi <amarts@redhat.com> Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r--cli/src/cli-rpc-ops.c10
-rw-r--r--xlators/cluster/dht/src/dht-common.h1
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c69
-rw-r--r--xlators/cluster/dht/src/dht.c17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c82
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c82
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c28
-rw-r--r--xlators/storage/posix/src/posix.c2
8 files changed, 204 insertions, 87 deletions
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index e436024a9..977ab0b2a 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -976,6 +976,7 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
uint64_t size = 0;
uint64_t lookup = 0;
char msg[1024] = {0,};
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
if (-1 == req->rpc_status) {
goto out;
@@ -1041,6 +1042,11 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
gf_log (THIS->name, GF_LOG_TRACE,
"failed to get lookedup file count");
+ ret = dict_get_int32 (dict, "status", (int32_t *)&status_rcd);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "failed to get status");
+
if (cmd == GF_DEFRAG_CMD_STOP) {
if (rsp.op_ret == -1) {
if (strcmp (rsp.op_errstr, ""))
@@ -1070,7 +1076,7 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto done;
}
- switch (rsp.op_errno) {
+ switch (status_rcd) {
case GF_DEFRAG_STATUS_NOT_STARTED:
status = "not started";
break;
@@ -1113,7 +1119,7 @@ done:
#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volRebalance", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
+ status_rcd, rsp.op_errstr);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 3d215ab25..d7689cc7f 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -210,6 +210,7 @@ struct gf_defrag_info_ {
uint32_t is_exiting;
pid_t pid;
inode_t *root_inode;
+ uuid_t node_uuid;
};
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 64249d0e0..4c5dd6e99 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -320,7 +320,7 @@ out:
static inline int
__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf)
+ struct iatt *stbuf, int flag)
{
struct statvfs src_statfs = {0,};
struct statvfs dst_statfs = {0,};
@@ -344,6 +344,12 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
loc->path, to->name, strerror (errno));
goto out;
}
+
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
if (((dst_statfs.f_bavail *
dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) <
(((src_statfs.f_bavail * src_statfs.f_bsize) /
@@ -360,6 +366,17 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
goto out;
}
+check_avail_space:
+ if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
+ GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "data movement attempted from node (%s) with "
+ "to node (%s) which does not have required free space"
+ " for %s", from->name, to->name, loc->path);
+ ret = 1;
+ goto out;
+ }
+
ret = 0;
out:
return ret;
@@ -672,12 +689,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret)
goto out;
- /* Should happen on all files when 'force' option is not given */
- if (flag == GF_DHT_MIGRATE_DATA) {
- ret = __dht_check_free_space (to, from, loc, &stbuf);
- if (ret) {
- goto out;
- }
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
+ if (ret) {
+ goto out;
}
/* Open the source, and also update mode/xattr */
@@ -1040,6 +1054,8 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
dict_t *dict = NULL;
struct iatt iatt = {0,};
int32_t op_errno = 0;
+ char *uuid_str = NULL;
+ uuid_t node_uuid = {0,};
gf_log (this->name, GF_LOG_INFO, "migate data called on %s",
loc->path);
@@ -1122,6 +1138,43 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
continue;
}
+ ret = syncop_getxattr (this, &entry_loc, &dict,
+ GF_XATTR_NODE_UUID_KEY);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid for %s", entry_loc.path);
+ continue;
+ }
+
+ ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
+ &uuid_str);
+ if(ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to "
+ "get node-uuid from dict for %s",
+ entry_loc.path);
+ continue;
+ }
+
+ if (uuid_parse (uuid_str, node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
+ "failed for %s", entry_loc.path);
+ continue;
+ }
+
+ /* if file belongs to different node, skip migration
+ * the other node will take responsibility of migration
+ */
+ if (uuid_compare (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_TRACE, "%s does not"
+ "belong to this node", entry_loc.path);
+ continue;
+ }
+
+ uuid_str = NULL;
+
+ dict_del (dict, GF_XATTR_NODE_UUID_KEY);
+
+
/* if distribute is present, it will honor this key.
* -1 is returned if distribute is not present or file
* doesn't have a link-file. If file has link-file, the
@@ -1131,6 +1184,8 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = syncop_getxattr (this, &entry_loc, &dict,
GF_XATTR_LINKINFO_KEY);
if (ret < 0) {
+ gf_log (this->name, GF_LOG_TRACE, "getxattr "
+ "failed for %s", entry_loc.path);
continue;
}
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 4502a751b..96382bf16 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -359,6 +359,7 @@ init (xlator_t *this)
int i = 0;
gf_defrag_info_t *defrag = NULL;
int cmd = 0;
+ char *node_uuid = NULL;
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -391,6 +392,19 @@ init (xlator_t *this)
defrag->is_exiting = 0;
+ ret = dict_get_str (this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
+ "specified");
+ goto err;
+ }
+
+ if (uuid_parse (node_uuid, defrag->node_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
+ "glusterd node uuid");
+ goto err;
+ }
+
defrag->cmd = cmd;
conf->defrag = defrag;
@@ -591,6 +605,9 @@ struct volume_options options[] = {
{ .key = {"rebalance-cmd"},
.type = GF_OPTION_TYPE_INT,
},
+ { .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 3c1baa7e6..bda5e61e4 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2709,6 +2709,83 @@ out:
return ret;
}
+
+int
+glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
+ dict_t *op_ctx)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED;
+
+ GF_ASSERT (req_dict);
+
+ ret = dict_get_str (req_dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret)
+ goto out;
+
+ ret = dict_get_uint64 (rsp_dict, "files", &files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "failed to get file count");
+
+ ret = dict_get_uint64 (rsp_dict, "size", &size);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "failed to get size of xfer");
+
+ ret = dict_get_uint64 (rsp_dict, "lookups", &lookup);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "failed to get lookedup file count");
+ ret = dict_get_int32 (rsp_dict, "status", (int32_t *)&status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "failed to get status");
+
+ volinfo->rebalance_files += files;
+ volinfo->rebalance_data += size;
+ volinfo->lookedup_files += lookup;
+
+ if (!op_ctx) {
+ dict_copy (rsp_dict, op_ctx);
+ goto out;
+ }
+
+ ret = dict_set_uint64 (op_ctx, "files", volinfo->rebalance_files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set file count");
+
+ ret = dict_set_uint64 (op_ctx, "size", volinfo->rebalance_data);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set size of xfer");
+
+ ret = dict_set_uint64 (op_ctx, "lookups", volinfo->lookedup_files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set lookedup file count");
+ ret = dict_set_int32 (op_ctx, "status", status);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set status");
+
+out:
+ return ret;
+}
+
int32_t
glusterd_handle_node_rsp (glusterd_req_ctx_t *req_ctx, void *pending_entry,
glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
@@ -2732,8 +2809,9 @@ glusterd_handle_node_rsp (glusterd_req_ctx_t *req_ctx, void *pending_entry,
break;
case GD_OP_DEFRAG_BRICK_VOLUME:
- dict_copy (rsp_dict, op_ctx);
- break;
+ glusterd_defrag_volume_node_rsp (req_ctx->dict,
+ rsp_dict, op_ctx);
+ break;
case GD_OP_HEAL_VOLUME:
ret = glusterd_heal_volume_brick_rsp (req_ctx->dict, rsp_dict,
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index f7304b9c0..936a3b26e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -82,60 +82,6 @@ out:
return ret;
}
-int
-glusterd_defrag_status_get (glusterd_volinfo_t *volinfo,
- dict_t *dict)
-{
- int ret = 0;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
-
- if (!volinfo || !dict)
- goto out;
-
- ret = 0;
- if (volinfo->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
- goto out;
-
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
- {
- files = volinfo->defrag->total_files;
- size = volinfo->defrag->total_data;
- lookup = volinfo->defrag->num_files_lookedup;
- }
- UNLOCK (&volinfo->defrag->lock);
- } else {
- files = volinfo->rebalance_files;
- size = volinfo->rebalance_data;
- lookup = volinfo->lookedup_files;
- }
-
- ret = dict_set_uint64 (dict, "files", files);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set file count");
-
- ret = dict_set_uint64 (dict, "size", size);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set size of xfer");
-
- ret = dict_set_uint64 (dict, "lookups", lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set lookedup file count");
-
- ret = dict_set_int32 (dict, "status", volinfo->defrag_status);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set status");
-
-out:
- return 0;
-}
-
void
glusterd_rebalance_cmd_attempted_log (int cmd, char *volname)
{
@@ -338,6 +284,10 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED;
+ volinfo->rebalance_files = 0;
+ volinfo->rebalance_data = 0;
+ volinfo->lookedup_files = 0;
+
volinfo->defrag_cmd = cmd;
glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
@@ -369,6 +319,8 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
NULL);
runner_add_arg (&runner, "--xlator-option");
runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(priv->uuid));
runner_add_arg (&runner, "--socket-file");
runner_argprintf (&runner, "%s",sockfile);
runner_add_arg (&runner, "--pid-file");
@@ -383,6 +335,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
goto out;
}
+ sleep (5);
ret = rpc_clnt_transport_unix_options_build (&options, sockfile);
if (ret) {
gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
@@ -609,7 +562,6 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
int32_t cmd = 0;
char msg[2048] = {0};
glusterd_volinfo_t *volinfo = NULL;
- void *node_uuid = NULL;
glusterd_conf_t *priv = NULL;
priv = THIS->private;
@@ -633,23 +585,6 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
goto out;
}
- if ((cmd != GF_DEFRAG_CMD_STATUS) &&
- (cmd != GF_DEFRAG_CMD_STOP)) {
- ret = dict_get_ptr (dict, "node-uuid", &node_uuid);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG, "node-uuid not found");
- goto out;
- }
-
- /* perform this on only the node which has
- issued the command */
- if (uuid_compare (node_uuid, priv->uuid)) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "not the source node %s", uuid_utoa (priv->uuid));
- goto out;
- }
- }
-
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
@@ -659,6 +594,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
break;
case GF_DEFRAG_CMD_STOP:
case GF_DEFRAG_CMD_STATUS:
+ volinfo->rebalance_files = 0;
+ volinfo->rebalance_data = 0;
+ volinfo->lookedup_files = 0;
break;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 39a9c6161..4e55c383c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -1059,6 +1059,8 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)
glusterd_op_t op = GD_OP_NONE;
uint64_t value = 0;
int32_t value32 = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
GF_ASSERT (rsp_dict);
@@ -1071,9 +1073,22 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)
if (!ctx_dict)
goto out;
+ ret = dict_get_str (ctx_dict, "volname", &volname);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find (volname, &volinfo);
+
+ if (ret)
+ goto out;
+
ret = dict_get_uint64 (rsp_dict, "files", &value);
if (!ret) {
- ret = dict_set_uint64 (ctx_dict, "files", value);
+ volinfo->rebalance_files += value;
+ ret = dict_set_uint64 (ctx_dict, "files",
+ volinfo->rebalance_files);
if (ret) {
gf_log (THIS->name, GF_LOG_DEBUG,
"failed to set the file count");
@@ -1082,7 +1097,9 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)
ret = dict_get_uint64 (rsp_dict, "size", &value);
if (!ret) {
- ret = dict_set_uint64 (ctx_dict, "size", value);
+ volinfo->rebalance_data += value;
+ ret = dict_set_uint64 (ctx_dict, "size",
+ volinfo->rebalance_data);
if (ret) {
gf_log (THIS->name, GF_LOG_DEBUG,
"failed to set the size of migration");
@@ -1091,7 +1108,9 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)
ret = dict_get_uint64 (rsp_dict, "lookups", &value);
if (!ret) {
- ret = dict_set_uint64 (ctx_dict, "lookups", value);
+ volinfo->lookedup_files += value;
+ ret = dict_set_uint64 (ctx_dict, "lookups",
+ volinfo->lookedup_files);
if (ret) {
gf_log (THIS->name, GF_LOG_DEBUG,
"failed to set lookuped file count");
@@ -1273,6 +1292,9 @@ glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
case GD_OP_REBALANCE:
case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_volume_rebalance_use_rsp_dict (dict);
+ if (ret)
+ goto out;
break;
case GD_OP_HEAL_VOLUME:
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 84d1d11a5..22a9574dc 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -2480,7 +2480,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
if (loc->inode && name &&
(strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0)
&& !uuid_is_null (priv->glusterd_uuid)) {
- (void) snprintf (host_buf, 1024, "<%s>",
+ (void) snprintf (host_buf, 1024, "%s",
uuid_utoa (priv->glusterd_uuid));
dyn_rpath = gf_strdup (host_buf);