summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorshishir gowda <sgowda@redhat.com>2013-07-26 11:59:12 +0530
committerVijay Bellur <vbellur@redhat.com>2013-07-30 23:55:59 -0700
commite306698b00d2d3e736cbc97a1383bfb5d3724796 (patch)
treedd560b044b5741d0b0d007cfdd01d50e7be47ac2
parent7e6432015afd758ca3fd9e32d3e62b29935b0874 (diff)
cluster/dht: Treat migration failures due to space constraints as skipped
Currently rebalance/remove-brick op's display migration failed count even for files which failed due to space issues (not enough space for file, or migration leading to cluster imbalance) These will now be counted as skipped, and rebalance/remove-brick status will display the additional counter Change-Id: I674904d380b5f8300e9ca9e6af557c3d30d6cff4 BUG: 989846 Signed-off-by: shishir gowda <sgowda@redhat.com> Reviewed-on: http://review.gluster.org/5399 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--cli/src/cli-rpc-ops.c40
-rw-r--r--tests/volume.rc4
-rw-r--r--xlators/cluster/dht/src/dht-common.h1
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c27
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1
6 files changed, 88 insertions, 19 deletions
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 0205b4cc..ea80f936 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -1206,6 +1206,7 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
char key[256] = {0,};
int32_t i = 1;
uint64_t failures = 0;
+ uint64_t skipped = 0;
double elapsed = 0;
char *size_str = NULL;
char *task_id_str = NULL;
@@ -1326,11 +1327,12 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files",
- "size", "scanned", "failures", "status", "run time in secs");
- cli_out ("%40s %16s %13s %13s %13s %14s %16s", "---------",
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %16s", "---------",
"-----------", "-----------", "-----------", "-----------",
- "------------", "--------------");
+ "-----------", "------------", "--------------");
do {
snprintf (key, 256, "node-uuid-%d", i);
ret = dict_get_str (dict, key, &node_uuid);
@@ -1374,6 +1376,12 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
"failed to get failures count");
memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", i);
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get skipped count");
+ memset (key, 0, 256);
snprintf (key, 256, "run-time-%d", i);
ret = dict_get_double (dict, key, &elapsed);
if (ret)
@@ -1400,8 +1408,8 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
size_str = gf_uint64_2human_readable(size);
cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"PRIu64
- " %14s %16.2f", node_uuid, files, size_str, lookup,
- failures, status, elapsed);
+ " %13"PRIu64 " %14s %16.2f", node_uuid, files,
+ size_str, lookup, failures, skipped, status, elapsed);
GF_FREE(size_str);
i++;
@@ -1717,6 +1725,7 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
char *node_uuid = 0;
gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
uint64_t failures = 0;
+ uint64_t skipped = 0;
double elapsed = 0;
char *size_str = NULL;
int32_t command = 0;
@@ -1817,11 +1826,12 @@ xml_output:
}
- cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files",
- "size", "scanned", "failures", "status", "run-time in secs");
- cli_out ("%40s %16s %13s %13s %13s %14s %16s", "---------",
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run-time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %16s", "---------",
"-----------", "-----------", "-----------", "-----------",
- "------------", "--------------");
+ "-----------","------------", "--------------");
do {
snprintf (key, 256, "node-uuid-%d", i);
@@ -1864,6 +1874,11 @@ xml_output:
gf_log (frame->this->name, GF_LOG_TRACE,
"Failed to get failure on files");
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "Failed to get skipped files");
memset (key, 0, 256);
snprintf (key, 256, "run-time-%d", i);
ret = dict_get_double (dict, key, &elapsed);
@@ -1893,8 +1908,9 @@ xml_output:
if (strcmp (status, "not started")) {
cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"
- PRIu64 " %14s %16.2f", node_uuid, files,
- size_str, lookup, failures, status, elapsed);
+ PRIu64 " %13"PRIu64 " %14s %16.2f", node_uuid,
+ files, size_str, lookup, failures, skipped,
+ status, elapsed);
}
GF_FREE(size_str);
diff --git a/tests/volume.rc b/tests/volume.rc
index b358b108..02892d71 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -27,11 +27,11 @@ function volume_option()
}
function rebalance_status_field {
- $CLI volume rebalance $1 status | awk '{print $6}' | sed -n 3p
+ $CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p
}
function remove_brick_status_completed_field {
- $CLI volume remove-brick $V0 $H0:$B0/r2d2_{4,5} status | awk '{print $6}' | sed -n 3p
+ $CLI volume remove-brick $V0 $H0:$B0/r2d2_{4,5} status | awk '{print $7}' | sed -n 3p
}
function get_mount_process_pid {
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 2be76299..8a62b9ee 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -227,6 +227,7 @@ struct gf_defrag_info_ {
uint64_t total_data;
uint64_t num_files_lookedup;
uint64_t total_failures;
+ uint64_t skipped;
gf_lock_t lock;
int cmd;
pthread_t th;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 8510a9b3..dab6de60 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -1101,6 +1101,7 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
struct timeval end = {0,};
double elapsed = {0,};
struct timeval start = {0,};
+ int32_t err = 0;
gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
loc->path);
@@ -1258,9 +1259,21 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
ret = syncop_setxattr (this, &entry_loc, migrate_data,
0);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "migrate-data"
- " failed for %s", entry_loc.path);
- defrag->total_failures +=1;
+ err = op_errno;
+ /* errno is overloaded. See
+ * rebalance_task_completion () */
+ if (err != ENOSPC) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "migrate-data skipped for %s"
+ " due to space constraints",
+ entry_loc.path);
+ defrag->skipped +=1;
+ } else{
+ gf_log (this->name, GF_LOG_ERROR,
+ "migrate-data failed for %s",
+ entry_loc.path);
+ defrag->total_failures +=1;
+ }
}
if (ret == -1) {
@@ -1659,6 +1672,7 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
uint64_t size = 0;
uint64_t lookup = 0;
uint64_t failures = 0;
+ uint64_t skipped = 0;
char *status = "";
double elapsed = 0;
struct timeval end = {0,};
@@ -1675,6 +1689,7 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
size = defrag->total_data;
lookup = defrag->num_files_lookedup;
failures = defrag->total_failures;
+ skipped = defrag->skipped;
gettimeofday (&end, NULL);
@@ -1698,6 +1713,7 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
gf_log (THIS->name, GF_LOG_WARNING,
"failed to set lookedup file count");
+
ret = dict_set_int32 (dict, "status", defrag->defrag_status);
if (ret)
gf_log (THIS->name, GF_LOG_WARNING,
@@ -1710,6 +1726,14 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
}
ret = dict_set_uint64 (dict, "failures", failures);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set failure count");
+
+ ret = dict_set_uint64 (dict, "skipped", skipped);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to set skipped file count");
log:
switch (defrag->defrag_status) {
case GF_DEFRAG_STATUS_NOT_STARTED:
@@ -1732,8 +1756,8 @@ log:
gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f "
"secs", status, elapsed);
gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
- PRIu64", lookups: %"PRIu64", failures: %"PRIu64, files, size,
- lookup, failures);
+ PRIu64", lookups: %"PRIu64", failures: %"PRIu64", skipped: "
+ "%"PRIu64, files, size, lookup, failures, skipped);
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 85c17cd5..e894a9cd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -6154,6 +6154,7 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
uint64_t lookup = 0;
gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED;
uint64_t failures = 0;
+ uint64_t skipped = 0;
xlator_t *this = NULL;
double run_time = 0;
@@ -6184,6 +6185,11 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
gf_log (this->name, GF_LOG_TRACE,
"failed to get failure count");
+ ret = dict_get_uint64 (rsp_dict, "skipped", &skipped);
+ if (ret)
+ gf_log (this->name, GF_LOG_TRACE,
+ "failed to get skipped count");
+
ret = dict_get_double (rsp_dict, "run-time", &run_time);
if (ret)
gf_log (this->name, GF_LOG_TRACE,
@@ -6199,6 +6205,8 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,
volinfo->rebal.defrag_status = status;
if (failures)
volinfo->rebal.rebalance_failures = failures;
+ if (skipped)
+ volinfo->rebal.skipped_files = skipped;
if (run_time)
volinfo->rebal.rebalance_time = run_time;
@@ -6930,6 +6938,18 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)
}
memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", index);
+ ret = dict_get_uint64 (rsp_dict, key, &value);
+ if (!ret) {
+ memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", current_index);
+ ret = dict_set_uint64 (ctx_dict, key, value);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_DEBUG,
+ "failed to set skipped count");
+ }
+ }
+ memset (key, 0, 256);
snprintf (key, 256, "run-time-%d", index);
ret = dict_get_double (rsp_dict, key, &elapsed_time);
if (!ret) {
@@ -7347,6 +7367,13 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
gf_log (THIS->name, GF_LOG_ERROR,
"failed to set failure count");
+ memset (key, 0 , 256);
+ snprintf (key, 256, "skipped-%d", i);
+ ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.skipped_files);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "failed to set skipped count");
+
memset (key, 0, 256);
snprintf (key, 256, "run-time-%d", i);
ret = dict_set_double (op_ctx, key, volinfo->rebal.rebalance_time);
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index ccdf1c43..8c043a9a 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -237,6 +237,7 @@ struct glusterd_rebalance_ {
uint64_t rebalance_files;
uint64_t rebalance_data;
uint64_t lookedup_files;
+ uint64_t skipped_files;
glusterd_defrag_info_t *defrag;
gf_cli_defrag_type defrag_cmd;
uint64_t rebalance_failures;