summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorN Balachandran <nbalacha@redhat.com>2017-06-01 22:13:41 +0530
committerRaghavendra Talur <rtalur@redhat.com>2017-06-20 04:57:14 +0000
commitf05dd5d6d9b8953bf97c70148bae8eadc68b5c24 (patch)
treeba54986223e5ac73d8104ed631a42bc62c3c84c5
parentef331f945bfd4e5664d557ce8e5a4bd275792e13 (diff)
cluster/dht: Include dirs in rebalance estimates
Empty directories were not being considered while calculating rebalance estimates leading to negative time-left values being displayed as part of the rebalance status. > BUG: 1457985 > Signed-off-by: N Balachandran <nbalacha@redhat.com> > Reviewed-on: https://review.gluster.org/17448 > Smoke: Gluster Build System <jenkins@build.gluster.org> > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > Reviewed-by: Amar Tumballi <amarts@redhat.com> > Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Change-Id: I48d41d702e72db30af10e6b87b628baa605afa98 BUG: 1460914 Signed-off-by: N Balachandran <nbalacha@redhat.com> Reviewed-on: https://review.gluster.org/17530 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
-rw-r--r--xlators/cluster/dht/src/dht-common.c6
-rw-r--r--xlators/cluster/dht/src/dht-common.h5
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c103
3 files changed, 83 insertions, 31 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 22061ef5682..c7427e5c81d 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -9199,7 +9199,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)
DHT_MSG_CHILD_DOWN,
"Received CHILD_DOWN. Exiting");
if (conf->defrag) {
- gf_defrag_stop (conf->defrag,
+ gf_defrag_stop (conf,
GF_DEFRAG_STATUS_FAILED, NULL);
} else {
kill (getpid(), SIGTERM);
@@ -9280,7 +9280,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
(cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_DETACH_STATUS))
- gf_defrag_status_get (defrag, output);
+ gf_defrag_status_get (conf, output);
else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
gf_defrag_start_detach_tier(defrag);
else if (cmd == GF_DEFRAG_CMD_DETACH_START)
@@ -9288,7 +9288,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)
else if (cmd == GF_DEFRAG_CMD_STOP ||
cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||
cmd == GF_DEFRAG_CMD_DETACH_STOP)
- gf_defrag_stop (defrag,
+ gf_defrag_stop (conf,
GF_DEFRAG_STATUS_STOPPED, output);
else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
ret = gf_defrag_pause_tier (this, defrag);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index c74beb26d96..ab44600d313 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -457,6 +457,7 @@ struct gf_defrag_info_ {
uint64_t num_files_lookedup;
uint64_t total_failures;
uint64_t skipped;
+ uint64_t num_dirs_processed;
gf_lock_t lock;
int cmd;
pthread_t th;
@@ -1103,7 +1104,7 @@ int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postparent, dict_t *xdata);
int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+gf_defrag_status_get (dht_conf_t *conf, dict_t *dict);
void
gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state);
@@ -1124,7 +1125,7 @@ int
gf_defrag_start_detach_tier (gf_defrag_info_t *defrag);
int
-gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status,
dict_t *output);
void*
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 90e45b1c293..0e492e66d13 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -3022,6 +3022,11 @@ out:
ret = 2;
}
+ /* It does not matter if it errored out - this number is
+ * used to calculate rebalance estimated time to complete.
+ * No locking required as dirs are processed by a single thread.
+ */
+ defrag->num_dirs_processed++;
return ret;
}
int
@@ -4121,7 +4126,7 @@ out:
LOCK (&defrag->lock);
{
status = dict_new ();
- gf_defrag_status_get (defrag, status);
+ gf_defrag_status_get (conf, status);
if (ctx && ctx->notify)
ctx->notify (GF_EN_DEFRAG_STATUS, status);
if (status)
@@ -4200,8 +4205,66 @@ out:
return NULL;
}
+
+/*
+ * gf_defrag_get_estimates - estimate the total rebalance run time.
+ *
+ * @conf: dht configuration; conf->defrag holds the rebalance counters and
+ *        start time, conf->local_subvols_cnt the number of local subvols.
+ *
+ * The estimate is (entries expected) / (entries processed per second),
+ * where "entries processed" is files looked up plus directories processed.
+ *
+ * Returns the estimated total time to complete, in seconds, or 0 when no
+ * estimate can be computed (no defrag info, g_totalfiles not set, no time
+ * elapsed yet, or nothing processed yet).
+ *
+ * Side effect: bumps the global g_totalfiles when more entries have been
+ * processed than were originally counted.
+ */
+uint64_t
+gf_defrag_get_estimates (dht_conf_t *conf)
+{
+        gf_defrag_info_t *defrag = NULL;
+        double            rate_lookedup = 0;
+        uint64_t          dirs_processed = 0;
+        uint64_t          total_processed = 0;
+        uint64_t          duplicate_dirs = 0;
+        uint64_t          tmp_count = 0;
+        uint64_t          time_to_complete = 0;
+        struct timeval    end = {0,};
+        double            elapsed = 0;
+
+        if (!conf || !conf->defrag)
+                return 0;
+
+        defrag = conf->defrag;
+
+        if (!g_totalfiles)
+                return 0;
+
+        gettimeofday (&end, NULL);
+        elapsed = end.tv_sec - defrag->start_time.tv_sec;
+
+        /* num_files_lookedup and num_dirs_processed are read without
+         * taking defrag->lock: locking here was tried and the status
+         * function never got the lock, causing the status cli to hang.
+         * NOTE(review): one visible caller invokes gf_defrag_status_get ()
+         * inside LOCK (&defrag->lock), which may be the reason - confirm.
+         */
+
+        dirs_processed = defrag->num_dirs_processed;
+
+        total_processed = defrag->num_files_lookedup
+                          + dirs_processed;
+
+        /* Without elapsed time or processed entries the rate would be
+         * 0, inf or NaN, and converting the resulting quotient to
+         * uint64_t is undefined. Report "no estimate" instead.
+         */
+        if ((elapsed <= 0) || !total_processed)
+                return 0;
+
+        /* rate at which entries (files + dirs) are being processed */
+        rate_lookedup = total_processed / elapsed;
+
+        /* We initially sum up dirs across all local subvols.
+         * The same directories will be counted for each subvol so
+         * we want to ensure that they are only counted once.
+         */
+        if (conf->local_subvols_cnt > 1)
+                duplicate_dirs = dirs_processed
+                                 * (conf->local_subvols_cnt - 1);
+
+        /* Clamp at 0 rather than letting the unsigned subtraction wrap
+         * around to a huge count when the correction exceeds the total.
+         */
+        if (g_totalfiles > duplicate_dirs)
+                tmp_count = g_totalfiles - duplicate_dirs;
+        else
+                tmp_count = 0;
+
+        /* The initial count was too small: grow it so that later
+         * estimates do not fall behind the work already done.
+         */
+        if (total_processed > g_totalfiles)
+                g_totalfiles = total_processed + 10000;
+
+        time_to_complete = tmp_count / rate_lookedup;
+
+        gf_log (THIS->name, GF_LOG_INFO,
+                "TIME: total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
+                "rate_lookedup=%f", total_processed, tmp_count,
+                rate_lookedup);
+
+        return time_to_complete;
+}
+
+
int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+gf_defrag_status_get (dht_conf_t *conf, dict_t *dict)
{
int ret = 0;
uint64_t files = 0;
@@ -4213,11 +4276,10 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
uint64_t demoted = 0;
char *status = "";
double elapsed = 0;
- uint64_t time_left = 0;
- uint64_t time_to_complete = 0;
- double rate_lookedup = 0;
struct timeval end = {0,};
-
+ uint64_t time_to_complete = 0;
+ uint64_t time_left = 0;
+ gf_defrag_info_t *defrag = conf->defrag;
if (!defrag)
goto out;
@@ -4238,34 +4300,20 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
elapsed = end.tv_sec - defrag->start_time.tv_sec;
-/*START */
-
-/* rate at which files looked up */
-
if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER)
- && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)
- && g_totalfiles) {
+ && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
- rate_lookedup = (defrag->num_files_lookedup)/elapsed;
- if (defrag->num_files_lookedup > g_totalfiles)
- g_totalfiles = defrag->num_files_lookedup + 10000;
- time_to_complete = (g_totalfiles)/rate_lookedup;
+ time_to_complete = gf_defrag_get_estimates (conf);
time_left = time_to_complete - elapsed;
gf_log (THIS->name, GF_LOG_INFO,
- "TIME: num_files_lookedup=%"PRIu64",elapsed time = %f,"
- "rate_lookedup=%f", defrag->num_files_lookedup, elapsed,
- rate_lookedup);
- gf_log (THIS->name, GF_LOG_INFO,
"TIME: Estimated total time to complete = %"PRIu64
- " seconds", time_to_complete);
+ " seconds, seconds left = %"PRIu64"",
+ time_to_complete, time_left);
- gf_log (THIS->name, GF_LOG_INFO,
- "TIME: Seconds left = %"PRIu64" seconds", time_left);
}
-/*END */
if (!dict)
goto log;
@@ -4362,6 +4410,7 @@ gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state)
pthread_mutex_unlock (&tier_conf->pause_mutex);
}
+
tier_pause_state_t
gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf)
{
@@ -4515,12 +4564,14 @@ gf_defrag_start_detach_tier (gf_defrag_info_t *defrag)
}
int
-gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status,
dict_t *output)
{
/* TODO: set a variable 'stop_defrag' here, it should be checked
in defrag loop */
int ret = -1;
+ gf_defrag_info_t *defrag = conf->defrag;
+
GF_ASSERT (defrag);
if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
@@ -4532,7 +4583,7 @@ gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
defrag->defrag_status = status;
if (output)
- gf_defrag_status_get (defrag, output);
+ gf_defrag_status_get (conf, output);
ret = 0;
out:
gf_msg_debug ("", 0, "Returning %d", ret);