diff options
author | N Balachandran <nbalacha@redhat.com> | 2017-06-01 22:13:41 +0530 |
---|---|---|
committer | Raghavendra Talur <rtalur@redhat.com> | 2017-06-20 04:57:14 +0000 |
commit | f05dd5d6d9b8953bf97c70148bae8eadc68b5c24 (patch) | |
tree | ba54986223e5ac73d8104ed631a42bc62c3c84c5 | |
parent | ef331f945bfd4e5664d557ce8e5a4bd275792e13 (diff) |
cluster/dht: Include dirs in rebalance estimates
Empty directories were not being considered while
calculating rebalance estimates, leading to negative
time-left values being displayed as part of the
rebalance status.
> BUG: 1457985
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17448
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Change-Id: I48d41d702e72db30af10e6b87b628baa605afa98
BUG: 1460914
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://review.gluster.org/17530
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 5 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 103 |
3 files changed, 83 insertions, 31 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 22061ef5682..c7427e5c81d 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -9199,7 +9199,7 @@ dht_notify (xlator_t *this, int event, void *data, ...) DHT_MSG_CHILD_DOWN, "Received CHILD_DOWN. Exiting"); if (conf->defrag) { - gf_defrag_stop (conf->defrag, + gf_defrag_stop (conf, GF_DEFRAG_STATUS_FAILED, NULL); } else { kill (getpid(), SIGTERM); @@ -9280,7 +9280,7 @@ dht_notify (xlator_t *this, int event, void *data, ...) if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STATUS_TIER) || (cmd == GF_DEFRAG_CMD_DETACH_STATUS)) - gf_defrag_status_get (defrag, output); + gf_defrag_status_get (conf, output); else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER) gf_defrag_start_detach_tier(defrag); else if (cmd == GF_DEFRAG_CMD_DETACH_START) @@ -9288,7 +9288,7 @@ dht_notify (xlator_t *this, int event, void *data, ...) else if (cmd == GF_DEFRAG_CMD_STOP || cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER || cmd == GF_DEFRAG_CMD_DETACH_STOP) - gf_defrag_stop (defrag, + gf_defrag_stop (conf, GF_DEFRAG_STATUS_STOPPED, output); else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER) ret = gf_defrag_pause_tier (this, defrag); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index c74beb26d96..ab44600d313 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -457,6 +457,7 @@ struct gf_defrag_info_ { uint64_t num_files_lookedup; uint64_t total_failures; uint64_t skipped; + uint64_t num_dirs_processed; gf_lock_t lock; int cmd; pthread_t th; @@ -1103,7 +1104,7 @@ int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postparent, dict_t *xdata); int -gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict); +gf_defrag_status_get (dht_conf_t *conf, dict_t *dict); void gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state); @@ 
-1124,7 +1125,7 @@ int gf_defrag_start_detach_tier (gf_defrag_info_t *defrag); int -gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status, +gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status, dict_t *output); void* diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 90e45b1c293..0e492e66d13 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -3022,6 +3022,11 @@ out: ret = 2; } + /* It does not matter if it errored out - this number is + * used to calculate rebalance estimated time to complete. + * No locking required as dirs are processed by a single thread. + */ + defrag->num_dirs_processed++; return ret; } int @@ -4121,7 +4126,7 @@ out: LOCK (&defrag->lock); { status = dict_new (); - gf_defrag_status_get (defrag, status); + gf_defrag_status_get (conf, status); if (ctx && ctx->notify) ctx->notify (GF_EN_DEFRAG_STATUS, status); if (status) @@ -4200,8 +4205,66 @@ out: return NULL; } + +uint64_t +gf_defrag_get_estimates (dht_conf_t *conf) +{ + gf_defrag_info_t *defrag = NULL; + double rate_lookedup = 0; + uint64_t dirs_processed = 0; + uint64_t total_processed = 0; + uint64_t tmp_count = 0; + uint64_t time_to_complete = 0; + struct timeval end = {0,}; + double elapsed = 0; + + defrag = conf->defrag; + + if (!g_totalfiles) + return 0; + + gettimeofday (&end, NULL); + elapsed = end.tv_sec - defrag->start_time.tv_sec; + + /* I tried locking before accessing num_files_lookedup and + * num_dirs_processed but the status function + * never seemed to get the lock, causing the status cli to + * hang. + */ + + dirs_processed = defrag->num_dirs_processed; + + total_processed = defrag->num_files_lookedup + + dirs_processed; + + /* rate at which files looked up */ + rate_lookedup = (total_processed)/elapsed; + + + /* We initially sum up dirs across all local subvols. 
+ * The same directories will be counted for each subvol so + * we want to ensure that they are only counted once. + */ + + tmp_count = g_totalfiles + - (dirs_processed * (conf->local_subvols_cnt - 1)); + + if (total_processed > g_totalfiles) + g_totalfiles = total_processed + 10000; + + time_to_complete = (tmp_count)/rate_lookedup; + + gf_log (THIS->name, GF_LOG_INFO, + "TIME: total_processed=%"PRIu64" tmp_cnt = %"PRIu64"," + "rate_lookedup=%f", total_processed, tmp_count, + rate_lookedup); + + return time_to_complete; +} + + int -gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) +gf_defrag_status_get (dht_conf_t *conf, dict_t *dict) { int ret = 0; uint64_t files = 0; @@ -4213,11 +4276,10 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) uint64_t demoted = 0; char *status = ""; double elapsed = 0; - uint64_t time_left = 0; - uint64_t time_to_complete = 0; - double rate_lookedup = 0; struct timeval end = {0,}; - + uint64_t time_to_complete = 0; + uint64_t time_left = 0; + gf_defrag_info_t *defrag = conf->defrag; if (!defrag) goto out; @@ -4238,34 +4300,20 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) elapsed = end.tv_sec - defrag->start_time.tv_sec; -/*START */ - -/* rate at which files looked up */ - if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) - && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) - && g_totalfiles) { + && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) { - rate_lookedup = (defrag->num_files_lookedup)/elapsed; - if (defrag->num_files_lookedup > g_totalfiles) - g_totalfiles = defrag->num_files_lookedup + 10000; - time_to_complete = (g_totalfiles)/rate_lookedup; + time_to_complete = gf_defrag_get_estimates (conf); time_left = time_to_complete - elapsed; gf_log (THIS->name, GF_LOG_INFO, - "TIME: num_files_lookedup=%"PRIu64",elapsed time = %f," - "rate_lookedup=%f", defrag->num_files_lookedup, elapsed, - rate_lookedup); - gf_log (THIS->name, GF_LOG_INFO, "TIME: Estimated total time to complete = 
%"PRIu64 - " seconds", time_to_complete); + " seconds, seconds left = %"PRIu64"", + time_to_complete, time_left); - gf_log (THIS->name, GF_LOG_INFO, - "TIME: Seconds left = %"PRIu64" seconds", time_left); } -/*END */ if (!dict) goto log; @@ -4362,6 +4410,7 @@ gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state) pthread_mutex_unlock (&tier_conf->pause_mutex); } + tier_pause_state_t gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf) { @@ -4515,12 +4564,14 @@ gf_defrag_start_detach_tier (gf_defrag_info_t *defrag) } int -gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status, +gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status, dict_t *output) { /* TODO: set a variable 'stop_defrag' here, it should be checked in defrag loop */ int ret = -1; + gf_defrag_info_t *defrag = conf->defrag; + GF_ASSERT (defrag); if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) { @@ -4532,7 +4583,7 @@ gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status, defrag->defrag_status = status; if (output) - gf_defrag_status_get (defrag, output); + gf_defrag_status_get (conf, output); ret = 0; out: gf_msg_debug ("", 0, "Returning %d", ret); |