diff options
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 5 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 103 |
3 files changed, 83 insertions, 31 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 109106082e6..dfc58e49c17 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -9056,7 +9056,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)
                                 DHT_MSG_CHILD_DOWN,
                                 "Received CHILD_DOWN. Exiting");
                         if (conf->defrag) {
-                                gf_defrag_stop (conf->defrag,
+                                gf_defrag_stop (conf,
                                                 GF_DEFRAG_STATUS_FAILED, NULL);
                         } else {
                                 kill (getpid(), SIGTERM);
@@ -9139,7 +9139,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)
                 if ((cmd == GF_DEFRAG_CMD_STATUS) ||
                     (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
                     (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
-                        gf_defrag_status_get (defrag, output);
+                        gf_defrag_status_get (conf, output);
                 else if (cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
                         gf_defrag_start_detach_tier(defrag);
                 else if (cmd == GF_DEFRAG_CMD_DETACH_START)
@@ -9147,7 +9147,7 @@ dht_notify (xlator_t *this, int event, void *data, ...)
                 else if (cmd == GF_DEFRAG_CMD_STOP ||
                          cmd == GF_DEFRAG_CMD_STOP_DETACH_TIER ||
                          cmd == GF_DEFRAG_CMD_DETACH_STOP)
-                        gf_defrag_stop (defrag,
+                        gf_defrag_stop (conf,
                                         GF_DEFRAG_STATUS_STOPPED, output);
                 else if (cmd == GF_DEFRAG_CMD_PAUSE_TIER)
                         ret = gf_defrag_pause_tier (this, defrag);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 333fae8c894..44150a3d77b 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -507,6 +507,7 @@ struct gf_defrag_info_ {
         uint64_t num_files_lookedup;
         uint64_t total_failures;
         uint64_t skipped;
+        uint64_t num_dirs_processed;
         gf_lock_t lock;
         int cmd;
         pthread_t th;
@@ -1154,7 +1155,7 @@ int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                      struct iatt *postparent, dict_t *xdata);
 
 int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+gf_defrag_status_get (dht_conf_t *conf, dict_t *dict);
 
 void
 gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state);
@@ -1175,7 +1176,7 @@ int
 gf_defrag_start_detach_tier (gf_defrag_info_t *defrag);
 
 int
-gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status,
                 dict_t *output);
 
 void*
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index ead75abc2a7..ce8483c961a 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -3323,6 +3323,11 @@ out:
                 ret = 2;
         }
 
+        /* It does not matter if it errored out - this number is
+         * used to calculate rebalance estimated time to complete.
+         * No locking required as dirs are processed by a single thread.
+         */
+        defrag->num_dirs_processed++;
         return ret;
 }
 int
@@ -4439,7 +4444,7 @@ out:
         LOCK (&defrag->lock);
         {
                 status = dict_new ();
-                gf_defrag_status_get (defrag, status);
+                gf_defrag_status_get (conf, status);
                 if (ctx && ctx->notify)
                         ctx->notify (GF_EN_DEFRAG_STATUS, status);
                 if (status)
@@ -4521,8 +4526,66 @@ out:
         return NULL;
 }
 
+
+uint64_t
+gf_defrag_get_estimates (dht_conf_t *conf)
+{
+        gf_defrag_info_t *defrag = NULL;
+        double rate_lookedup = 0;
+        uint64_t dirs_processed = 0;
+        uint64_t total_processed = 0;
+        uint64_t tmp_count = 0;
+        uint64_t time_to_complete = 0;
+        struct timeval end = {0,};
+        double elapsed = 0;
+
+        defrag = conf->defrag;
+
+        if (!g_totalfiles)
+                return 0;
+
+        gettimeofday (&end, NULL);
+        elapsed = end.tv_sec - defrag->start_time.tv_sec;
+
+        /* I tried locking before accessing num_files_lookedup and
+         * num_dirs_processed but the status function
+         * never seemed to get the lock, causing the status cli to
+         * hang.
+         */
+
+        dirs_processed = defrag->num_dirs_processed;
+
+        total_processed = defrag->num_files_lookedup
+                          + dirs_processed;
+
+        /* rate at which files looked up */
+        rate_lookedup = (total_processed)/elapsed;
+
+
+        /* We initially sum up dirs across all local subvols.
+         * The same directories will be counted for each subvol so
+         * we want to ensure that they are only counted once.
+         */
+
+        tmp_count = g_totalfiles
+                    - (dirs_processed * (conf->local_subvols_cnt - 1));
+
+        if (total_processed > g_totalfiles)
+                g_totalfiles = total_processed + 10000;
+
+        time_to_complete = (tmp_count)/rate_lookedup;
+
+        gf_log (THIS->name, GF_LOG_INFO,
+                "TIME: total_processed=%"PRIu64" tmp_cnt = %"PRIu64","
+                "rate_lookedup=%f", total_processed, tmp_count,
+                rate_lookedup);
+
+        return time_to_complete;
+}
+
+
 int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+gf_defrag_status_get (dht_conf_t *conf, dict_t *dict)
 {
         int ret = 0;
         uint64_t files = 0;
@@ -4534,11 +4597,10 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
         uint64_t demoted = 0;
         char *status = "";
         double elapsed = 0;
-        uint64_t time_left = 0;
-        uint64_t time_to_complete = 0;
-        double rate_lookedup = 0;
         struct timeval end = {0,};
-
+        uint64_t time_to_complete = 0;
+        uint64_t time_left = 0;
+        gf_defrag_info_t *defrag = conf->defrag;
 
         if (!defrag)
                 goto out;
@@ -4559,34 +4621,20 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
 
         elapsed = end.tv_sec - defrag->start_time.tv_sec;
 
-/*START */
-
-/* rate at which files looked up */
-
 
         if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER)
-                && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)
-                && g_totalfiles) {
+            && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED)) {
 
-                rate_lookedup = (defrag->num_files_lookedup)/elapsed;
-                if (defrag->num_files_lookedup > g_totalfiles)
-                        g_totalfiles = defrag->num_files_lookedup + 10000;
-                time_to_complete = (g_totalfiles)/rate_lookedup;
+                time_to_complete = gf_defrag_get_estimates (conf);
                 time_left = time_to_complete - elapsed;
 
                 gf_log (THIS->name, GF_LOG_INFO,
-                        "TIME: num_files_lookedup=%"PRIu64",elapsed time = %f,"
-                        "rate_lookedup=%f", defrag->num_files_lookedup, elapsed,
-                        rate_lookedup);
-                gf_log (THIS->name, GF_LOG_INFO,
                         "TIME: Estimated total time to complete = %"PRIu64
-                        " seconds", time_to_complete);
+                        " seconds, seconds left = %"PRIu64"",
+                        time_to_complete, time_left);
 
-                gf_log (THIS->name, GF_LOG_INFO,
-                        "TIME: Seconds left = %"PRIu64" seconds", time_left);
         }
 
-/*END */
         if (!dict)
                 goto log;
 
@@ -4683,6 +4731,7 @@ gf_defrag_set_pause_state (gf_tier_conf_t *tier_conf, tier_pause_state_t state)
         pthread_mutex_unlock (&tier_conf->pause_mutex);
 }
 
+
 tier_pause_state_t
 gf_defrag_get_pause_state (gf_tier_conf_t *tier_conf)
 {
@@ -4836,12 +4885,14 @@ gf_defrag_start_detach_tier (gf_defrag_info_t *defrag)
 }
 
 int
-gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
+gf_defrag_stop (dht_conf_t *conf, gf_defrag_status_t status,
                 dict_t *output)
 {
         /* TODO: set a variable 'stop_defrag' here, it should be checked
            in defrag loop */
         int ret = -1;
+        gf_defrag_info_t *defrag = conf->defrag;
+
         GF_ASSERT (defrag);
 
         if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
@@ -4853,7 +4904,7 @@ gf_defrag_stop (gf_defrag_info_t *defrag, gf_defrag_status_t status,
         defrag->defrag_status = status;
 
         if (output)
-                gf_defrag_status_get (defrag, output);
+                gf_defrag_status_get (conf, output);
         ret = 0;
 out:
         gf_msg_debug ("", 0, "Returning %d", ret);