diff options
-rw-r--r-- | cli/src/cli-rpc-ops.c | 86 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 28 |
2 files changed, 79 insertions, 35 deletions
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index d6abd8f253a..ff44a22be78 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -19,6 +19,10 @@ #define INDENT_MAIN_HEAD "%-25s %s " +/* Do not show estimates if greater than this number */ +#define REBAL_ESTIMATE_SEC_UPPER_LIMIT (60*24*3600) +#define REBAL_ESTIMATE_START_TIME 600 + #include "cli.h" #include "compat-errno.h" #include "cli-cmd.h" @@ -1589,27 +1593,28 @@ int gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, gf_boolean_t is_tier) { - int ret = -1; - int count = 0; - int i = 1; - char key[256] = {0,}; - gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED; - uint64_t files = 0; - uint64_t size = 0; - uint64_t lookup = 0; - char *node_name = NULL; - uint64_t failures = 0; - uint64_t skipped = 0; - double elapsed = 0; - char *status_str = NULL; - char *size_str = NULL; - int hrs = 0; - int min = 0; - int sec = 0; - gf_boolean_t down = _gf_false; - gf_boolean_t fix_layout = _gf_false; - uint64_t max_time = 0; - uint64_t time_left = 0; + int ret = -1; + int count = 0; + int i = 1; + char key[256] = {0,}; + gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED; + uint64_t files = 0; + uint64_t size = 0; + uint64_t lookup = 0; + char *node_name = NULL; + uint64_t failures = 0; + uint64_t skipped = 0; + double elapsed = 0; + char *status_str = NULL; + char *size_str = NULL; + int32_t hrs = 0; + uint32_t min = 0; + uint32_t sec = 0; + gf_boolean_t down = _gf_false; + gf_boolean_t fix_layout = _gf_false; + uint64_t max_time = 0; + uint64_t time_left = 0; + gf_boolean_t show_estimates = _gf_false; ret = dict_get_int32 (dict, "count", &count); @@ -1688,6 +1693,8 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd) continue; + if (GF_DEFRAG_STATUS_STARTED == status_rcd) + show_estimates = _gf_true; snprintf (key, 256, "node-name-%d", i); ret = dict_get_str (dict, key, &node_name); @@ -1747,6 +1754,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, if (ret) gf_log ("cli", GF_LOG_TRACE, "failed to get time left"); + if (time_left > max_time) max_time = time_left; @@ -1757,8 +1765,8 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, status_str = cli_vol_task_status_str[status_rcd]; size_str = gf_uint64_2human_readable(size); hrs = elapsed / 3600; - min = ((int) elapsed % 3600) / 60; - sec = ((int) elapsed % 3600) % 60; + min = ((uint64_t) elapsed % 3600) / 60; + sec = ((uint64_t) elapsed % 3600) % 60; if (fix_layout) { cli_out ("%35s %50s %8d:%d:%d", node_name, status_str, @@ -1785,12 +1793,36 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type, " Please check the nodes that are down using \'gluster" " peer status\' and start the glusterd on those nodes," " else tier detach commit might fail!"); + + /* Max time will be non-zero if rebalance is still running */ if (max_time) { hrs = max_time / 3600; - min = ((int) max_time % 3600) / 60; - sec = ((int) max_time % 3600) % 60; - cli_out ("Estimated time left for rebalance to complete :" - " %8d:%02d:%02d", hrs, min, sec); + min = (max_time % 3600) / 60; + sec = (max_time % 3600) % 60; + + if (hrs < REBAL_ESTIMATE_SEC_UPPER_LIMIT) { + cli_out ("Estimated time left for rebalance to " + "complete : %8d:%02d:%02d", hrs, min, sec); + } else { + cli_out ("Estimated time left for rebalance to " + "complete : > 2 months. Please try again " + "later."); + } + } else { + /* Rebalance will return 0 if it could not calculate the + * estimates or if it is complete. + */ + if (!show_estimates) { + goto out; + } + if (elapsed <= REBAL_ESTIMATE_START_TIME) { + cli_out ("The estimated time for rebalance to complete " + "will be unavailable for the first 10 " + "minutes."); + } else { + cli_out ("Rebalance estimated time unavailable. Please " + "try again later."); + } } out: return ret; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 266dc5a99a9..f1d32a9a00c 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -18,13 +18,14 @@ #include <signal.h> #include "events.h" -#define GF_DISK_SECTOR_SIZE 512 +#define GF_DISK_SECTOR_SIZE 512 #define DHT_REBALANCE_PID 4242 /* Change it if required */ #define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ -#define MAX_MIGRATE_QUEUE_COUNT 500 -#define MIN_MIGRATE_QUEUE_COUNT 200 -#define MAX_REBAL_TYPE_SIZE 16 -#define FILE_CNT_INTERVAL 600 /* 10 mins */ +#define MAX_MIGRATE_QUEUE_COUNT 500 +#define MIN_MIGRATE_QUEUE_COUNT 200 +#define MAX_REBAL_TYPE_SIZE 16 +#define FILE_CNT_INTERVAL 600 /* 10 mins */ +#define ESTIMATE_START_INTERVAL 600 /* 10 mins */ #ifndef MAX #define MAX(a, b) (((a) > (b))?(a):(b)) @@ -2972,7 +2973,6 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container, !strcmp (df_entry->d_name, "..")) continue; - if (IA_ISDIR (df_entry->d_stat.ia_type)) { defrag->size_processed += df_entry->d_stat.ia_size; continue; @@ -4723,6 +4723,19 @@ gf_defrag_get_estimates_based_on_size (dht_conf_t *conf) gettimeofday (&now, NULL); elapsed = now.tv_sec - defrag->start_time.tv_sec; + /* Don't calculate the estimates for the first 10 minutes. + * It is unlikely to be accurate and estimates are not required + * if the process finishes in less than 10 mins. + */ + + if (elapsed < ESTIMATE_START_INTERVAL) { + gf_msg (THIS->name, GF_LOG_INFO, 0, 0, + "Rebalance estimates will not be available for the " + "first %d seconds.", ESTIMATE_START_INTERVAL); + + goto out; + } + total_processed = defrag->size_processed; /* rate at which files processed */ @@ -4734,7 +4747,6 @@ gf_defrag_get_estimates_based_on_size (dht_conf_t *conf) time_to_complete = (tmp_count)/rate_processed; } else { - gf_msg (THIS->name, GF_LOG_ERROR, 0, 0, "Unable to calculate estimated time for rebalance"); } @@ -4880,8 +4892,8 @@ gf_defrag_status_get (dht_conf_t *conf, dict_t *dict) "TIME: Estimated total time to complete based on" " count = %"PRIu64 " seconds, seconds left = %"PRIu64"", time_to_complete, time_left); - */ + time_to_complete = gf_defrag_get_estimates_based_on_size (conf); if (time_to_complete && (time_to_complete > elapsed)) |