From 9f81eb0b24f27ea9797bb8a4ff062e6f77e872fd Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Tue, 8 May 2012 16:21:29 +0530 Subject: glusterd/rebalance: Display run-time of rebalance process Added run-time value field to cli output of rebalance/remove-brick. When the new cluster/distribute boolean option rebalance-stats is set to ON, the time taken for migration of each file is logged. With rebalance-stats OFF (default), rebalance logs will only have entries showing time spent in each directory. Change-Id: I02a8918621120068cd71ffaf2999d30b3a2d10a2 BUG: 821987 Signed-off-by: shishir gowda Reviewed-on: http://review.gluster.com/3303 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- xlators/cluster/dht/src/dht-common.h | 2 ++ xlators/cluster/dht/src/dht-rebalance.c | 43 +++++++++++++++++++++++++++++++++++++++++-- xlators/cluster/dht/src/dht.c | 17 ++++++++++++++++- 3 files changed, 59 insertions(+), 3 deletions(-) (limited to 'xlators/cluster/dht/src') diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 9f7723fdb..28deaefc6 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -206,6 +206,8 @@ struct gf_defrag_info_ { pid_t pid; inode_t *root_inode; uuid_t node_uuid; + struct timeval start_time; + gf_boolean_t stats; }; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index fd07a1c7e..32c1774be 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1050,9 +1050,15 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, char *uuid_str = NULL; uuid_t node_uuid = {0,}; int readdir_operrno = 0; + struct timeval dir_start = {0,}; + struct timeval end = {0,}; + double elapsed = {0,}; + struct timeval start = {0,}; gf_log (this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); + gettimeofday (&dir_start, NULL); + fd = fd_create (loc->inode, defrag->pid); if (!fd) { gf_log 
(this->name, GF_LOG_ERROR, "Failed to create fd"); @@ -1097,7 +1103,9 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, continue; defrag->num_files_lookedup++; - + if (defrag->stats == _gf_true) { + gettimeofday (&start, NULL); + } loc_wipe (&entry_loc); ret =dht_build_child_loc (this, &entry_loc, loc, entry->d_name); @@ -1217,6 +1225,15 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, defrag->total_data += iatt.ia_size; } UNLOCK (&defrag->lock); + if (defrag->stats == _gf_true) { + gettimeofday (&end, NULL); + elapsed = (end.tv_sec - start.tv_sec) * 1e6 + + (end.tv_usec - start.tv_usec); + gf_log (this->name, GF_LOG_INFO, "Migration of " + "file:%s size:%"PRIu64" bytes took %.2f" + "secs", entry_loc.path, iatt.ia_size, + elapsed/1e6); + } } gf_dirent_free (&entries); @@ -1226,6 +1243,12 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (readdir_operrno == ENOENT) break; } + + gettimeofday (&end, NULL); + elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + + (end.tv_usec - dir_start.tv_usec); + gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took " + "%.2f secs", loc->path, elapsed/1e6); ret = 0; out: if (free_entries) @@ -1416,6 +1439,8 @@ gf_defrag_start_crawl (void *data) defrag = conf->defrag; if (!defrag) goto out; + + gettimeofday (&defrag->start_time, NULL); dht_build_root_inode (this, &defrag->root_inode); if (!defrag->root_inode) goto out; @@ -1552,6 +1577,9 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) uint64_t lookup = 0; uint64_t failures = 0; char *status = ""; + double elapsed = 0; + struct timeval end = {0,}; + if (!defrag) goto out; @@ -1565,6 +1593,10 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) lookup = defrag->num_files_lookedup; failures = defrag->total_failures; + gettimeofday (&end, NULL); + + elapsed = end.tv_sec - defrag->start_time.tv_sec; + if (!dict) goto log; @@ -1587,6 +1619,12 @@ 
gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) if (ret) gf_log (THIS->name, GF_LOG_WARNING, "failed to set status"); + if (elapsed) { + ret = dict_set_double (dict, "run-time", elapsed); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set run-time"); + } ret = dict_set_uint64 (dict, "failures", failures); log: @@ -1608,7 +1646,8 @@ log: break; } - gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s", status); + gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f " + "secs", status, elapsed); gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %" PRIu64", lookups: %"PRIu64", failures: %"PRIu64, files, size, lookup, failures); diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index e3930e2af..c25cdb4fd 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -332,6 +332,11 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, options, uint32, out); + if (conf->defrag) { + GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats, + options, bool, out); + } + if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) { ret = dht_parse_decommissioned_bricks (this, conf, temp_str); if (ret == -1) @@ -401,9 +406,11 @@ init (xlator_t *this) defrag->cmd = cmd; + defrag->stats = _gf_false; + conf->defrag = defrag; - } + } conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON; if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) { @@ -432,6 +439,10 @@ init (xlator_t *this) GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down, bool, err); + if (defrag) { + GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err); + } + ret = dht_init_subvolumes (this, conf); if (ret == -1) { goto err; @@ -593,6 +604,10 @@ struct volume_options options[] = { { .key = {"node-uuid"}, .type = GF_OPTION_TYPE_STR, }, + { .key = {"rebalance-stats"}, + .type = GF_OPTION_TYPE_BOOL, + 
.default_value = "off", + }, { .key = {NULL} }, }; -- cgit