From 9f81eb0b24f27ea9797bb8a4ff062e6f77e872fd Mon Sep 17 00:00:00 2001 From: shishir gowda Date: Tue, 8 May 2012 16:21:29 +0530 Subject: glusterd/rebalance: Display run-time of rebalance process Added run-time value field to cli output of rebalance/remove-brick. A new cluster/distribute boolean option rebalance-stats when set to ON, time taken for migration of each file is logged. With rebalance-stats OFF (default), rebalance logs will only have entries showing time spent in each directory. Change-Id: I02a8918621120068cd71ffaf2999d30b3a2d10a2 BUG: 821987 Signed-off-by: shishir gowda Reviewed-on: http://review.gluster.com/3303 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- cli/src/cli-rpc-ops.c | 42 ++++++++++++++++++------- xlators/cluster/dht/src/dht-common.h | 2 ++ xlators/cluster/dht/src/dht-rebalance.c | 43 ++++++++++++++++++++++++-- xlators/cluster/dht/src/dht.c | 17 +++++++++- xlators/mgmt/glusterd/src/glusterd-op-sm.c | 7 +++++ xlators/mgmt/glusterd/src/glusterd-rebalance.c | 1 + xlators/mgmt/glusterd/src/glusterd-rpc-ops.c | 14 +++++++++ xlators/mgmt/glusterd/src/glusterd-utils.c | 8 +++++ xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 + xlators/mgmt/glusterd/src/glusterd.h | 1 + 10 files changed, 121 insertions(+), 15 deletions(-) diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 9737e13f929..2ab13261a9f 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -1040,6 +1040,7 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, char key[256] = {0,}; int32_t i = 1; uint64_t failures = 0; + double elapsed = 0; if (-1 == req->rpc_status) { goto out; @@ -1138,10 +1139,11 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, goto done; } } - cli_out ("%40s %16s %13s %13s %13s %14s", "Node", "Rebalanced-files", - "size", "scanned", "failures","status"); - cli_out ("%40s %16s %13s %13s %13s %14s", "---------", "-----------", - "-----------", "-----------", "-----------", "------------"); + cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files", + "size", "scanned", "failures", "status", "run time in secs"); + cli_out ("%40s %16s %13s %13s %13s %14s %14s", "---------", + "-----------", "-----------", "-----------", "-----------", + "------------", "-----------"); do { snprintf (key, 256, "node-uuid-%d", i); @@ -1185,6 +1187,13 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, gf_log (THIS->name, GF_LOG_TRACE, "failed to get failures count"); + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_get_double (dict, key, &elapsed); + if (ret) + gf_log (THIS->name, GF_LOG_TRACE, + "failed to get run-time"); + switch (status_rcd) { case GF_DEFRAG_STATUS_NOT_STARTED: status = "not started"; @@ -1203,8 +1212,8 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, break; } cli_out ("%40s %16"PRId64 "%13"PRId64 "%13"PRId64 "%13"PRId64 - " %14s", node_uuid, files, size, lookup, failures, - status); + " %14s %14.2f", node_uuid, files, size, lookup, + failures, status, elapsed); i++; } while (i <= counter); @@ -1469,6 +1478,7 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov, char *node_uuid = 0; gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED; uint64_t failures = 0; + double elapsed = 0; if (-1 == req->rpc_status) { @@ -1514,10 +1524,11 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov, } - cli_out ("%40s %16s %13s %13s %13s %14s", "Node", "Rebalanced-files", - "size", "scanned", "failures", "status"); - cli_out ("%40s %16s %13s %13s %13s %14s", "---------", "-----------", - "-----------", "-----------", "-----------", "------------"); + cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files", + "size", "scanned", "failures", "status", "run-time in secs"); + cli_out ("%40s %16s %13s %13s %13s %14s %14s", "---------", + "-----------", "-----------", "-----------", "-----------", + "------------", "------------"); do { snprintf (key, 256, "node-uuid-%d", i); @@ -1560,6 +1571,13 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov, gf_log (THIS->name, GF_LOG_TRACE, "Failed to get failure on files"); + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_get_double (dict, key, &elapsed); + if (ret) + gf_log (THIS->name, GF_LOG_TRACE, + "Failed to get run-time"); + switch (status_rcd) { case GF_DEFRAG_STATUS_NOT_STARTED: status = "not started"; @@ -1578,8 +1596,8 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov, break; } cli_out ("%40s %16"PRId64 "%13"PRId64 "%13"PRId64 "%13"PRId64 - " %14s", node_uuid, files, size, lookup, failures, - status); + " %14s %14.2f", node_uuid, files, size, lookup, + failures, status, elapsed); i++; } while (i <= counter); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 9f7723fdbde..28deaefc6e5 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -206,6 +206,8 @@ struct gf_defrag_info_ { pid_t pid; inode_t *root_inode; uuid_t node_uuid; + struct timeval start_time; + gf_boolean_t stats; }; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index fd07a1c7ec1..32c1774be00 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1050,9 +1050,15 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, char *uuid_str = NULL; uuid_t node_uuid = {0,}; int readdir_operrno = 0; + struct timeval dir_start = {0,}; + struct timeval end = {0,}; + double elapsed = {0,}; + struct timeval start = {0,}; gf_log (this->name, GF_LOG_INFO, "migrate data called on %s", loc->path); + gettimeofday (&dir_start, NULL); + fd = fd_create (loc->inode, defrag->pid); if (!fd) { gf_log (this->name, GF_LOG_ERROR, "Failed to create fd"); @@ -1097,7 +1103,9 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, continue; defrag->num_files_lookedup++; - + if (defrag->stats == _gf_true) { + gettimeofday (&start, NULL); + } loc_wipe (&entry_loc); ret =dht_build_child_loc (this, &entry_loc, loc, entry->d_name); @@ -1217,6 +1225,15 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, defrag->total_data += iatt.ia_size; } UNLOCK (&defrag->lock); + if (defrag->stats == _gf_true) { + gettimeofday (&end, NULL); + elapsed = (end.tv_sec - start.tv_sec) * 1e6 + + (end.tv_usec - start.tv_usec); + gf_log (this->name, GF_LOG_INFO, "Migration of " + "file:%s size:%"PRIu64" bytes took %.2f" + "secs", entry_loc.path, iatt.ia_size, + elapsed/1e6); + } } gf_dirent_free (&entries); @@ -1226,6 +1243,12 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (readdir_operrno == ENOENT) break; } + + gettimeofday (&end, NULL); + elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 + + (end.tv_usec - dir_start.tv_usec); + gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took " + "%.2f secs", loc->path, elapsed/1e6); ret = 0; out: if (free_entries) @@ -1416,6 +1439,8 @@ gf_defrag_start_crawl (void *data) defrag = conf->defrag; if (!defrag) goto out; + + gettimeofday (&defrag->start_time, NULL); dht_build_root_inode (this, &defrag->root_inode); if (!defrag->root_inode) goto out; @@ -1552,6 +1577,9 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) uint64_t lookup = 0; uint64_t failures = 0; char *status = ""; + double elapsed = 0; + struct timeval end = {0,}; + if (!defrag) goto out; @@ -1565,6 +1593,10 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) lookup = defrag->num_files_lookedup; failures = defrag->total_failures; + gettimeofday (&end, NULL); + + elapsed = end.tv_sec - defrag->start_time.tv_sec; + if (!dict) goto log; @@ -1587,6 +1619,12 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict) if (ret) gf_log (THIS->name, GF_LOG_WARNING, "failed to set status"); + if (elapsed) { + ret = dict_set_double (dict, "run-time", elapsed); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, + "failed to set run-time"); + } ret = dict_set_uint64 (dict, "failures", failures); log: @@ -1608,7 +1646,8 @@ log: break; } - gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s", status); + gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f " + "secs", status, elapsed); gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %" PRIu64", lookups: %"PRIu64", failures: %"PRIu64, files, size, lookup, failures); diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index e3930e2af3f..c25cdb4fd80 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -332,6 +332,11 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, options, uint32, out); + if (conf->defrag) { + GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats, + options, bool, out); + } + if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) { ret = dht_parse_decommissioned_bricks (this, conf, temp_str); if (ret == -1) @@ -401,9 +406,11 @@ init (xlator_t *this) defrag->cmd = cmd; + defrag->stats = _gf_false; + conf->defrag = defrag; - } + } conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON; if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) { @@ -432,6 +439,10 @@ init (xlator_t *this) GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down, bool, err); + if (defrag) { + GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err); + } + ret = dht_init_subvolumes (this, conf); if (ret == -1) { goto err; @@ -593,6 +604,10 @@ struct volume_options options[] = { { .key = {"node-uuid"}, .type = GF_OPTION_TYPE_STR, }, + { .key = {"rebalance-stats"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 07f35642b14..a507c76d71c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -3341,6 +3341,13 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, gf_log (THIS->name, GF_LOG_ERROR, "failed to set failure count"); + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_set_double (op_ctx, key, volinfo->rebalance_time); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set run-time"); + out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 8312610f5be..20142a1cc4b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -257,6 +257,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, volinfo->rebalance_data = 0; volinfo->lookedup_files = 0; volinfo->rebalance_failures = 0; + volinfo->rebalance_time = 0; volinfo->defrag_cmd = cmd; glusterd_store_perform_node_state_store (volinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 2f9b99b976a..6d89e4c03d9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -1118,6 +1118,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict) int32_t i = 0; char *node_uuid = NULL; char *node_uuid_str = NULL; + double elapsed_time = 0; GF_ASSERT (rsp_dict); @@ -1229,6 +1230,19 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict) } } + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", index); + ret = dict_get_double (rsp_dict, key, &elapsed_time); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_set_double (ctx_dict, key, elapsed_time); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set run-time"); + } + } + ret = 0; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 9971c4d859b..cd11d0a4595 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -5483,6 +5483,7 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; uint64_t failures = 0; xlator_t *this = NULL; + double run_time = 0; this = THIS; @@ -5511,6 +5512,11 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, gf_log (this->name, GF_LOG_TRACE, "failed to get failure count"); + ret = dict_get_double (rsp_dict, "run-time", &run_time); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get run-time"); + if (files) volinfo->rebalance_files = files; if (size) @@ -5521,6 +5527,8 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, volinfo->defrag_status = status; if (failures) volinfo->rebalance_failures = failures; + if (run_time) + volinfo->rebalance_time = run_time; return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index c8ff573b10b..0d82f1624b5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -117,6 +117,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.lookup-unhashed", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.min-free-disk", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.min-free-inodes", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, + {"cluster.rebalance-stats", "cluster/distribute", NULL, NULL, NO_DOC, 0 }, {"cluster.entry-change-log", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, {"cluster.read-subvolume", "cluster/replicate", NULL, NULL, NO_DOC, 0 }, diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 11259a498ef..1ba922a80c7 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -229,6 +229,7 @@ struct glusterd_volinfo_ { glusterd_defrag_info_t *defrag; gf_cli_defrag_type defrag_cmd; uint64_t rebalance_failures; + double rebalance_time; /* Replace brick status */ gf_rb_status_t rb_status; -- cgit