diff options
| author | N Balachandran <nbalacha@redhat.com> | 2017-01-17 15:43:47 +0530 | 
|---|---|---|
| committer | Raghavendra G <rgowdapp@redhat.com> | 2017-01-19 20:01:47 -0800 | 
| commit | 2edd75ec8de17da89004859375844f60890a4df0 (patch) | |
| tree | 4ff010534660c47c2475e2af27478f2c4a2c920f | |
| parent | 7b5b7111c9d9a2a65e4f4d0abf832a88e021c576 (diff) | |
dht/rebalance: Estimate time to complete rebalance
The estimates are logged to the rebalance log, and the largest per-node
estimate is printed by the CLI, when running
`gluster v rebalance <vol> status`.
Change-Id: I9d51b139cd4c8dfde1ff2c2050720ae606c13fc6
BUG: 1396004
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: http://review.gluster.org/15893
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
| -rw-r--r-- | cli/src/cli-rpc-ops.c | 28 |
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 102 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 2 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 47 |
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 1 |
5 files changed, 172 insertions, 8 deletions
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 59a1a4c1266..b56457ecf63 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -1608,6 +1608,9 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,          int                sec          = 0;          gf_boolean_t       down         = _gf_false;  	gf_boolean_t       fix_layout   = _gf_false; +        uint64_t           max_time     = 0; +        uint64_t           time_left    = 0; +          ret = dict_get_int32 (dict, "count", &count);          if (ret) { @@ -1660,6 +1663,7 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,                  skipped = 0;                  status_str = NULL;                  elapsed = 0; +                time_left = 0;                  /* Check if status is NOT_STARTED, and continue early */                  memset (key, 0, 256); @@ -1737,6 +1741,15 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,                  if (ret)                          gf_log ("cli", GF_LOG_TRACE, "failed to get run-time"); +                memset (key, 0, 256); +                snprintf (key, 256, "time-left-%d", i); +                ret = dict_get_uint64 (dict, key, &time_left); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, +                                "failed to get time left"); +                if (time_left > max_time) +                        max_time = time_left; +                  /* Check for array bound */                  if (status_rcd >= GF_DEFRAG_STATUS_MAX)                          status_rcd = GF_DEFRAG_STATUS_MAX; @@ -1754,15 +1767,15 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,                          if (size_str) {                                  cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64                                            " %13" PRIu64" %13"PRIu64 " %20s " -                                     
    "%8d:%d:%d", node_name, files, +                                         "%8d:%02d:%02d", node_name, files,                                           size_str, lookup, failures, skipped,                                           status_str, hrs, min, sec);                          } else {                                  cli_out ("%40s %16"PRIu64 " %13"PRIu64 " %13"                                           PRIu64 " %13"PRIu64" %13"PRIu64 " %20s" -                                         " %8d:%d:%d", node_name, files, size, -                                         lookup, failures, skipped, status_str, -                                         hrs, min, sec); +                                         " %8d:%02d:%02d", node_name, files, +                                         size, lookup, failures, skipped, +                                         status_str, hrs, min, sec);                          }                  }                  GF_FREE(size_str); @@ -1772,6 +1785,13 @@ gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type,                           " Please check the nodes that are down using \'gluster"                           " peer status\' and start the glusterd on those nodes,"                           " else tier detach commit might fail!"); +        if (max_time) { +                hrs = max_time / 3600; +                min = ((int) max_time % 3600) / 60; +                sec = ((int) max_time % 3600) % 60; +                cli_out ("Estimated time left for rebalance to complete :" +                         " %8d:%02d:%02d", hrs, min, sec); +        }  out:          return ret;  } diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index dbbdfffef78..437ab2567ac 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -46,6 +46,9 @@                  }                                               \          }                       
                                \ +uint64_t g_totalfiles = 0; + +  void  gf_defrag_free_container (struct dht_container *container)  { @@ -3708,6 +3711,58 @@ gf_tier_wait_fix_lookup (gf_defrag_info_t *defrag) {  /******************Tier background Fix layout functions END********************/ +uint64_t gf_defrag_subvol_file_cnt (xlator_t *this, loc_t *root_loc) +{ +        int ret = -1; +        struct statvfs buf = {0,}; + +        if (!this) +                return 0; + +        ret = syncop_statfs (this, root_loc, &buf, NULL, NULL); +        if (ret) { +                /* Aargh! */ +                return 0; +        } +        return (buf.f_files - buf.f_ffree); +} + + +int gf_defrag_total_file_cnt (xlator_t *this, loc_t *root_loc) +{ +        dht_conf_t    *conf  = NULL; +        int            ret   = -1; +        int            i     = 0; +        uint64_t       num_files = 0; + + +        conf = this->private; +        if (!conf) { +                return ret; +        } + +        for (i = 0 ; i < conf->local_subvols_cnt; i++) { +                num_files = gf_defrag_subvol_file_cnt (conf->local_subvols[i], +                                                       root_loc); +                g_totalfiles += num_files; +                gf_msg (this->name, GF_LOG_INFO, 0, 0, "local subvol: %s," +                        "cnt = %"PRIu64, conf->local_subvols[i]->name, +                        num_files); +        } + +        /* FIXFIXFIX: halve the number of files to negate .glusterfs contents +           We need a better way to figure this out */ + +        g_totalfiles = g_totalfiles/2; +        if (g_totalfiles > 20000) +                g_totalfiles += 10000; + +        gf_msg (this->name, GF_LOG_INFO, 0, 0, +                "Total number of files = %"PRIu64, g_totalfiles); + +        return 0; +} +  int  gf_defrag_start_crawl (void *data) @@ -3732,6 +3787,7 @@ gf_defrag_start_crawl (void *data)          pthread_t               *tid                    = NULL;  
        gf_boolean_t            is_tier_detach          = _gf_false; +          this = data;          if (!this)                  goto exit; @@ -3861,6 +3917,13 @@ gf_defrag_start_crawl (void *data)                                  "are %s", conf->local_subvols[i]->name);                  } +                ret = gf_defrag_total_file_cnt (this, &loc); +                if (!ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Failed to get " +                                "the total number of files. Unable to estimate " +                                "time to complete rebalance."); +                } +                  /* Initialize global entry queue */                  defrag->queue = GF_CALLOC (1, sizeof (struct dht_container),                                             gf_dht_mt_container_t); @@ -4099,8 +4162,11 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)          uint64_t skipped = 0;          uint64_t promoted = 0;          uint64_t demoted = 0; -        char     *status = ""; +        char    *status = "";          double   elapsed = 0; +        uint64_t time_left = 0; +        uint64_t time_to_complete = 0; +        double rate_lookedup = 0;          struct timeval end = {0,}; @@ -4123,6 +4189,34 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)          elapsed = end.tv_sec - defrag->start_time.tv_sec; +/*START */ + +/* rate at which files looked up */ + + +        if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) +                && (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) +                && g_totalfiles) { + +                rate_lookedup = (defrag->num_files_lookedup)/elapsed; +                if (defrag->num_files_lookedup > g_totalfiles) +                        g_totalfiles = defrag->num_files_lookedup + 10000; +                time_to_complete = (g_totalfiles)/rate_lookedup; +                time_left = time_to_complete - elapsed; + +                gf_log (THIS->name, GF_LOG_INFO, 
+                        "TIME: num_files_lookedup=%"PRIu64",elapsed time = %f," +                        "rate_lookedup=%f", defrag->num_files_lookedup, elapsed, +                        rate_lookedup); +                gf_log (THIS->name, GF_LOG_INFO, +                        "TIME: Estimated total time to complete = %"PRIu64 +                        " seconds", time_to_complete); + +                gf_log (THIS->name, GF_LOG_INFO, +                        "TIME: Seconds left = %"PRIu64" seconds", time_left); +        } + +/*END */          if (!dict)                  goto log; @@ -4171,6 +4265,12 @@ gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)          if (ret)                  gf_log (THIS->name, GF_LOG_WARNING,                          "failed to set skipped file count"); + +        ret = dict_set_uint64 (dict, "time-left", time_left); +        if (ret) +                gf_log (THIS->name, GF_LOG_WARNING, +                        "failed to set time-left"); +  log:          switch (defrag->defrag_status) {          case GF_DEFRAG_STATUS_NOT_STARTED: diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 3853e148893..00b84e076c3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -1072,7 +1072,7 @@ glusterd_defrag_event_notify_handle (dict_t *dict)                          gf_msg (this->name, GF_LOG_ERROR, 0,                                  GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME, -                                "volname recieved (%s) is not prefixed with " +                                "volname received (%s) is not prefixed with "                                  "rebalance or tierd.", volname);                          ret = -1;                          goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index c408f12c10d..21482752c53 100644 --- 
a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -7613,6 +7613,7 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,                                        dict_t *rsp_dict, int32_t cmd)  {          int                             ret = 0; +        int                             ret2 = 0;          uint64_t                        files = 0;          uint64_t                        size = 0;          uint64_t                        lookup = 0; @@ -7623,6 +7624,7 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,          double                          run_time = 0;          uint64_t                        promoted = 0;          uint64_t                        demoted = 0; +        uint64_t                        time_left = 0;          this = THIS; @@ -7671,6 +7673,11 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,                  gf_msg_trace (this->name, 0,                          "failed to get run-time"); +        ret2 = dict_get_uint64 (rsp_dict, "time-left", &time_left); +        if (ret2) +                gf_msg_trace (this->name, 0, +                        "failed to get time left"); +          if (cmd == GF_DEFRAG_CMD_STATUS_TIER) {                  if (files)                          volinfo->tier.rebalance_files = files; @@ -7701,7 +7708,10 @@ glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo,                          volinfo->rebal.skipped_files = skipped;                  if (run_time)                          volinfo->rebal.rebalance_time = run_time; +                if (!ret2) +                        volinfo->rebal.time_left = time_left;          } +          if (promoted)                  volinfo->tier_info.promoted = promoted;          if (demoted) @@ -9324,7 +9334,7 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)                  ret = dict_set_uint64 (ctx_dict, key, value);                  if 
(ret) {                          gf_msg_debug (THIS->name, 0, -                                "failed to set lookuped file count"); +                                "failed to set looked up file count");                  }          } @@ -9380,6 +9390,18 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)          }          memset (key, 0, 256); +        snprintf (key, 256, "time-left-%d", index); +        ret = dict_get_uint64 (rsp_dict, key, &value); +        if (!ret) { +                memset (key, 0, 256); +                snprintf (key, 256, "time-left-%d", current_index); +                ret = dict_set_uint64 (ctx_dict, key, value); +                if (ret) { +                        gf_msg_debug (THIS->name, 0, +                                "failed to set time-left"); +                } +        } +        memset (key, 0, 256);          snprintf (key, 256, "demoted-%d", index);          ret = dict_get_uint64 (rsp_dict, key, &value);          if (!ret) { @@ -9521,7 +9543,7 @@ glusterd_volume_tier_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)                  ret = dict_set_uint64 (ctx_dict, key, value);                  if (ret) {                          gf_msg_debug (this->name, 0, -                                "failed to set lookuped file count"); +                                "failed to set looked up file count");                  }          } @@ -9601,6 +9623,19 @@ glusterd_volume_tier_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict)                  }          } +        memset (key, 0, 256); +        snprintf (key, 256, "time-left-%d", index); +        ret = dict_get_uint64 (rsp_dict, key, &value); +        if (!ret) { +                memset (key, 0, 256); +                snprintf (key, 256, "time-left-%d", count); +                ret = dict_set_uint64 (ctx_dict, key, value); +                if (ret) { +                        gf_msg_debug (THIS->name, 0, +                                "failed to set time-left"); 
+                } +        } +          ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY,                                  &task_id_str);          if (ret) { @@ -10271,6 +10306,14 @@ glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,                  glusterd_tier_or_rebalance_rsp (op_ctx, &volinfo->rebal, i);          memset (key, 0 , 256); +        snprintf (key, 256, "time-left-%d", i); +        ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.time_left); +        if (ret) +                gf_msg (THIS->name, GF_LOG_ERROR, errno, +                        GD_MSG_DICT_SET_FAILED, +                        "failed to set time left"); + +        memset (key, 0 , 256);          snprintf (key, 256, "promoted-%d", i);          ret = dict_set_uint64 (op_ctx, key, volinfo->tier_info.promoted);          if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index e6a4d8b65a8..32f29526fb4 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -317,6 +317,7 @@ struct glusterd_rebalance_ {          uint64_t                 rebalance_failures;          uuid_t                   rebalance_id;          double                   rebalance_time; +        uint64_t                 time_left;          glusterd_op_t            op;          dict_t                  *dict; /* Dict to store misc information                                          * like list of bricks being removed */  | 
