From 6d992b8bcb4e902a34f3618d2dfe80b1b5543112 Mon Sep 17 00:00:00 2001 From: Richard Wareing Date: Wed, 6 Apr 2016 17:06:43 -0700 Subject: debug/io-stats: Adding stat for weighted & unweighted average latency Summary: - Our current approach to measuring "average fop latency" is badly flawed in that it doesn't weight the FOPs correctly according to how many occurred in the time interval. This makes Statisticians very sad. This patch adds an internally computed weighted average latency which will be far more efficient to display via ODS, as well as having the benefit of not being complete nonsense. - This is a port of D3148415 & D3405772 to 3.8 Reviewers: kvigor, dph, sshreyas Reviewed By: sshreyas Change-Id: Ie3618f279b545610b7ed1a8482243fcc8dc53217 Reviewed-on: https://review.gluster.org/18192 Reviewed-by: Shreyas Siravara Smoke: Gluster Build System CentOS-regression: Gluster Build System --- tests/basic/ios-dump.t | 43 +++++++++++++++++++++++++++++++++++ xlators/debug/io-stats/src/io-stats.c | 30 ++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 tests/basic/ios-dump.t diff --git a/tests/basic/ios-dump.t b/tests/basic/ios-dump.t new file mode 100644 index 00000000000..ddeb19d01ac --- /dev/null +++ b/tests/basic/ios-dump.t @@ -0,0 +1,43 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +function check_brick_inter_stats() { + local counter="$1" + local inter_cnt="" + + inter_cnt=$(grep -h "\".*inter.*$counter\"" \ + /var/lib/glusterd/stats/glusterfsd*.dump 2>/dev/null | + grep -v '\"0.0000\"' | wc -l) + if (( $inter_cnt == 3 )); then + echo "Y" + else + echo "N" + fi +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 diagnostics.stats-dump-interval 5 +TEST $CLI volume set $V0 diagnostics.count-fop-hits on +TEST $CLI volume set $V0 diagnostics.latency-measurement on +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +# Generate some FOPs +cd $M0 +for i in {1..10}; do + mkdir a + cd a + for g in {1..10}; do + dd if=/dev/zero of=test$g bs=128k count=1 + done +done + +EXPECT_WITHIN 30 "Y" check_brick_inter_stats fop.weighted_latency_ave_usec + +cleanup diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 2f9a2d92f15..bd0ba632baa 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -865,6 +865,11 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, float fop_lat_min; float fop_lat_max; double interval_sec; + double fop_ave_usec = 0.0; + double fop_ave_usec_sum = 0.0; + double weighted_fop_ave_usec = 0.0; + double weighted_fop_ave_usec_sum = 0.0; + long total_fop_hits = 0; loc_t unused_loc = {0, }; interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) - @@ -969,7 +974,32 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, ios_log (this, logfp, "\"%s.%s.fop.%s.latency_max_usec\": \"%0.2lf\",", key_prefix, str_prefix, lc_fop_name, fop_lat_max); + + fop_ave_usec_sum += fop_lat_ave; + weighted_fop_ave_usec_sum += fop_hits * fop_lat_ave; + total_fop_hits += fop_hits; + } + + if (total_fop_hits) { + weighted_fop_ave_usec = weighted_fop_ave_usec_sum/total_fop_hits; + /* Extra key that does not print out an entry w/ 0.00 for + * intervals with no data + */ + ios_log (this, logfp, + "\"%s.%s.fop.weighted_latency_ave_usec_nozerofill\": \"%0.4lf\",", + key_prefix, str_prefix, weighted_fop_ave_usec); } + ios_log (this, logfp, + "\"%s.%s.fop.weighted_latency_ave_usec\": \"%0.4lf\",", + key_prefix, str_prefix, weighted_fop_ave_usec); + ios_log (this, logfp, + "\"%s.%s.fop.weighted_fop_count\": \"%ld\",", + key_prefix, str_prefix, total_fop_hits); + + fop_ave_usec = fop_ave_usec_sum/GF_FOP_MAXVALUE; + ios_log (this, logfp, + "\"%s.%s.fop.unweighted_latency_ave_usec\":\"%0.4lf\",", + key_prefix, str_prefix, fop_ave_usec); dict_t *xattr = NULL; ret = syncop_getxattr (this, &unused_loc, &xattr, -- cgit