From 7f2e67d40d1006e88fda86eb04699c15db3440ee Mon Sep 17 00:00:00 2001
From: Amar Tumballi
Date: Wed, 11 Oct 2017 15:08:49 +0530
Subject: metrics: provide options to dump metrics from xlators

* Introduce xlator methods to allow dumping of metrics
* Separate options to get the metrics dumped in a path

Updates #168

Change-Id: I7df80df33b71d6f449f03c2332665b4a45f6ddf2
Signed-off-by: Amar Tumballi
---
 libglusterfs/src/Makefile.am  |   4 +-
 libglusterfs/src/globals.c    |  25 +++-
 libglusterfs/src/glusterfs.h  |   5 +
 libglusterfs/src/latency.c    |  12 --
 libglusterfs/src/latency.h    |   3 -
 libglusterfs/src/monitoring.c | 320 ++++++++++++++++++++++++++++++++++++++++++
 libglusterfs/src/monitoring.h |  21 ++++
 libglusterfs/src/xlator.c     |   6 +
 libglusterfs/src/xlator.h     |   5 +
 9 files changed, 383 insertions(+), 18 deletions(-)
 create mode 100644 libglusterfs/src/monitoring.c
 create mode 100644 libglusterfs/src/monitoring.h

(limited to 'libglusterfs')

diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 094646c5e3f..e4fe6125a3d 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -35,7 +35,7 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
 	$(CONTRIBDIR)/timer-wheel/timer-wheel.c \
 	$(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \
 	$(CONTRIBDIR)/xxhash/xxhash.c \
-	compound-fop-utils.c throttle-tbf.c
+	compound-fop-utils.c throttle-tbf.c monitoring.c
 
 nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c defaults.c
 nodist_libglusterfs_la_HEADERS = y.tab.h protocol-common.h
@@ -58,7 +58,7 @@ libglusterfs_la_HEADERS = common-utils.h defaults.h default-args.h \
 	syncop-utils.h parse-utils.h libglusterfs-messages.h \
 	lvm-defaults.h quota-common-utils.h rot-buffs.h \
 	compat-uuid.h upcall-utils.h throttle-tbf.h events.h\
-	compound-fop-utils.h atomic.h
+	compound-fop-utils.h atomic.h monitoring.h
 
 libglusterfs_ladir = $(includedir)/glusterfs
 
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 6bed1b546ee..f1eaccc9305 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -157,6 +157,9 @@ global_xl_reconfigure (xlator_t *this, dict_t *options)
         GF_OPTION_RECONF ("measure-latency", bool_opt, options, bool, out);
         this->ctx->measure_latency = bool_opt;
 
+        GF_OPTION_RECONF ("metrics-dump-path", this->ctx->config.metrics_dumppath,
+                          options, str, out);
+
         /* TODO: add more things here */
         ret = 0;
 out:
@@ -166,7 +169,19 @@ out:
 static int
 global_xl_init (xlator_t *this)
 {
-        return 0;
+        int ret = -1;
+        gf_boolean_t bool_opt = false;
+
+        GF_OPTION_INIT ("measure-latency", bool_opt, bool, out);
+        this->ctx->measure_latency = bool_opt;
+
+        GF_OPTION_INIT ("metrics-dump-path", this->ctx->config.metrics_dumppath,
+                        str, out);
+
+        ret = 0;
+
+out:
+        return ret;
 }
 
 static void
@@ -184,6 +199,14 @@ struct volume_options global_xl_options[] = {
           .tags = {"global", "context"},
           .description = "Use this option to toggle measuring latency"
         },
+        { .key = {"metrics-dump-path"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = "{{gluster_workdir}}/metrics",
+          .op_version = {GD_OP_VERSION_4_0_0},
+          .flags = OPT_FLAG_SETTABLE,
+          .tags = {"global", "context"},
+          .description = "Use this option to set the metrics dump path"
+        },
         { .key = {NULL},},
 };
 
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index aa28b80222e..4dee7f00ce6 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -553,6 +553,11 @@ struct _glusterfs_ctx {
 
         gf_lock_t volfile_lock;
 
+        /* configuration related elements, which gets changed
+           from global xlator */
+        struct {
+                char *metrics_dumppath;
+        } config;
 
         struct {
                 gf_atomic_t max_dict_pairs;
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index 4c83024e85d..076c019c607 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -93,15 +93,3 @@ gf_proc_dump_latency_info (xlator_t *xl)
         }
 }
 
-
-void
-gf_latency_toggle (int signum, glusterfs_ctx_t *ctx)
-{
-        if (ctx) {
-                ctx->measure_latency = !ctx->measure_latency;
-                gf_msg ("[core]", GF_LOG_INFO, 0,
-                        LG_MSG_LATENCY_MEASUREMENT_STATE,
-                        "Latency measurement turned %s",
-                        ctx->measure_latency ? "on" : "off");
-        }
-}
diff --git a/libglusterfs/src/latency.h b/libglusterfs/src/latency.h
index 132520c1263..73a93242341 100644
--- a/libglusterfs/src/latency.h
+++ b/libglusterfs/src/latency.h
@@ -20,7 +20,4 @@ typedef struct fop_latency {
         uint64_t count;
 } fop_latency_t;
 
-void
-gf_latency_toggle (int signum, glusterfs_ctx_t *ctx);
-
 #endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c
new file mode 100644
index 00000000000..25103867e24
--- /dev/null
+++ b/libglusterfs/src/monitoring.c
@@ -0,0 +1,320 @@
+/*
+  Copyright (c) 2017 Red Hat, Inc.
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#include "monitoring.h"
+#include "xlator.h"
+#include "syscall.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/time.h>
+
+/* Return 'str', or a printable marker when it is unset: passing NULL
+   for a "%s" conversion is undefined behaviour */
+static inline const char *
+metric_str (const char *str)
+{
+        return str ? str : "(null)";
+}
+
+/* Write the memory accounting records of 'xl' to 'fd' as '#' comment
+   lines. Nothing is written if 'xl' has no accounting data, or is not
+   part of the active graph */
+static void
+dump_mem_acct_details(xlator_t *xl, int fd)
+{
+        struct mem_acct_rec *mem_rec;
+        int i = 0;
+
+        if (!xl || !xl->mem_acct || (xl->ctx->active != xl->graph))
+                return;
+
+        dprintf (fd, "# %s.%s.total.num_types %d\n", xl->type, xl->name,
+                 xl->mem_acct->num_types);
+
+        dprintf (fd, "# type, in-use-size, in-use-units, max-size, "
+                 "max-units, total-allocs\n");
+
+        for (i = 0; i < xl->mem_acct->num_types; i++) {
+                mem_rec = &xl->mem_acct->rec[i];
+                /* types without live allocations are not reported */
+                if (mem_rec->num_allocs == 0)
+                        continue;
+                dprintf (fd, "# %s, %"GF_PRI_SIZET", %u, %"GF_PRI_SIZET", %u,"
+                         " %u\n", mem_rec->typestr, mem_rec->size,
+                         mem_rec->num_allocs, mem_rec->max_size,
+                         mem_rec->max_num_allocs, mem_rec->total_allocs);
+        }
+}
+
+/* Write the process wide allocation counters to 'fd'. Compiled in only
+   when MEMORY_ACCOUNTING_STATS is set */
+static void
+dump_global_memory_accounting (int fd)
+{
+#if MEMORY_ACCOUNTING_STATS
+        int i = 0;
+        uint64_t count = 0;
+
+        uint64_t tcalloc = GF_ATOMIC_GET (gf_memory_stat_counts.total_calloc);
+        uint64_t tmalloc = GF_ATOMIC_GET (gf_memory_stat_counts.total_malloc);
+        uint64_t tfree = GF_ATOMIC_GET (gf_memory_stat_counts.total_free);
+
+        dprintf (fd, "memory.total.calloc %lu\n", tcalloc);
+        dprintf (fd, "memory.total.malloc %lu\n", tmalloc);
+        dprintf (fd, "memory.total.realloc %lu\n",
+                 GF_ATOMIC_GET (gf_memory_stat_counts.total_realloc));
+        dprintf (fd, "memory.total.free %lu\n", tfree);
+        dprintf (fd, "memory.total.in-use %lu\n", ((tcalloc + tmalloc) - tfree));
+
+        for (i = 0; i < GF_BLK_MAX_VALUE; i++) {
+                count = GF_ATOMIC_GET (gf_memory_stat_counts.blk_size[i]);
+                dprintf (fd, "memory.total.blk_size.%s %lu\n",
+                         gf_mem_stats_blk[i].blk_size_str, count);
+        }
+
+        dprintf (fd, "#----\n");
+#endif
+
+        /* This is not a metric to be watched in admin guide,
+           but keeping it here till we resolve all leak-issues
+           would be great */
+}
+
+
+/* Write the wind/fop counters and latencies of 'xl' to 'fd'. The
+   interval counters are reset once reported.
+   NOTE(review): reading and resetting a counter are separate steps, so
+   an update landing in between would be lost - confirm that is fine */
+static void
+dump_latency_and_count (xlator_t *xl, int fd)
+{
+        int32_t index = 0;
+        uint64_t fop;
+        uint64_t cbk;
+        uint64_t count;
+
+        dprintf (fd, "%s.total.winds.count %lu\n", xl->name, xl->winds);
+
+        /* Need 'fuse' data, and don't need all the old graph info */
+        if ((xl != xl->ctx->master) && (xl->ctx->active != xl->graph))
+                return;
+
+        count = GF_ATOMIC_GET (xl->stats.total.count);
+        dprintf (fd, "%s.total.fop-count %lu\n", xl->name, count);
+
+        count = GF_ATOMIC_GET (xl->stats.interval.count);
+        dprintf (fd, "%s.interval.fop-count %lu\n", xl->name, count);
+        GF_ATOMIC_INIT (xl->stats.interval.count, 0);
+
+        for (index = 0; index < GF_FOP_MAXVALUE; index++) {
+                fop = GF_ATOMIC_GET (xl->stats.total.metrics[index].fop);
+                if (fop) {
+                        dprintf (fd, "%s.total.%s.count %lu\n",
+                                 xl->name, gf_fop_list[index], fop);
+                }
+                fop = GF_ATOMIC_GET (xl->stats.interval.metrics[index].fop);
+                if (fop) {
+                        dprintf (fd, "%s.interval.%s.count %lu\n",
+                                 xl->name, gf_fop_list[index], fop);
+                }
+                cbk = GF_ATOMIC_GET (xl->stats.interval.metrics[index].cbk);
+                if (cbk) {
+                        dprintf (fd, "%s.interval.%s.fail_count %lu\n",
+                                 xl->name, gf_fop_list[index], cbk);
+                }
+                if (xl->stats.interval.latencies[index].count != 0.0) {
+                        dprintf (fd, "%s.interval.%s.latency %lf\n",
+                                 xl->name, gf_fop_list[index],
+                                 (xl->stats.interval.latencies[index].total /
+                                  xl->stats.interval.latencies[index].count));
+                        dprintf (fd, "%s.interval.%s.max %lf\n",
+                                 xl->name, gf_fop_list[index],
+                                 xl->stats.interval.latencies[index].max);
+                        dprintf (fd, "%s.interval.%s.min %lf\n",
+                                 xl->name, gf_fop_list[index],
+                                 xl->stats.interval.latencies[index].min);
+                }
+                GF_ATOMIC_INIT (xl->stats.interval.metrics[index].cbk, 0);
+                GF_ATOMIC_INIT (xl->stats.interval.metrics[index].fop, 0);
+        }
+        memset (xl->stats.interval.latencies, 0,
+                sizeof (xl->stats.interval.latencies));
+}
+
+/* Write the call-stack counters kept in ctx->pool to 'fd' */
+static inline void
+dump_call_stack_details (glusterfs_ctx_t *ctx, int fd)
+{
+        dprintf (fd, "total.stack.count %lu\n",
+                 GF_ATOMIC_GET (ctx->pool->total_count));
+        dprintf (fd, "total.stack.in-flight %lu\n",
+                 ctx->pool->cnt);
+}
+
+/* Write the dict usage counters of 'ctx' to 'fd' */
+static inline void
+dump_dict_details (glusterfs_ctx_t *ctx, int fd)
+{
+        uint64_t total_dicts = 0;
+        uint64_t total_pairs = 0;
+
+        total_dicts = GF_ATOMIC_GET (ctx->stats.total_dicts_used);
+        total_pairs = GF_ATOMIC_GET (ctx->stats.total_pairs_used);
+
+        dprintf (fd, "total.dict.max-pairs-per %lu\n",
+                 GF_ATOMIC_GET (ctx->stats.max_dict_pairs));
+        dprintf (fd, "total.dict.pairs-used %lu\n", total_pairs);
+        dprintf (fd, "total.dict.used %lu\n", total_dicts);
+        /* no average exists until a dict is used; dividing by zero
+           here would kill the process */
+        if (total_dicts) {
+                dprintf (fd, "total.dict.average-pairs %lu\n",
+                         (total_pairs / total_dicts));
+        }
+}
+
+/* Placeholder: no inode metrics are written yet */
+static void
+dump_inode_stats (glusterfs_ctx_t *ctx, int fd)
+{
+}
+
+/* Write the header identifying the dumping process, followed by the
+   process wide (non-xlator) metrics, to 'fd' */
+static void
+dump_global_metrics (glusterfs_ctx_t *ctx, int fd)
+{
+        struct timeval tv;
+        time_t nowtime;
+        struct tm nowtm = {0,};
+        char tmbuf[64] = {0,};
+
+        gettimeofday(&tv, NULL);
+        nowtime = tv.tv_sec;
+        /* localtime() returns a buffer shared by all threads, and NULL
+           on failure; localtime_r() fills our own buffer instead */
+        localtime_r(&nowtime, &nowtm);
+        strftime(tmbuf, sizeof tmbuf, "%Y-%m-%d %H:%M:%S", &nowtm);
+
+        /* Let every file have information on which process dumped info */
+        dprintf (fd, "## %s\n", metric_str (ctx->cmdlinestr));
+        dprintf (fd, "### %s\n", tmbuf);
+        dprintf (fd, "### BrickName: %s\n",
+                 metric_str (ctx->cmd_args.brick_name));
+        dprintf (fd, "### MountName: %s\n",
+                 metric_str (ctx->cmd_args.mount_point));
+        dprintf (fd, "### VolumeName: %s\n",
+                 metric_str (ctx->cmd_args.volume_name));
+
+        /* Dump memory accounting */
+        dump_global_memory_accounting (fd);
+        dprintf (fd, "# -----\n");
+
+        dump_call_stack_details (ctx, fd);
+        dump_dict_details (ctx, fd);
+        dprintf (fd, "# -----\n");
+
+        dump_inode_stats (ctx, fd);
+        dprintf (fd, "# -----\n");
+}
+
+/* Write the metrics of every xlator linked from the top of the active
+   graph, and then of the master xlator (if any), to 'fd' */
+static void
+dump_xl_metrics (glusterfs_ctx_t *ctx, int fd)
+{
+        xlator_t *xl;
+
+        /* there is nothing to walk when no graph is active */
+        xl = ctx->active ? ctx->active->top : NULL;
+
+        while (xl) {
+                dump_latency_and_count (xl, fd);
+                dump_mem_acct_details (xl, fd);
+                if (xl->dump_metrics)
+                        xl->dump_metrics (xl, fd);
+                xl = xl->next;
+        }
+
+        if (ctx->master) {
+                xl = ctx->master;
+
+                dump_latency_and_count (xl, fd);
+                dump_mem_acct_details (xl, fd);
+                if (xl->dump_metrics)
+                        xl->dump_metrics (xl, fd);
+        }
+
+        return;
+}
+
+/* Dump all metrics of the process into a freshly created
+   'gmetrics.XXXXXX' file below the configured metrics dump path
+   (GLUSTER_METRICS_DIR when none is configured).
+
+   Returns the path of the file, which the caller has to GF_FREE(), or
+   NULL if the file could not be created. Errors while writing are only
+   logged. mkstemp() does not create directories, so the dump path must
+   already exist */
+char *
+gf_monitor_metrics (glusterfs_ctx_t *ctx)
+{
+        int ret = -1;
+        int fd = -1;
+        char *filepath = NULL;
+        char *dumppath = NULL;
+
+        if (!ctx)
+                return NULL;
+
+        dumppath = ctx->config.metrics_dumppath;
+        if (dumppath == NULL) {
+                dumppath = GLUSTER_METRICS_DIR;
+        }
+
+        ret = gf_asprintf(&filepath, "%s/gmetrics.XXXXXX", dumppath);
+        if (ret < 0) {
+                return NULL;
+        }
+
+        fd = mkstemp (filepath);
+        if (fd < 0) {
+                gf_msg ("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+                        "failed to open tmp file %s (%s)",
+                        filepath, strerror (errno));
+                GF_FREE (filepath);
+                return NULL;
+        }
+
+        dump_global_metrics (ctx, fd);
+
+        dump_xl_metrics (ctx, fd);
+
+        /* This below line is used just to capture any errors with dprintf() */
+        ret = dprintf (fd, "\n# End of metrics\n");
+        if (ret < 0) {
+                gf_msg ("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
+                        "dprintf() failed: %s", strerror (errno));
+        }
+
+        ret = sys_fsync (fd);
+        if (ret < 0) {
+                gf_msg ("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
+                        "fsync() failed: %s", strerror (errno));
+        }
+        sys_close (fd);
+
+        /* Figure this out, not happy with returning this string */
+        return filepath;
+}
diff --git a/libglusterfs/src/monitoring.h b/libglusterfs/src/monitoring.h
new file mode 100644
index 00000000000..1c08bfc7eb6
--- /dev/null
+++ b/libglusterfs/src/monitoring.h
@@ -0,0 +1,21 @@
+/*
+  Copyright (c) 2017 Red Hat, Inc.
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MONITORING_H__
+#define __MONITORING_H__
+
+#include "glusterfs.h"
+
+#define GLUSTER_METRICS_DIR "/var/run/gluster/metrics"
+
+char *
+gf_monitor_metrics (glusterfs_ctx_t *ctx);
+
+#endif /* __MONITORING_H__ */
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 3e108312d60..03495463a53 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -373,6 +373,12 @@ int xlator_dynload_newway (xlator_t *xl)
                               xl->name);
         }
 
+        xl->dump_metrics = xlapi->dump_metrics;
+        if (!xl->dump_metrics) {
+                gf_msg_trace ("xlator", 0, "%s: method missing (dump_metrics)",
+                              xl->name);
+        }
+
         vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
                              gf_common_mt_volume_opt_list_t);
         if (!vol_opt) {
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 81199bd1809..8a5ee819e5c 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -982,6 +982,8 @@ struct _xlator {
         int32_t (*init) (xlator_t *this);
         int32_t (*reconfigure) (xlator_t *this, dict_t *options);
         int32_t (*mem_acct_init) (xlator_t *this);
+        int32_t (*dump_metrics) (xlator_t *this, int fd);
+
         event_notify_fn_t notify;
 
         gf_loglevel_t loglevel; /* Log level for translator */
@@ -1071,6 +1073,9 @@ typedef struct {
            optional. called during translator initialization */
         int32_t (*mem_acct_init) (xlator_t *this);
 
+        /* dump_metrics(): used for providing internal metrics. optional */
+        int32_t (*dump_metrics) (xlator_t *this, int fd);
+
         /* notify(): used for handling the notification of events from
            either the parent or child in the graph. optional. */
         event_notify_fn_t notify;
-- 
cgit