diff options
author | Varsha Rao <varao@redhat.com> | 2018-02-06 18:56:45 +0530 |
---|---|---|
committer | Jeff Darcy <jeff@pl.atyp.us> | 2018-02-08 17:01:12 +0000 |
commit | aa4372bf427152f671de52fc6e02b93ca09f22c7 (patch) | |
tree | 8c07cb7c3a9ad8122be3b7db403726dd7749c2ed | |
parent | 5e751b4c05375aa8b0f217ca73629d7d43faccf6 (diff) |
performance/io-threads: expose io-thread queue depths
The following release-3.8-fb branch patch is upstreamed:
> io-stats: Expose io-thread queue depths
> Commit ID: 69509ee7d2
> https://review.gluster.org/#/c/18143/
> By Shreyas Siravara <sshreyas@fb.com>
Changes in this patch:
- Replace iot_pri_t with gf_fop_pri_t
- Replace IOT_PRI_{HI, LO, NORMAL, MAX, LEAST} with
GF_FOP_PRI_{HI, LO, NORMAL, MAX, LEAST}
- Use dict_unref() instead of dict_destroy()
This patch is required to forward-port the io-threads namespace patch.
Updates: #401
Change-Id: I1b47a63185a441a30fbc423ca1015df7b36c2518
Signed-off-by: Varsha Rao <varao@redhat.com>
-rw-r--r-- | libglusterfs/src/dict.h | 4 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 37 | ||||
-rw-r--r-- | tests/basic/stats-dump.t | 7 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 9 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 5 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats.c | 27 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 2 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.c | 95 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.h | 20 |
9 files changed, 158 insertions, 48 deletions
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h index ab8a8a56352..1ce055a5940 100644 --- a/libglusterfs/src/dict.h +++ b/libglusterfs/src/dict.h @@ -62,6 +62,8 @@ typedef struct _data_pair data_pair_t; #define DICT_KEY_VALUE_MAX_SIZE 1048576 +#define dict_for_each(d, c) for (c = d->members_list; c; c = c->next) + struct _data { unsigned char is_static:1; unsigned char is_const:1; @@ -160,8 +162,8 @@ data_t *data_from_uint16 (uint16_t value); char *data_to_str (data_t *data); void *data_to_bin (data_t *data); void *data_to_ptr (data_t *data); - data_t * data_copy (data_t *old); + int dict_foreach (dict_t *this, int (*fn)(dict_t *this, char *key, diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 438ce059fad..d702a5fa9ba 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -85,6 +85,7 @@ #define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check" #define ZR_DUMP_FUSE "dump-fuse" #define ZR_FUSE_MOUNTOPTS "fuse-mountopts" +#define IO_THREADS_QUEUE_SIZE_KEY "io-thread-queue-size" #define GF_XATTR_CLRLK_CMD "glusterfs.clrlk" #define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo" @@ -308,6 +309,42 @@ #define GF_LK_MANDATORY 1 /* maps to GLFS_LK_MANDATORY from libgfapi*/ #define GF_LOCK_MODE "glusterfs.lk.lkmode" +#define GF_CHECK_XATTR_KEY_AND_GOTO(key, cmpkey, errval, lbl) \ + do { \ + if (key && strcmp (key, cmpkey) == 0) { \ + errval = -EINVAL; \ + goto lbl; \ + } \ + } while (0) \ + + +typedef enum { + GF_FOP_PRI_UNSPEC = -1, /* Priority not specified */ + GF_FOP_PRI_HI = 0, /* low latency */ + GF_FOP_PRI_NORMAL, /* normal */ + GF_FOP_PRI_LO, /* bulk */ + GF_FOP_PRI_LEAST, /* least */ + GF_FOP_PRI_MAX, /* Highest */ +} gf_fop_pri_t; + +static const char * const FOP_PRI_STRINGS[] = { + "HIGH", + "NORMAL", + "LOW", + "LEAST" +}; + +static inline const char *fop_pri_to_string (gf_fop_pri_t pri) +{ + if (pri < 0) + return "UNSPEC"; + + if (pri >= GF_FOP_PRI_MAX) + return "INVALID"; + + return 
FOP_PRI_STRINGS[pri]; +} + const char *fop_enum_to_pri_string (glusterfs_fop_t fop); #define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */ diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t index 5f35db4e0bd..3eed80e5579 100644 --- a/tests/basic/stats-dump.t +++ b/tests/basic/stats-dump.t @@ -12,6 +12,7 @@ TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} TEST $CLI volume set $V0 diagnostics.latency-measurement on TEST $CLI volume set $V0 diagnostics.count-fop-hits on TEST $CLI volume set $V0 diagnostics.stats-dump-interval 1 +TEST $CLI volume set $V0 performance.nfs.io-threads on TEST $CLI volume set $V0 nfs.disable off TEST $CLI volume start $V0 EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available @@ -35,4 +36,10 @@ TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_bac TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump|tail -1|cut -d: -f2) != "0," ] TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump|tail -1|cut -d: -f2) != "0," ] +# Test that io-stats is getting queue sizes from io-threads +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy0.dump +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump + cleanup; diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 75b2bf8e22c..cdd22475cbe 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1648,6 +1648,15 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, return 0; /* + * Heal daemons don't have IO threads ... 
and as a result they + * send this getxattr down and eventually crash :( + */ + if (strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { + ret = -EINVAL; + goto out; + } + + /* * Special xattrs which need responses from all subvols */ if (afr_is_special_xattr (name, &cbk, 0)) { diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5717650dd12..445bd590c4d 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -4562,7 +4562,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *hashed_subvol = NULL; - xlator_t *mds_subvol = NULL; + xlator_t *mds_subvol = NULL; xlator_t *cached_subvol = NULL; dht_conf_t *conf = NULL; dht_local_t *local = NULL; @@ -4572,6 +4572,9 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, int cnt = 0; char *node_uuid_key = NULL; int ret = -1; + + GF_CHECK_XATTR_KEY_AND_GOTO (key, IO_THREADS_QUEUE_SIZE_KEY, + op_errno, err); VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 4ea45b058ae..985c5fbc389 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -36,6 +36,7 @@ #include "logging.h" #include "cli1-xdr.h" #include "statedump.h" +#include "syncop.h" #include <pwd.h> #include <grp.h> #include "upcall-utils.h" @@ -798,6 +799,8 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, double weighted_fop_ave_usec = 0.0; double weighted_fop_ave_usec_sum = 0.0; long total_fop_hits = 0; + loc_t unused_loc = {0, }; + dict_t *xattr = NULL; interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) - (stats->started_at.tv_sec * 1000000.0 + @@ -950,6 +953,30 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, } } + ret = syncop_getxattr (this, &unused_loc, &xattr, + IO_THREADS_QUEUE_SIZE_KEY, NULL, NULL); + if (xattr) { + /* + * Iterate over the dictionary 
returned to us by io-threads and + * dump the results to the stats file. + */ + data_pair_t *curr = NULL; + + dict_for_each (xattr, curr) { + ios_log (this, logfp, + "\"%s.%s.%s.queue_size\": \"%d\",", + key_prefix, str_prefix, curr->key, + data_to_int32 (curr->value)); + } + + /* Free the dictionary */ + dict_unref (xattr); + } else { + gf_log (this->name, GF_LOG_WARNING, + "Unable to get queue size counts from " + "the io-threads translator!"); + } + if (interval == -1) { ios_log (this, logfp, "\"%s.%s.uptime\": \"%"PRId64"\",", key_prefix, str_prefix, diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 31564751c9a..0c984c97192 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -4983,7 +4983,7 @@ static gf_boolean_t volgen_is_shd_compatible_xl (char *xl_type) { char *shd_xls[] = {"cluster/replicate", "cluster/disperse", - NULL}; + "debug/io-stats", NULL}; if (gf_get_index_by_elem (shd_xls, xl_type) != -1) return _gf_true; diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 80d0168bf4c..a5a9543ae39 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -56,10 +56,10 @@ iot_get_ctx (xlator_t *this, client_t *client) int i; if (client_ctx_get (client, this, (void **)&ctx) != 0) { - ctx = GF_CALLOC (IOT_PRI_MAX, sizeof(*ctx), + ctx = GF_CALLOC (GF_FOP_PRI_MAX, sizeof(*ctx), gf_iot_mt_client_ctx_t); if (ctx) { - for (i = 0; i < IOT_PRI_MAX; ++i) { + for (i = 0; i < GF_FOP_PRI_MAX; ++i) { INIT_LIST_HEAD (&ctx[i].clients); INIT_LIST_HEAD (&ctx[i].reqs); } @@ -82,7 +82,7 @@ __iot_dequeue (iot_conf_t *conf, int *pri) iot_client_ctx_t *ctx; *pri = -1; - for (i = 0; i < IOT_PRI_MAX; i++) { + for (i = 0; i < GF_FOP_PRI_MAX; i++) { if (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]) { continue; @@ -133,8 +133,8 @@ __iot_enqueue 
(iot_conf_t *conf, call_stub_t *stub, int pri) client_t *client = stub->frame->root->client; iot_client_ctx_t *ctx; - if (pri < 0 || pri >= IOT_PRI_MAX) - pri = IOT_PRI_MAX-1; + if (pri < 0 || pri >= GF_FOP_PRI_MAX) + pri = GF_FOP_PRI_MAX-1; if (client) { ctx = iot_get_ctx (THIS, client); @@ -252,25 +252,28 @@ do_iot_schedule (iot_conf_t *conf, call_stub_t *stub, int pri) } char* -iot_get_pri_meaning (iot_pri_t pri) +iot_get_pri_meaning (gf_fop_pri_t pri) { char *name = NULL; switch (pri) { - case IOT_PRI_HI: + case GF_FOP_PRI_HI: name = "fast"; break; - case IOT_PRI_NORMAL: + case GF_FOP_PRI_NORMAL: name = "normal"; break; - case IOT_PRI_LO: + case GF_FOP_PRI_LO: name = "slow"; break; - case IOT_PRI_LEAST: + case GF_FOP_PRI_LEAST: name = "least priority"; break; - case IOT_PRI_MAX: + case GF_FOP_PRI_MAX: name = "invalid"; break; + case GF_FOP_PRI_UNSPEC: + name = "unspecified"; + break; } return name; } @@ -279,11 +282,11 @@ int iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) { int ret = -1; - iot_pri_t pri = IOT_PRI_MAX - 1; + gf_fop_pri_t pri = GF_FOP_PRI_MAX - 1; iot_conf_t *conf = this->private; if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) { - pri = IOT_PRI_LEAST; + pri = GF_FOP_PRI_LEAST; goto out; } @@ -302,7 +305,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_SETACTIVELK: case GF_FOP_ICREATE: case GF_FOP_NAMELINK: - pri = IOT_PRI_HI; + pri = GF_FOP_PRI_HI; break; case GF_FOP_CREATE: @@ -328,7 +331,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_FSETXATTR: case GF_FOP_REMOVEXATTR: case GF_FOP_FREMOVEXATTR: - pri = IOT_PRI_NORMAL; + pri = GF_FOP_PRI_NORMAL; break; case GF_FOP_READ: @@ -344,7 +347,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_DISCARD: case GF_FOP_ZEROFILL: case GF_FOP_SEEK: - pri = IOT_PRI_LO; + pri = GF_FOP_PRI_LO; break; case GF_FOP_FORGET: @@ -606,6 +609,36 @@ int iot_getxattr 
(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { + iot_conf_t *conf = NULL; + dict_t *depths = NULL; + int i = 0; + + conf = this->private; + + if (conf && name && strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { + /* + * We explicitly do not want a reference count + * for this dict in this translator + */ + depths = get_new_dict (); + if (!depths) + goto unwind_special_getxattr; + + for (i = 0; i < GF_FOP_PRI_MAX; i++) { + if (dict_set_int32 (depths, + (char *)fop_pri_to_string (i), + conf->queue_sizes[i]) != 0) { + dict_unref (depths); + depths = NULL; + goto unwind_special_getxattr; + } + } + +unwind_special_getxattr: + STACK_UNWIND_STRICT (getxattr, frame, 0, 0, depths, xdata); + return 0; + } + IOT_FOP (getxattr, frame, this, loc, name, xdata); return 0; } @@ -793,7 +826,7 @@ __iot_workers_scale (iot_conf_t *conf) int i = 0; char thread_name[GF_THREAD_NAMEMAX] = {0,}; - for (i = 0; i < IOT_PRI_MAX; i++) + for (i = 0; i < GF_FOP_PRI_MAX; i++) scale += min (conf->queue_sizes[i], conf->ac_iot_limit[i]); if (scale < IOT_MIN_THREADS) @@ -931,13 +964,13 @@ iot_priv_dump (xlator_t *this) gf_proc_dump_write("idle_time", "%d", conf->idle_time); gf_proc_dump_write("stack_size", "%zd", conf->stack_size); gf_proc_dump_write("high_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_HI]); + conf->ac_iot_limit[GF_FOP_PRI_HI]); gf_proc_dump_write("normal_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_NORMAL]); + conf->ac_iot_limit[GF_FOP_PRI_NORMAL]); gf_proc_dump_write("low_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_LO]); + conf->ac_iot_limit[GF_FOP_PRI_LO]); gf_proc_dump_write("least_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_LEAST]); + conf->ac_iot_limit[GF_FOP_PRI_LEAST]); return 0; } @@ -955,17 +988,19 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32, out); GF_OPTION_RECONF ("high-prio-threads", - conf->ac_iot_limit[IOT_PRI_HI], options, 
int32, out); + conf->ac_iot_limit[GF_FOP_PRI_HI], options, int32, + out); GF_OPTION_RECONF ("normal-prio-threads", - conf->ac_iot_limit[IOT_PRI_NORMAL], options, int32, + conf->ac_iot_limit[GF_FOP_PRI_NORMAL], options, int32, out); GF_OPTION_RECONF ("low-prio-threads", - conf->ac_iot_limit[IOT_PRI_LO], options, int32, out); + conf->ac_iot_limit[GF_FOP_PRI_LO], options, int32, + out); GF_OPTION_RECONF ("least-prio-threads", - conf->ac_iot_limit[IOT_PRI_LEAST], options, int32, + conf->ac_iot_limit[GF_FOP_PRI_LEAST], options, int32, out); GF_OPTION_RECONF ("enable-least-priority", conf->least_priority, options, bool, out); @@ -1029,16 +1064,16 @@ init (xlator_t *this) GF_OPTION_INIT ("thread-count", conf->max_count, int32, out); GF_OPTION_INIT ("high-prio-threads", - conf->ac_iot_limit[IOT_PRI_HI], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_HI], int32, out); GF_OPTION_INIT ("normal-prio-threads", - conf->ac_iot_limit[IOT_PRI_NORMAL], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_NORMAL], int32, out); GF_OPTION_INIT ("low-prio-threads", - conf->ac_iot_limit[IOT_PRI_LO], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_LO], int32, out); GF_OPTION_INIT ("least-prio-threads", - conf->ac_iot_limit[IOT_PRI_LEAST], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_LEAST], int32, out); GF_OPTION_INIT ("idle-time", conf->idle_time, int32, out); GF_OPTION_INIT ("enable-least-priority", conf->least_priority, @@ -1046,7 +1081,7 @@ init (xlator_t *this) conf->this = this; - for (i = 0; i < IOT_PRI_MAX; i++) { + for (i = 0; i < GF_FOP_PRI_MAX; i++) { INIT_LIST_HEAD (&conf->clients[i]); INIT_LIST_HEAD (&conf->no_client[i].clients); INIT_LIST_HEAD (&conf->no_client[i].reqs); diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index 9648f74f39b..bd1c3f523c4 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -36,18 +36,8 @@ struct iot_conf; #define IOT_DEFAULT_THREADS 
16 #define IOT_MAX_THREADS 64 - #define IOT_THREAD_STACK_SIZE ((size_t)(256*1024)) - -typedef enum { - IOT_PRI_HI = 0, /* low latency */ - IOT_PRI_NORMAL, /* normal */ - IOT_PRI_LO, /* bulk */ - IOT_PRI_LEAST, /* least */ - IOT_PRI_MAX, -} iot_pri_t; - typedef struct { struct list_head clients; struct list_head reqs; @@ -63,18 +53,18 @@ struct iot_conf { int32_t idle_time; /* in seconds */ - struct list_head clients[IOT_PRI_MAX]; + struct list_head clients[GF_FOP_PRI_MAX]; /* * It turns out that there are several ways a frame can get to us * without having an associated client (server_first_lookup was the * first one I hit). Instead of trying to update all such callers, * we use this to queue them. */ - iot_client_ctx_t no_client[IOT_PRI_MAX]; + iot_client_ctx_t no_client[GF_FOP_PRI_MAX]; - int32_t ac_iot_limit[IOT_PRI_MAX]; - int32_t ac_iot_count[IOT_PRI_MAX]; - int queue_sizes[IOT_PRI_MAX]; + int32_t ac_iot_limit[GF_FOP_PRI_MAX]; + int32_t ac_iot_count[GF_FOP_PRI_MAX]; + int queue_sizes[GF_FOP_PRI_MAX]; int queue_size; pthread_attr_t w_attr; gf_boolean_t least_priority; /*Enable/Disable least-priority */ |