summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Varsha Rao <varao@redhat.com> 2018-02-06 18:56:45 +0530
committer Jeff Darcy <jeff@pl.atyp.us> 2018-02-08 17:01:12 +0000
commit aa4372bf427152f671de52fc6e02b93ca09f22c7 (patch)
tree 8c07cb7c3a9ad8122be3b7db403726dd7749c2ed
parent 5e751b4c05375aa8b0f217ca73629d7d43faccf6 (diff)
performance/io-threads: expose io-thread queue depths
The following release-3.8-fb branch patch is upstreamed:

> io-stats: Expose io-thread queue depths
> Commit ID: 69509ee7d2
> https://review.gluster.org/#/c/18143/
> By Shreyas Siravara <sshreyas@fb.com>

Changes in this patch:
- Replace iot_pri_t with gf_fop_pri_t
- Replace IOT_PRI_{HI, LO, NORMAL, MAX, LEAST} with
  GF_FOP_PRI_{HI, LO, NORMAL, MAX, LEAST}
- Use dict_unref() instead of dict_destroy()

This patch is required to forward port io-threads namespace patch.

Updates: #401
Change-Id: I1b47a63185a441a30fbc423ca1015df7b36c2518
Signed-off-by: Varsha Rao <varao@redhat.com>
-rw-r--r-- libglusterfs/src/dict.h 4
-rw-r--r-- libglusterfs/src/glusterfs.h 37
-rw-r--r-- tests/basic/stats-dump.t 7
-rw-r--r-- xlators/cluster/afr/src/afr-inode-read.c 9
-rw-r--r-- xlators/cluster/dht/src/dht-common.c 5
-rw-r--r-- xlators/debug/io-stats/src/io-stats.c 27
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volgen.c 2
-rw-r--r-- xlators/performance/io-threads/src/io-threads.c 95
-rw-r--r-- xlators/performance/io-threads/src/io-threads.h 20
9 files changed, 158 insertions, 48 deletions
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
index ab8a8a56352..1ce055a5940 100644
--- a/libglusterfs/src/dict.h
+++ b/libglusterfs/src/dict.h
@@ -62,6 +62,8 @@ typedef struct _data_pair data_pair_t;
#define DICT_KEY_VALUE_MAX_SIZE 1048576
+/*
+ * Walk c over every entry reachable from d->members_list, following ->next
+ * until it is NULL.  Both arguments are parenthesised so that expressions
+ * such as `dicts + 1` or `&dicts[1]` expand with the intended precedence.
+ */
+#define dict_for_each(d, c) \
+        for ((c) = (d)->members_list; (c); (c) = (c)->next)
+
struct _data {
unsigned char is_static:1;
unsigned char is_const:1;
@@ -160,8 +162,8 @@ data_t *data_from_uint16 (uint16_t value);
char *data_to_str (data_t *data);
void *data_to_bin (data_t *data);
void *data_to_ptr (data_t *data);
-
data_t * data_copy (data_t *old);
+
int dict_foreach (dict_t *this,
int (*fn)(dict_t *this,
char *key,
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 438ce059fad..d702a5fa9ba 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -85,6 +85,7 @@
#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
#define ZR_DUMP_FUSE "dump-fuse"
#define ZR_FUSE_MOUNTOPTS "fuse-mountopts"
+#define IO_THREADS_QUEUE_SIZE_KEY "io-thread-queue-size"
#define GF_XATTR_CLRLK_CMD "glusterfs.clrlk"
#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
@@ -308,6 +309,42 @@
#define GF_LK_MANDATORY 1 /* maps to GLFS_LK_MANDATORY from libgfapi*/
#define GF_LOCK_MODE "glusterfs.lk.lkmode"
+/*
+ * When key is non-NULL and compares equal to cmpkey, store -EINVAL in errval
+ * and jump to lbl; otherwise fall through to the next statement.
+ *
+ * key is evaluated twice, so pass a plain variable rather than an expression
+ * with side effects.
+ *
+ * NOTE(review): errval receives a negative errno.  Callers that hand in an
+ * op_errno-style variable should confirm the code at lbl expects that sign.
+ */
+#define GF_CHECK_XATTR_KEY_AND_GOTO(key, cmpkey, errval, lbl)          \
+        do {                                                            \
+                if ((key) && strcmp ((key), (cmpkey)) == 0) {           \
+                        (errval) = -EINVAL;                             \
+                        goto lbl;                                       \
+                }                                                       \
+        } while (0)
+
+
+/*
+ * Priority classes for fops.  The non-negative values are used directly as
+ * array indices, so GF_FOP_PRI_MAX is the number of real priorities and is
+ * not a priority itself.
+ */
+typedef enum {
+        GF_FOP_PRI_UNSPEC = -1,         /* Priority not specified */
+        GF_FOP_PRI_HI = 0,              /* low latency */
+        GF_FOP_PRI_NORMAL,              /* normal */
+        GF_FOP_PRI_LO,                  /* bulk */
+        GF_FOP_PRI_LEAST,               /* least */
+        GF_FOP_PRI_MAX,                 /* count of priorities; not a valid one */
+} gf_fop_pri_t;
+
+/*
+ * Display names indexed by gf_fop_pri_t.  The array is sized by
+ * GF_FOP_PRI_MAX and filled by designated index, so adding a priority to the
+ * enum can never make a lookup run past the end of the table.
+ */
+static const char * const FOP_PRI_STRINGS[GF_FOP_PRI_MAX] = {
+        [GF_FOP_PRI_HI]     = "HIGH",
+        [GF_FOP_PRI_NORMAL] = "NORMAL",
+        [GF_FOP_PRI_LO]     = "LOW",
+        [GF_FOP_PRI_LEAST]  = "LEAST",
+};
+
+/*
+ * Return the display name for pri: "UNSPEC" for any negative value,
+ * "INVALID" for GF_FOP_PRI_MAX and above (or for a priority that has no
+ * table entry), otherwise the matching FOP_PRI_STRINGS entry.
+ */
+static inline const char *fop_pri_to_string (gf_fop_pri_t pri)
+{
+        const char *name = NULL;
+
+        if (pri < 0)
+                return "UNSPEC";
+
+        if (pri >= GF_FOP_PRI_MAX)
+                return "INVALID";
+
+        name = FOP_PRI_STRINGS[pri];
+        return name ? name : "INVALID";
+}
+
const char *fop_enum_to_pri_string (glusterfs_fop_t fop);
#define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */
diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t
index 5f35db4e0bd..3eed80e5579 100644
--- a/tests/basic/stats-dump.t
+++ b/tests/basic/stats-dump.t
@@ -12,6 +12,7 @@ TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
TEST $CLI volume set $V0 diagnostics.latency-measurement on
TEST $CLI volume set $V0 diagnostics.count-fop-hits on
TEST $CLI volume set $V0 diagnostics.stats-dump-interval 1
+TEST $CLI volume set $V0 performance.nfs.io-threads on
TEST $CLI volume set $V0 nfs.disable off
TEST $CLI volume start $V0
EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
@@ -35,4 +36,10 @@ TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_bac
TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump|tail -1|cut -d: -f2) != "0," ]
TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump|tail -1|cut -d: -f2) != "0," ]
+# Test that io-stats is getting queue sizes from io-threads
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy0.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump
+
cleanup;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 75b2bf8e22c..cdd22475cbe 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1648,6 +1648,15 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
/*
+ * Heal daemons don't have IO threads ... and as a result they
+ * send this getxattr down and eventually crash :(
+ */
+ if (strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
* Special xattrs which need responses from all subvols
*/
if (afr_is_special_xattr (name, &cbk, 0)) {
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 5717650dd12..445bd590c4d 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -4562,7 +4562,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
xlator_t *hashed_subvol = NULL;
- xlator_t *mds_subvol = NULL;
+ xlator_t *mds_subvol = NULL;
xlator_t *cached_subvol = NULL;
dht_conf_t *conf = NULL;
dht_local_t *local = NULL;
@@ -4572,6 +4572,9 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
int cnt = 0;
char *node_uuid_key = NULL;
int ret = -1;
+
+ GF_CHECK_XATTR_KEY_AND_GOTO (key, IO_THREADS_QUEUE_SIZE_KEY,
+ op_errno, err);
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 4ea45b058ae..985c5fbc389 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -36,6 +36,7 @@
#include "logging.h"
#include "cli1-xdr.h"
#include "statedump.h"
+#include "syncop.h"
#include <pwd.h>
#include <grp.h>
#include "upcall-utils.h"
@@ -798,6 +799,8 @@ io_stats_dump_global_to_json_logfp (xlator_t *this,
double weighted_fop_ave_usec = 0.0;
double weighted_fop_ave_usec_sum = 0.0;
long total_fop_hits = 0;
+ loc_t unused_loc = {0, };
+ dict_t *xattr = NULL;
interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) -
(stats->started_at.tv_sec * 1000000.0 +
@@ -950,6 +953,30 @@ io_stats_dump_global_to_json_logfp (xlator_t *this,
}
}
+ ret = syncop_getxattr (this, &unused_loc, &xattr,
+ IO_THREADS_QUEUE_SIZE_KEY, NULL, NULL);
+ if (xattr) {
+ /*
+ * Iterate over the dictionary returned to us by io-threads and
+ * dump the results to the stats file.
+ */
+ data_pair_t *curr = NULL;
+
+ dict_for_each (xattr, curr) {
+ ios_log (this, logfp,
+ "\"%s.%s.%s.queue_size\": \"%d\",",
+ key_prefix, str_prefix, curr->key,
+ data_to_int32 (curr->value));
+ }
+
+ /* Free the dictionary */
+ dict_unref (xattr);
+ } else {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Unable to get queue size counts from "
+ "the io-threads translator!");
+ }
+
if (interval == -1) {
ios_log (this, logfp, "\"%s.%s.uptime\": \"%"PRId64"\",",
key_prefix, str_prefix,
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 31564751c9a..0c984c97192 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -4983,7 +4983,7 @@ static gf_boolean_t
volgen_is_shd_compatible_xl (char *xl_type)
{
char *shd_xls[] = {"cluster/replicate", "cluster/disperse",
- NULL};
+ "debug/io-stats", NULL};
if (gf_get_index_by_elem (shd_xls, xl_type) != -1)
return _gf_true;
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index 80d0168bf4c..a5a9543ae39 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -56,10 +56,10 @@ iot_get_ctx (xlator_t *this, client_t *client)
int i;
if (client_ctx_get (client, this, (void **)&ctx) != 0) {
- ctx = GF_CALLOC (IOT_PRI_MAX, sizeof(*ctx),
+ ctx = GF_CALLOC (GF_FOP_PRI_MAX, sizeof(*ctx),
gf_iot_mt_client_ctx_t);
if (ctx) {
- for (i = 0; i < IOT_PRI_MAX; ++i) {
+ for (i = 0; i < GF_FOP_PRI_MAX; ++i) {
INIT_LIST_HEAD (&ctx[i].clients);
INIT_LIST_HEAD (&ctx[i].reqs);
}
@@ -82,7 +82,7 @@ __iot_dequeue (iot_conf_t *conf, int *pri)
iot_client_ctx_t *ctx;
*pri = -1;
- for (i = 0; i < IOT_PRI_MAX; i++) {
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
if (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]) {
continue;
@@ -133,8 +133,8 @@ __iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri)
client_t *client = stub->frame->root->client;
iot_client_ctx_t *ctx;
- if (pri < 0 || pri >= IOT_PRI_MAX)
- pri = IOT_PRI_MAX-1;
+ if (pri < 0 || pri >= GF_FOP_PRI_MAX)
+ pri = GF_FOP_PRI_MAX-1;
if (client) {
ctx = iot_get_ctx (THIS, client);
@@ -252,25 +252,28 @@ do_iot_schedule (iot_conf_t *conf, call_stub_t *stub, int pri)
}
char*
-iot_get_pri_meaning (iot_pri_t pri)
+iot_get_pri_meaning (gf_fop_pri_t pri)
{
char *name = NULL;
switch (pri) {
- case IOT_PRI_HI:
+ case GF_FOP_PRI_HI:
name = "fast";
break;
- case IOT_PRI_NORMAL:
+ case GF_FOP_PRI_NORMAL:
name = "normal";
break;
- case IOT_PRI_LO:
+ case GF_FOP_PRI_LO:
name = "slow";
break;
- case IOT_PRI_LEAST:
+ case GF_FOP_PRI_LEAST:
name = "least priority";
break;
- case IOT_PRI_MAX:
+ case GF_FOP_PRI_MAX:
name = "invalid";
break;
+ case GF_FOP_PRI_UNSPEC:
+ name = "unspecified";
+ break;
}
return name;
}
@@ -279,11 +282,11 @@ int
iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
{
int ret = -1;
- iot_pri_t pri = IOT_PRI_MAX - 1;
+ gf_fop_pri_t pri = GF_FOP_PRI_MAX - 1;
iot_conf_t *conf = this->private;
if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) {
- pri = IOT_PRI_LEAST;
+ pri = GF_FOP_PRI_LEAST;
goto out;
}
@@ -302,7 +305,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_SETACTIVELK:
case GF_FOP_ICREATE:
case GF_FOP_NAMELINK:
- pri = IOT_PRI_HI;
+ pri = GF_FOP_PRI_HI;
break;
case GF_FOP_CREATE:
@@ -328,7 +331,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_FSETXATTR:
case GF_FOP_REMOVEXATTR:
case GF_FOP_FREMOVEXATTR:
- pri = IOT_PRI_NORMAL;
+ pri = GF_FOP_PRI_NORMAL;
break;
case GF_FOP_READ:
@@ -344,7 +347,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub)
case GF_FOP_DISCARD:
case GF_FOP_ZEROFILL:
case GF_FOP_SEEK:
- pri = IOT_PRI_LO;
+ pri = GF_FOP_PRI_LO;
break;
case GF_FOP_FORGET:
@@ -606,6 +609,36 @@ int
iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
+        iot_conf_t *conf     = NULL;
+        dict_t     *depths   = NULL;
+        int         i        = 0;
+        int         op_ret   = 0;
+        int         op_errno = 0;
+
+        conf = this->private;
+
+        /*
+         * A request for IO_THREADS_QUEUE_SIZE_KEY is answered here instead
+         * of being queued like other getxattrs: the reply dict maps each
+         * priority name to that priority's current queue size.
+         */
+        if (conf && name && strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) {
+                /*
+                 * We explicitly do not want a reference count
+                 * for this dict in this translator
+                 */
+                depths = get_new_dict ();
+                if (!depths) {
+                        op_ret = -1;
+                        op_errno = ENOMEM;
+                        goto unwind_special_getxattr;
+                }
+
+                for (i = 0; i < GF_FOP_PRI_MAX; i++) {
+                        if (dict_set_int32 (depths,
+                                            (char *)fop_pri_to_string (i),
+                                            conf->queue_sizes[i]) != 0) {
+                                dict_unref (depths);
+                                depths = NULL;
+                                op_ret = -1;
+                                op_errno = ENOMEM;
+                                goto unwind_special_getxattr;
+                        }
+                }
+
+unwind_special_getxattr:
+                /*
+                 * On failure depths is NULL; report that as an error rather
+                 * than as a successful reply with no payload.
+                 */
+                STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
+                                     depths, xdata);
+                return 0;
+        }
+
IOT_FOP (getxattr, frame, this, loc, name, xdata);
return 0;
}
@@ -793,7 +826,7 @@ __iot_workers_scale (iot_conf_t *conf)
int i = 0;
char thread_name[GF_THREAD_NAMEMAX] = {0,};
- for (i = 0; i < IOT_PRI_MAX; i++)
+ for (i = 0; i < GF_FOP_PRI_MAX; i++)
scale += min (conf->queue_sizes[i], conf->ac_iot_limit[i]);
if (scale < IOT_MIN_THREADS)
@@ -931,13 +964,13 @@ iot_priv_dump (xlator_t *this)
gf_proc_dump_write("idle_time", "%d", conf->idle_time);
gf_proc_dump_write("stack_size", "%zd", conf->stack_size);
gf_proc_dump_write("high_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_HI]);
+ conf->ac_iot_limit[GF_FOP_PRI_HI]);
gf_proc_dump_write("normal_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_NORMAL]);
+ conf->ac_iot_limit[GF_FOP_PRI_NORMAL]);
gf_proc_dump_write("low_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_LO]);
+ conf->ac_iot_limit[GF_FOP_PRI_LO]);
gf_proc_dump_write("least_priority_threads", "%d",
- conf->ac_iot_limit[IOT_PRI_LEAST]);
+ conf->ac_iot_limit[GF_FOP_PRI_LEAST]);
return 0;
}
@@ -955,17 +988,19 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32, out);
GF_OPTION_RECONF ("high-prio-threads",
- conf->ac_iot_limit[IOT_PRI_HI], options, int32, out);
+ conf->ac_iot_limit[GF_FOP_PRI_HI], options, int32,
+ out);
GF_OPTION_RECONF ("normal-prio-threads",
- conf->ac_iot_limit[IOT_PRI_NORMAL], options, int32,
+ conf->ac_iot_limit[GF_FOP_PRI_NORMAL], options, int32,
out);
GF_OPTION_RECONF ("low-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LO], options, int32, out);
+ conf->ac_iot_limit[GF_FOP_PRI_LO], options, int32,
+ out);
GF_OPTION_RECONF ("least-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LEAST], options, int32,
+ conf->ac_iot_limit[GF_FOP_PRI_LEAST], options, int32,
out);
GF_OPTION_RECONF ("enable-least-priority", conf->least_priority,
options, bool, out);
@@ -1029,16 +1064,16 @@ init (xlator_t *this)
GF_OPTION_INIT ("thread-count", conf->max_count, int32, out);
GF_OPTION_INIT ("high-prio-threads",
- conf->ac_iot_limit[IOT_PRI_HI], int32, out);
+ conf->ac_iot_limit[GF_FOP_PRI_HI], int32, out);
GF_OPTION_INIT ("normal-prio-threads",
- conf->ac_iot_limit[IOT_PRI_NORMAL], int32, out);
+ conf->ac_iot_limit[GF_FOP_PRI_NORMAL], int32, out);
GF_OPTION_INIT ("low-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LO], int32, out);
+ conf->ac_iot_limit[GF_FOP_PRI_LO], int32, out);
GF_OPTION_INIT ("least-prio-threads",
- conf->ac_iot_limit[IOT_PRI_LEAST], int32, out);
+ conf->ac_iot_limit[GF_FOP_PRI_LEAST], int32, out);
GF_OPTION_INIT ("idle-time", conf->idle_time, int32, out);
GF_OPTION_INIT ("enable-least-priority", conf->least_priority,
@@ -1046,7 +1081,7 @@ init (xlator_t *this)
conf->this = this;
- for (i = 0; i < IOT_PRI_MAX; i++) {
+ for (i = 0; i < GF_FOP_PRI_MAX; i++) {
INIT_LIST_HEAD (&conf->clients[i]);
INIT_LIST_HEAD (&conf->no_client[i].clients);
INIT_LIST_HEAD (&conf->no_client[i].reqs);
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index 9648f74f39b..bd1c3f523c4 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -36,18 +36,8 @@ struct iot_conf;
#define IOT_DEFAULT_THREADS 16
#define IOT_MAX_THREADS 64
-
#define IOT_THREAD_STACK_SIZE ((size_t)(256*1024))
-
-typedef enum {
- IOT_PRI_HI = 0, /* low latency */
- IOT_PRI_NORMAL, /* normal */
- IOT_PRI_LO, /* bulk */
- IOT_PRI_LEAST, /* least */
- IOT_PRI_MAX,
-} iot_pri_t;
-
typedef struct {
struct list_head clients;
struct list_head reqs;
@@ -63,18 +53,18 @@ struct iot_conf {
int32_t idle_time; /* in seconds */
- struct list_head clients[IOT_PRI_MAX];
+ struct list_head clients[GF_FOP_PRI_MAX];
/*
* It turns out that there are several ways a frame can get to us
* without having an associated client (server_first_lookup was the
* first one I hit). Instead of trying to update all such callers,
* we use this to queue them.
*/
- iot_client_ctx_t no_client[IOT_PRI_MAX];
+ iot_client_ctx_t no_client[GF_FOP_PRI_MAX];
- int32_t ac_iot_limit[IOT_PRI_MAX];
- int32_t ac_iot_count[IOT_PRI_MAX];
- int queue_sizes[IOT_PRI_MAX];
+ int32_t ac_iot_limit[GF_FOP_PRI_MAX];
+ int32_t ac_iot_count[GF_FOP_PRI_MAX];
+ int queue_sizes[GF_FOP_PRI_MAX];
int queue_size;
pthread_attr_t w_attr;
gf_boolean_t least_priority; /*Enable/Disable least-priority */