diff options
author | Varsha Rao <varao@redhat.com> | 2018-02-06 18:56:45 +0530 |
---|---|---|
committer | Jeff Darcy <jeff@pl.atyp.us> | 2018-02-08 17:01:12 +0000 |
commit | aa4372bf427152f671de52fc6e02b93ca09f22c7 (patch) | |
tree | 8c07cb7c3a9ad8122be3b7db403726dd7749c2ed | |
parent | 5e751b4c05375aa8b0f217ca73629d7d43faccf6 (diff) |
performance/io-threads: expose io-thread queue depths
The following release-3.8-fb branch patch is upstreamed:
> io-stats: Expose io-thread queue depths
> Commit ID: 69509ee7d2
> https://review.gluster.org/#/c/18143/
> By Shreyas Siravara <sshreyas@fb.com>
Changes in this patch:
- Replace iot_pri_t with gf_fop_pri_t
- Replace IOT_PRI_{HI, LO, NORMAL, MAX, LEAST} with
GF_FOP_PRI_{HI, LO, NORMAL, MAX, LEAST}
- Use dict_unref() instead of dict_destroy()
This patch is required to forward-port the io-threads namespace patch.
Updates: #401
Change-Id: I1b47a63185a441a30fbc423ca1015df7b36c2518
Signed-off-by: Varsha Rao <varao@redhat.com>
-rw-r--r-- | libglusterfs/src/dict.h | 4 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 37 | ||||
-rw-r--r-- | tests/basic/stats-dump.t | 7 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 9 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 5 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats.c | 27 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 2 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.c | 95 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.h | 20 |
9 files changed, 158 insertions, 48 deletions
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h index ab8a8a56352..1ce055a5940 100644 --- a/libglusterfs/src/dict.h +++ b/libglusterfs/src/dict.h @@ -62,6 +62,8 @@ typedef struct _data_pair data_pair_t; #define DICT_KEY_VALUE_MAX_SIZE 1048576 +#define dict_for_each(d, c) for (c = d->members_list; c; c = c->next) + struct _data { unsigned char is_static:1; unsigned char is_const:1; @@ -160,8 +162,8 @@ data_t *data_from_uint16 (uint16_t value); char *data_to_str (data_t *data); void *data_to_bin (data_t *data); void *data_to_ptr (data_t *data); - data_t * data_copy (data_t *old); + int dict_foreach (dict_t *this, int (*fn)(dict_t *this, char *key, diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 438ce059fad..d702a5fa9ba 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -85,6 +85,7 @@ #define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check" #define ZR_DUMP_FUSE "dump-fuse" #define ZR_FUSE_MOUNTOPTS "fuse-mountopts" +#define IO_THREADS_QUEUE_SIZE_KEY "io-thread-queue-size" #define GF_XATTR_CLRLK_CMD "glusterfs.clrlk" #define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo" @@ -308,6 +309,42 @@ #define GF_LK_MANDATORY 1 /* maps to GLFS_LK_MANDATORY from libgfapi*/ #define GF_LOCK_MODE "glusterfs.lk.lkmode" +#define GF_CHECK_XATTR_KEY_AND_GOTO(key, cmpkey, errval, lbl) \ + do { \ + if (key && strcmp (key, cmpkey) == 0) { \ + errval = -EINVAL; \ + goto lbl; \ + } \ + } while (0) \ + + +typedef enum { + GF_FOP_PRI_UNSPEC = -1, /* Priority not specified */ + GF_FOP_PRI_HI = 0, /* low latency */ + GF_FOP_PRI_NORMAL, /* normal */ + GF_FOP_PRI_LO, /* bulk */ + GF_FOP_PRI_LEAST, /* least */ + GF_FOP_PRI_MAX, /* Highest */ +} gf_fop_pri_t; + +static const char * const FOP_PRI_STRINGS[] = { + "HIGH", + "NORMAL", + "LOW", + "LEAST" +}; + +static inline const char *fop_pri_to_string (gf_fop_pri_t pri) +{ + if (pri < 0) + return "UNSPEC"; + + if (pri >= GF_FOP_PRI_MAX) + return "INVALID"; + + return 
FOP_PRI_STRINGS[pri]; +} + const char *fop_enum_to_pri_string (glusterfs_fop_t fop); #define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */ diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t index 5f35db4e0bd..3eed80e5579 100644 --- a/tests/basic/stats-dump.t +++ b/tests/basic/stats-dump.t @@ -12,6 +12,7 @@ TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} TEST $CLI volume set $V0 diagnostics.latency-measurement on TEST $CLI volume set $V0 diagnostics.count-fop-hits on TEST $CLI volume set $V0 diagnostics.stats-dump-interval 1 +TEST $CLI volume set $V0 performance.nfs.io-threads on TEST $CLI volume set $V0 nfs.disable off TEST $CLI volume start $V0 EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available @@ -35,4 +36,10 @@ TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_bac TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump|tail -1|cut -d: -f2) != "0," ] TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump|tail -1|cut -d: -f2) != "0," ] +# Test that io-stats is getting queue sizes from io-threads +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy0.dump +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump +TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump + cleanup; diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 75b2bf8e22c..cdd22475cbe 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1648,6 +1648,15 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, return 0; /* + * Heal daemons don't have IO threads ... 
and as a result they + * send this getxattr down and eventually crash :( + */ + if (strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { + ret = -EINVAL; + goto out; + } + + /* * Special xattrs which need responses from all subvols */ if (afr_is_special_xattr (name, &cbk, 0)) { diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5717650dd12..445bd590c4d 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -4562,7 +4562,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; xlator_t *hashed_subvol = NULL; - xlator_t *mds_subvol = NULL; + xlator_t *mds_subvol = NULL; xlator_t *cached_subvol = NULL; dht_conf_t *conf = NULL; dht_local_t *local = NULL; @@ -4572,6 +4572,9 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, int cnt = 0; char *node_uuid_key = NULL; int ret = -1; + + GF_CHECK_XATTR_KEY_AND_GOTO (key, IO_THREADS_QUEUE_SIZE_KEY, + op_errno, err); VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 4ea45b058ae..985c5fbc389 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -36,6 +36,7 @@ #include "logging.h" #include "cli1-xdr.h" #include "statedump.h" +#include "syncop.h" #include <pwd.h> #include <grp.h> #include "upcall-utils.h" @@ -798,6 +799,8 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, double weighted_fop_ave_usec = 0.0; double weighted_fop_ave_usec_sum = 0.0; long total_fop_hits = 0; + loc_t unused_loc = {0, }; + dict_t *xattr = NULL; interval_sec = ((now->tv_sec * 1000000.0 + now->tv_usec) - (stats->started_at.tv_sec * 1000000.0 + @@ -950,6 +953,30 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, } } + ret = syncop_getxattr (this, &unused_loc, &xattr, + IO_THREADS_QUEUE_SIZE_KEY, NULL, NULL); + if (xattr) { + /* + * Iterate over the dictionary 
returned to us by io-threads and + * dump the results to the stats file. + */ + data_pair_t *curr = NULL; + + dict_for_each (xattr, curr) { + ios_log (this, logfp, + "\"%s.%s.%s.queue_size\": \"%d\",", + key_prefix, str_prefix, curr->key, + data_to_int32 (curr->value)); + } + + /* Free the dictionary */ + dict_unref (xattr); + } else { + gf_log (this->name, GF_LOG_WARNING, + "Unable to get queue size counts from " + "the io-threads translator!"); + } + if (interval == -1) { ios_log (this, logfp, "\"%s.%s.uptime\": \"%"PRId64"\",", key_prefix, str_prefix, diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 31564751c9a..0c984c97192 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -4983,7 +4983,7 @@ static gf_boolean_t volgen_is_shd_compatible_xl (char *xl_type) { char *shd_xls[] = {"cluster/replicate", "cluster/disperse", - NULL}; + "debug/io-stats", NULL}; if (gf_get_index_by_elem (shd_xls, xl_type) != -1) return _gf_true; diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 80d0168bf4c..a5a9543ae39 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -56,10 +56,10 @@ iot_get_ctx (xlator_t *this, client_t *client) int i; if (client_ctx_get (client, this, (void **)&ctx) != 0) { - ctx = GF_CALLOC (IOT_PRI_MAX, sizeof(*ctx), + ctx = GF_CALLOC (GF_FOP_PRI_MAX, sizeof(*ctx), gf_iot_mt_client_ctx_t); if (ctx) { - for (i = 0; i < IOT_PRI_MAX; ++i) { + for (i = 0; i < GF_FOP_PRI_MAX; ++i) { INIT_LIST_HEAD (&ctx[i].clients); INIT_LIST_HEAD (&ctx[i].reqs); } @@ -82,7 +82,7 @@ __iot_dequeue (iot_conf_t *conf, int *pri) iot_client_ctx_t *ctx; *pri = -1; - for (i = 0; i < IOT_PRI_MAX; i++) { + for (i = 0; i < GF_FOP_PRI_MAX; i++) { if (conf->ac_iot_count[i] >= conf->ac_iot_limit[i]) { continue; @@ -133,8 +133,8 @@ __iot_enqueue 
(iot_conf_t *conf, call_stub_t *stub, int pri) client_t *client = stub->frame->root->client; iot_client_ctx_t *ctx; - if (pri < 0 || pri >= IOT_PRI_MAX) - pri = IOT_PRI_MAX-1; + if (pri < 0 || pri >= GF_FOP_PRI_MAX) + pri = GF_FOP_PRI_MAX-1; if (client) { ctx = iot_get_ctx (THIS, client); @@ -252,25 +252,28 @@ do_iot_schedule (iot_conf_t *conf, call_stub_t *stub, int pri) } char* -iot_get_pri_meaning (iot_pri_t pri) +iot_get_pri_meaning (gf_fop_pri_t pri) { char *name = NULL; switch (pri) { - case IOT_PRI_HI: + case GF_FOP_PRI_HI: name = "fast"; break; - case IOT_PRI_NORMAL: + case GF_FOP_PRI_NORMAL: name = "normal"; break; - case IOT_PRI_LO: + case GF_FOP_PRI_LO: name = "slow"; break; - case IOT_PRI_LEAST: + case GF_FOP_PRI_LEAST: name = "least priority"; break; - case IOT_PRI_MAX: + case GF_FOP_PRI_MAX: name = "invalid"; break; + case GF_FOP_PRI_UNSPEC: + name = "unspecified"; + break; } return name; } @@ -279,11 +282,11 @@ int iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) { int ret = -1; - iot_pri_t pri = IOT_PRI_MAX - 1; + gf_fop_pri_t pri = GF_FOP_PRI_MAX - 1; iot_conf_t *conf = this->private; if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) { - pri = IOT_PRI_LEAST; + pri = GF_FOP_PRI_LEAST; goto out; } @@ -302,7 +305,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_SETACTIVELK: case GF_FOP_ICREATE: case GF_FOP_NAMELINK: - pri = IOT_PRI_HI; + pri = GF_FOP_PRI_HI; break; case GF_FOP_CREATE: @@ -328,7 +331,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_FSETXATTR: case GF_FOP_REMOVEXATTR: case GF_FOP_FREMOVEXATTR: - pri = IOT_PRI_NORMAL; + pri = GF_FOP_PRI_NORMAL; break; case GF_FOP_READ: @@ -344,7 +347,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_DISCARD: case GF_FOP_ZEROFILL: case GF_FOP_SEEK: - pri = IOT_PRI_LO; + pri = GF_FOP_PRI_LO; break; case GF_FOP_FORGET: @@ -606,6 +609,36 @@ int iot_getxattr 
(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { + iot_conf_t *conf = NULL; + dict_t *depths = NULL; + int i = 0; + + conf = this->private; + + if (conf && name && strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { + /* + * We explicitly do not want a reference count + * for this dict in this translator + */ + depths = get_new_dict (); + if (!depths) + goto unwind_special_getxattr; + + for (i = 0; i < GF_FOP_PRI_MAX; i++) { + if (dict_set_int32 (depths, + (char *)fop_pri_to_string (i), + conf->queue_sizes[i]) != 0) { + dict_unref (depths); + depths = NULL; + goto unwind_special_getxattr; + } + } + +unwind_special_getxattr: + STACK_UNWIND_STRICT (getxattr, frame, 0, 0, depths, xdata); + return 0; + } + IOT_FOP (getxattr, frame, this, loc, name, xdata); return 0; } @@ -793,7 +826,7 @@ __iot_workers_scale (iot_conf_t *conf) int i = 0; char thread_name[GF_THREAD_NAMEMAX] = {0,}; - for (i = 0; i < IOT_PRI_MAX; i++) + for (i = 0; i < GF_FOP_PRI_MAX; i++) scale += min (conf->queue_sizes[i], conf->ac_iot_limit[i]); if (scale < IOT_MIN_THREADS) @@ -931,13 +964,13 @@ iot_priv_dump (xlator_t *this) gf_proc_dump_write("idle_time", "%d", conf->idle_time); gf_proc_dump_write("stack_size", "%zd", conf->stack_size); gf_proc_dump_write("high_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_HI]); + conf->ac_iot_limit[GF_FOP_PRI_HI]); gf_proc_dump_write("normal_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_NORMAL]); + conf->ac_iot_limit[GF_FOP_PRI_NORMAL]); gf_proc_dump_write("low_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_LO]); + conf->ac_iot_limit[GF_FOP_PRI_LO]); gf_proc_dump_write("least_priority_threads", "%d", - conf->ac_iot_limit[IOT_PRI_LEAST]); + conf->ac_iot_limit[GF_FOP_PRI_LEAST]); return 0; } @@ -955,17 +988,19 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32, out); GF_OPTION_RECONF ("high-prio-threads", - conf->ac_iot_limit[IOT_PRI_HI], options, 
int32, out); + conf->ac_iot_limit[GF_FOP_PRI_HI], options, int32, + out); GF_OPTION_RECONF ("normal-prio-threads", - conf->ac_iot_limit[IOT_PRI_NORMAL], options, int32, + conf->ac_iot_limit[GF_FOP_PRI_NORMAL], options, int32, out); GF_OPTION_RECONF ("low-prio-threads", - conf->ac_iot_limit[IOT_PRI_LO], options, int32, out); + conf->ac_iot_limit[GF_FOP_PRI_LO], options, int32, + out); GF_OPTION_RECONF ("least-prio-threads", - conf->ac_iot_limit[IOT_PRI_LEAST], options, int32, + conf->ac_iot_limit[GF_FOP_PRI_LEAST], options, int32, out); GF_OPTION_RECONF ("enable-least-priority", conf->least_priority, options, bool, out); @@ -1029,16 +1064,16 @@ init (xlator_t *this) GF_OPTION_INIT ("thread-count", conf->max_count, int32, out); GF_OPTION_INIT ("high-prio-threads", - conf->ac_iot_limit[IOT_PRI_HI], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_HI], int32, out); GF_OPTION_INIT ("normal-prio-threads", - conf->ac_iot_limit[IOT_PRI_NORMAL], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_NORMAL], int32, out); GF_OPTION_INIT ("low-prio-threads", - conf->ac_iot_limit[IOT_PRI_LO], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_LO], int32, out); GF_OPTION_INIT ("least-prio-threads", - conf->ac_iot_limit[IOT_PRI_LEAST], int32, out); + conf->ac_iot_limit[GF_FOP_PRI_LEAST], int32, out); GF_OPTION_INIT ("idle-time", conf->idle_time, int32, out); GF_OPTION_INIT ("enable-least-priority", conf->least_priority, @@ -1046,7 +1081,7 @@ init (xlator_t *this) conf->this = this; - for (i = 0; i < IOT_PRI_MAX; i++) { + for (i = 0; i < GF_FOP_PRI_MAX; i++) { INIT_LIST_HEAD (&conf->clients[i]); INIT_LIST_HEAD (&conf->no_client[i].clients); INIT_LIST_HEAD (&conf->no_client[i].reqs); diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index 9648f74f39b..bd1c3f523c4 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -36,18 +36,8 @@ struct iot_conf; #define IOT_DEFAULT_THREADS 
16 #define IOT_MAX_THREADS 64 - #define IOT_THREAD_STACK_SIZE ((size_t)(256*1024)) - -typedef enum { - IOT_PRI_HI = 0, /* low latency */ - IOT_PRI_NORMAL, /* normal */ - IOT_PRI_LO, /* bulk */ - IOT_PRI_LEAST, /* least */ - IOT_PRI_MAX, -} iot_pri_t; - typedef struct { struct list_head clients; struct list_head reqs; @@ -63,18 +53,18 @@ struct iot_conf { int32_t idle_time; /* in seconds */ - struct list_head clients[IOT_PRI_MAX]; + struct list_head clients[GF_FOP_PRI_MAX]; /* * It turns out that there are several ways a frame can get to us * without having an associated client (server_first_lookup was the * first one I hit). Instead of trying to update all such callers, * we use this to queue them. */ - iot_client_ctx_t no_client[IOT_PRI_MAX]; + iot_client_ctx_t no_client[GF_FOP_PRI_MAX]; - int32_t ac_iot_limit[IOT_PRI_MAX]; - int32_t ac_iot_count[IOT_PRI_MAX]; - int queue_sizes[IOT_PRI_MAX]; + int32_t ac_iot_limit[GF_FOP_PRI_MAX]; + int32_t ac_iot_count[GF_FOP_PRI_MAX]; + int queue_sizes[GF_FOP_PRI_MAX]; int queue_size; pthread_attr_t w_attr; gf_boolean_t least_priority; /*Enable/Disable least-priority */ |