diff options
author | Jeff Darcy <jdarcy@fb.com> | 2017-09-15 06:59:01 -0700 |
---|---|---|
committer | Jeff Darcy <jdarcy@fb.com> | 2017-09-15 13:47:01 -0700 |
commit | 8dfdecf220d1c9365e1f8d6af9ead5e48c61e2eb (patch) | |
tree | bccd5906be43cf81792248b06099006525ed0c27 | |
parent | e4b47b5d54644c398c424a99116a0cc37e4431d6 (diff) |
Replace namespace/io-stats/io-threads with 3.6-fb versions
This rolls up multiple patches related to namespace identification and
throttling/QoS. This primarily includes the following, all by Michael
Goulet <mgoulet@fb.com>.
io-threads: Add weighted round robin queueing by namespace
https://phabricator.facebook.com/D5615269
io-threads: Add per-namespaces queue sizes to IO_THREADS_QUEUE_SIZE_KEY
https://phabricator.facebook.com/D5683162
io-threads: Implement better slot allocation algorithm
https://phabricator.facebook.com/D5683186
io-threads: Only enable weighted queueing on bricks
https://phabricator.facebook.com/D5700062
io-threads: Update queue sizes on drain
https://phabricator.facebook.com/D5704832
Fix parsing (-1) as default NS weight
https://phabricator.facebook.com/D5723383
Parts of the following patches have also been applied to satisfy
dependencies.
io-throttling: Calculate moving averages and throttle offending hosts
https://phabricator.fb.com/D2516161
Shreyas Siravara <sshreyas@fb.com>
Hook up ODS logging for FUSE clients.
https://phabricator.facebook.com/D3963376
Kevin Vigor <kvigor@fb.com>
Add the flag --skip-nfsd-start to skip the NFS daemon starting, even if
it is enabled
https://phabricator.facebook.com/D4575368
Alex Lorca <alexlorca@fb.com>
There are also some "standard" changes: dealing with code that moved,
reindenting to comply with Gluster coding standards, gf_uuid_xxx, etc.
This patch *does* revert some changes which have occurred upstream since
3.6; these will be re-applied as appropriate on top of this new base.
Change-Id: I69024115da7a60811e5b86beae781d602bdb558d
Signed-off-by: Jeff Darcy <jdarcy@fb.com>
-rw-r--r-- | glusterfsd/src/glusterfsd.c | 8 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/common-utils.c | 28 | ||||
-rw-r--r-- | libglusterfs/src/dict.c | 42 | ||||
-rw-r--r-- | libglusterfs/src/dict.h | 16 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 11 | ||||
-rw-r--r-- | libglusterfs/src/stack.c | 1 | ||||
-rw-r--r-- | libglusterfs/src/stack.h | 15 | ||||
-rw-r--r-- | tests/basic/afr/durability-off.t | 12 | ||||
-rw-r--r-- | tests/basic/fop-sampling.t | 4 | ||||
-rw-r--r-- | tests/basic/stats-dump.t | 4 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/Makefile.am | 6 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats-mem-types.h | 2 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats.c | 2487 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats.h | 381 | ||||
-rw-r--r-- | xlators/features/namespace/src/namespace.c | 77 | ||||
-rw-r--r-- | xlators/features/namespace/src/namespace.h | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 2 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/Makefile.am | 9 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.c | 1246 | ||||
-rw-r--r-- | xlators/performance/io-threads/src/io-threads.h | 66 |
21 files changed, 3057 insertions, 1362 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 5022cfc22da..a0fec8f49a1 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -129,6 +129,10 @@ static struct argp_option gf_options[] = { "buffer size, [default: 5]"}, {"log-flush-timeout", ARGP_LOG_FLUSH_TIMEOUT, "LOG-FLUSH-TIMEOUT", 0, "Set log flush timeout, [default: 2 minutes]"}, + {"stats-instance-name", ARGP_STATS_INSTANCE_NAME, + "STATS-INSTANCE-NAME", 0, + "Specify instance name for io-stats translator"}, + {0, 0, 0, 0, "Advanced Options:"}, {"volfile-server-port", ARGP_VOLFILE_SERVER_PORT_KEY, "PORT", 0, @@ -1246,6 +1250,10 @@ no_oom_api: break; + case ARGP_STATS_INSTANCE_NAME: + cmd_args->stats_instance_name = gf_strdup (arg); + break; + case ARGP_LOG_FLUSH_TIMEOUT: if (gf_string2uint32 (arg, &cmd_args->log_flush_timeout)) { argp_failure (state, -1, 0, diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index b5c6b27b534..940b351e552 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -96,6 +96,7 @@ enum argp_option_keys { #ifdef GF_LINUX_HOST_OS ARGP_OOM_SCORE_ADJ_KEY = 176, #endif + ARGP_STATS_INSTANCE_NAME = 177, }; struct _gfd_vol_top_priv_t { diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index 84a0785d660..62c41a767d0 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -4349,9 +4349,11 @@ list_node_del (struct list_node *node) GF_FREE (node); } -const char * -fop_enum_to_pri_string (glusterfs_fop_t fop) +iot_pri_t +iot_fop_to_pri (glusterfs_fop_t fop) { + iot_pri_t pri = IOT_PRI_MAX - 1; + switch (fop) { case GF_FOP_OPEN: case GF_FOP_STAT: @@ -4365,7 +4367,8 @@ fop_enum_to_pri_string (glusterfs_fop_t fop) case GF_FOP_READDIRP: case GF_FOP_GETACTIVELK: case GF_FOP_SETACTIVELK: - return "HIGH"; + pri = IOT_PRI_HI; + break; case GF_FOP_CREATE: case GF_FOP_FLUSH: @@ -4391,7 +4394,8 @@ fop_enum_to_pri_string (glusterfs_fop_t fop) case 
GF_FOP_FREMOVEXATTR: case GF_FOP_IPC: case GF_FOP_LEASE: - return "NORMAL"; + pri = IOT_PRI_NORMAL; + break; case GF_FOP_READ: case GF_FOP_WRITE: @@ -4402,22 +4406,26 @@ fop_enum_to_pri_string (glusterfs_fop_t fop) case GF_FOP_XATTROP: case GF_FOP_FXATTROP: case GF_FOP_RCHECKSUM: + case GF_FOP_FALLOCATE: + case GF_FOP_DISCARD: case GF_FOP_ZEROFILL: - case GF_FOP_FALLOCATE: case GF_FOP_SEEK: - return "LOW"; + pri = IOT_PRI_LO; + break; case GF_FOP_NULL: case GF_FOP_FORGET: case GF_FOP_RELEASE: case GF_FOP_RELEASEDIR: case GF_FOP_GETSPEC: + case GF_FOP_COMPOUND: case GF_FOP_MAXVALUE: - case GF_FOP_DISCARD: - return "LEAST"; - default: - return "UNKNOWN"; + //fail compilation on missing fop + //new fop must choose priority. + break; } + + return pri; } const char * diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c index 6a61e641e19..82b9236e661 100644 --- a/libglusterfs/src/dict.c +++ b/libglusterfs/src/dict.c @@ -1258,6 +1258,38 @@ dict_foreach (dict_t *dict, return ret; } +int +dict_foreach_with_idx (dict_t *dict, + int (*fn)(dict_t *this, + char *key, + data_t *value, + void *data, uint64_t idx), + void *data) +{ + if (!dict) { + gf_log_callingfn ("dict", GF_LOG_WARNING, + "dict is NULL"); + return -1; + } + + uint64_t idx = 0; + int ret = -1; + data_pair_t *pairs = NULL; + data_pair_t *next = NULL; + + pairs = dict->members_list; + while (pairs) { + next = pairs->next; + ret = fn (dict, pairs->key, pairs->value, data, idx); + if (ret < 0) + return ret; + pairs = next; + idx++; + } + + return 0; +} + /* return values: -1 = failure, 0 = no matches found, @@ -2979,6 +3011,16 @@ dict_dump_to_str (dict_t *dict, char *dump, int dumpsize, char *format) return 0; } +/* This function converts a uint32 to a (string) key for use in a dictionary. + * Ensure that the key string buffer is at least DICT_UINT32_KEY_SIZE in + * length, since that's the maximum length of a uint32's string representation + * plus a NULL delimiter char. 
*/ +void +dict_uint32_to_key (uint32_t num, char *key_buf) +{ + snprintf (key_buf, DICT_UINT32_KEY_SIZE, "%u", num); +} + void dict_dump_to_log (dict_t *dict) { diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h index 5259c6befa1..da95bc86bec 100644 --- a/libglusterfs/src/dict.h +++ b/libglusterfs/src/dict.h @@ -22,7 +22,6 @@ typedef struct _data data_t; typedef struct _dict dict_t; typedef struct _data_pair data_pair_t; - #define GF_PROTOCOL_DICT_SERIALIZE(this,from_dict,to,len,ope,labl) do { \ int ret = 0; \ \ @@ -161,6 +160,8 @@ dict_t *get_new_dict (); #define dict_for_each(d, c) for (c = d->members_list; c; c = c->next) +#define dict_foreach_inline(d, c) for (c = d->members_list; c; c = c->next) + int dict_foreach (dict_t *this, int (*fn)(dict_t *this, char *key, @@ -168,6 +169,13 @@ int dict_foreach (dict_t *this, void *data), void *data); +int dict_foreach_with_idx (dict_t *this, + int (*fn)(dict_t *this, + char *key, + data_t *value, + void *data, uint64_t idx), + void *data); + int dict_foreach_fnmatch (dict_t *dict, char *pattern, int (*fn)(dict_t *this, char *key, @@ -246,6 +254,12 @@ GF_MUST_CHECK int dict_get_str (dict_t *this, char *key, char **str); GF_MUST_CHECK int dict_get_str_boolean (dict_t *this, char *key, int default_val); GF_MUST_CHECK int dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len, char delimiter); +/* Log_10(2^32) + 1. This is the length of the longest string representation of + * a 32-bit integer, plus space for '\0'. 
*/ +#define DICT_UINT32_KEY_SIZE 11 + +void dict_uint32_to_key (uint32_t num, char *key_buf); + void dict_dump_to_statedump (dict_t *dict, char *dict_name, char *domain); diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 0b033d8bfcf..f618c7aba19 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -331,7 +331,14 @@ static inline const char *fop_pri_to_string (gf_fop_pri_t pri) return FOP_PRI_STRINGS[pri]; } -const char *fop_enum_to_pri_string (glusterfs_fop_t fop); +iot_pri_t iot_fop_to_pri (glusterfs_fop_t fop); + +static inline const char *fop_enum_to_pri_string (glusterfs_fop_t fop) +{ + iot_pri_t pri = iot_fop_to_pri (fop); + return fop_pri_to_string (pri); +} + const char *fop_enum_to_string (glusterfs_fop_t fop); #define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */ @@ -444,6 +451,8 @@ struct _cmd_args { #ifdef GF_LINUX_HOST_OS char *oom_score_adj; #endif + + char *stats_instance_name; }; typedef struct _cmd_args cmd_args_t; diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c index 6977814ec69..adcedd4cc96 100644 --- a/libglusterfs/src/stack.c +++ b/libglusterfs/src/stack.c @@ -36,6 +36,7 @@ create_frame (xlator_t *xl, call_pool_t *pool) frame->root = stack; frame->this = xl; + frame->pri = GF_FOP_PRI_UNSPEC; LOCK_INIT (&frame->lock); INIT_LIST_HEAD (&frame->frames); list_add (&frame->frames, &stack->myframes); diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h index 9e5355a6044..094dc62312c 100644 --- a/libglusterfs/src/stack.h +++ b/libglusterfs/src/stack.h @@ -77,6 +77,8 @@ struct _call_frame_t { const char *wind_to; const char *unwind_from; const char *unwind_to; + + gf_fop_pri_t pri; }; struct _ns_info { @@ -122,6 +124,16 @@ struct _call_stack_t { ns_info_t ns_info; }; +#define frame_set_throttling(frm, should_throttle) \ + do { \ + if (frm) { \ + if (should_throttle) { \ + frm->pri = IOT_PRI_LEAST; \ + } else { \ + frm->pri = IOT_PRI_UNSPEC; \ + } \ + } \ + } while 
(0) #define frame_set_uid_gid(frm, u, g) \ do { \ @@ -259,6 +271,7 @@ STACK_RESET (call_stack_t *stack) _new->wind_from = __FUNCTION__; \ _new->wind_to = #fn; \ _new->unwind_to = #rfn; \ + _new->pri = frame->pri; \ \ LOCK_INIT (&_new->lock); \ LOCK(&frame->root->stack_lock); \ @@ -321,6 +334,8 @@ STACK_RESET (call_stack_t *stack) _new->wind_from = __FUNCTION__; \ _new->wind_to = #fn; \ _new->unwind_to = #rfn; \ + _new->pri = frame->pri; \ + \ LOCK_INIT (&_new->lock); \ LOCK(&frame->root->stack_lock); \ { \ diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t index 155ffa09ef0..0c4f470079a 100644 --- a/tests/basic/afr/durability-off.t +++ b/tests/basic/afr/durability-off.t @@ -5,6 +5,14 @@ . $(dirname $0)/../../include.rc . $(dirname $0)/../../volume.rc +did_fsync () { + local count=$($CLI volume profile $V0 info | grep -w FSYNC | wc -l) + if [ "$count" != "0" ]; then + echo "Y" + else + echo "N" + fi +} cleanup; TEST glusterd @@ -24,7 +32,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 TEST $CLI volume heal $V0 EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 -EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l) +EXPECT "N" did_fsync #Test that fsyncs happen when durability is on TEST $CLI volume set $V0 cluster.ensure-durability on @@ -39,6 +47,6 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 TEST $CLI volume heal $V0 EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 -EXPECT "^2$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l) +EXPECT "Y" did_fsync cleanup; diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t index e429fd8cb07..a1b3edc3d5d 100644 --- a/tests/basic/fop-sampling.t +++ b/tests/basic/fop-sampling.t @@ -13,7 +13,7 @@ function check_path { op=$1 path=$2 file=$3 - grep $op 
$file | awk -F, '{print $11}' | grep $path 2>&1 > /dev/null + grep $op $file | awk -F, '{print $12}' | grep $path 2>&1 > /dev/null if [ $? -eq 0 ]; then echo "Y" else @@ -106,6 +106,8 @@ for dir in "$N0" "$M0"; do rm $dir/file2 done; +read -p "Continue? " nothing + EXPECT_WITHIN 10 "Y" check_path CREATE /file1 $BRICK_SAMPLES EXPECT_WITHIN 10 "Y" check_path LOOKUP /file1 $BRICK_SAMPLES EXPECT_WITHIN 10 "Y" check_path SETATTR /file1 $BRICK_SAMPLES diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t index af1ad34702b..a188a45eea1 100644 --- a/tests/basic/stats-dump.t +++ b/tests/basic/stats-dump.t @@ -39,7 +39,9 @@ FUSE_RET="$?" # Test that io-stats is getting queue sizes from io-threads TEST grep 'queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd_$V0.dump -TEST ! grep 'queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy?.dump + +# We should be getting queue sizes on bricks now too. +TEST grep 'queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy?.dump TEST [ 0 -ne "$BRICK_RET" ] TEST [ 0 -ne "$NFSD_RET" ] diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am index c5df598549a..5d39afc2a14 100644 --- a/xlators/debug/io-stats/src/Makefile.am +++ b/xlators/debug/io-stats/src/Makefile.am @@ -2,17 +2,17 @@ xlator_LTLIBRARIES = io-stats.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug -io_stats_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) +io_stats_la_LDFLAGS = -module -avoid-version io_stats_la_SOURCES = io-stats.c io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = io-stats-mem-types.h +noinst_HEADERS = io-stats-mem-types.h io-stats.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src \ -I$(top_srcdir)/rpc/rpc-lib/src \ - -DDATADIR=\"$(localstatedir)\" + -DGLUSTERD_WORKDIR=$(GLUSTERD_WORKDIR) AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h 
b/xlators/debug/io-stats/src/io-stats-mem-types.h index 9dde9373264..af1e59abc2c 100644 --- a/xlators/debug/io-stats/src/io-stats-mem-types.h +++ b/xlators/debug/io-stats/src/io-stats-mem-types.h @@ -13,8 +13,6 @@ #include "mem-types.h" -extern const char *__progname; - enum gf_io_stats_mem_types_ { gf_io_stats_mt_ios_conf = gf_common_mt_end + 1, gf_io_stats_mt_ios_fd, diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index e3511233203..67edd36e611 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -7,8 +7,11 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" #include "xlator.h" -#include "syscall.h" +#endif /** * xlators/debug/io_stats : @@ -35,365 +38,54 @@ #include "logging.h" #include "cli1-xdr.h" #include "statedump.h" +#include "io-stats.h" #include "syncop.h" +#include "hashfn.h" #include <pwd.h> #include <grp.h> -#define MAX_LIST_MEMBERS 100 -#define DEFAULT_PWD_BUF_SZ 16384 -#define DEFAULT_GRP_BUF_SZ 16384 -#define IOS_MAX_ERRORS 132 - -typedef enum { - IOS_STATS_TYPE_NONE, - IOS_STATS_TYPE_OPEN, - IOS_STATS_TYPE_READ, - IOS_STATS_TYPE_WRITE, - IOS_STATS_TYPE_OPENDIR, - IOS_STATS_TYPE_READDIRP, - IOS_STATS_TYPE_READ_THROUGHPUT, - IOS_STATS_TYPE_WRITE_THROUGHPUT, - IOS_STATS_TYPE_MAX -}ios_stats_type_t; - -typedef enum { - IOS_STATS_THRU_READ, - IOS_STATS_THRU_WRITE, - IOS_STATS_THRU_MAX, -}ios_stats_thru_t; - -struct ios_stat_lat { - struct timeval time; - double throughput; -}; - -struct ios_stat { - gf_lock_t lock; - uuid_t gfid; - char *filename; - uint64_t counters [IOS_STATS_TYPE_MAX]; - struct ios_stat_lat thru_counters [IOS_STATS_THRU_MAX]; - int refcnt; -}; - -struct ios_stat_list { - struct list_head list; - struct ios_stat *iosstat; - double value; -}; - -struct ios_stat_head { - gf_lock_t lock; - double min_cnt; - uint64_t 
members; - struct ios_stat_list *iosstats; -}; - -typedef struct _ios_sample_t { - uid_t uid; - gid_t gid; - char identifier[UNIX_PATH_MAX]; - char path[UNIX_PATH_MAX]; - glusterfs_fop_t fop_type; - struct timeval timestamp; - double elapsed; - gf_boolean_t have_path; - int32_t op_ret; - int32_t op_errno; -} ios_sample_t; - - -typedef struct _ios_sample_buf_t { - uint64_t pos; /* Position in write buffer */ - uint64_t size; /* Size of ring buffer */ - uint64_t collected; /* Number of samples we've collected */ - uint64_t observed; /* Number of FOPs we've observed */ - ios_sample_t *ios_samples; /* Our list of samples */ -} ios_sample_buf_t; - - -struct ios_lat { - double min; - double max; - double avg; - uint64_t total; -}; - -struct ios_global_stats { - uint64_t data_written; - uint64_t data_read; - uint64_t block_count_write[32]; - uint64_t block_count_read[32]; - uint64_t fop_hits[GF_FOP_MAXVALUE]; - struct timeval started_at; - struct ios_lat latency[GF_FOP_MAXVALUE]; - uint64_t errno_count[IOS_MAX_ERRORS]; - uint64_t nr_opens; - uint64_t max_nr_opens; - struct timeval max_openfd_time; -}; - -/* This is a list of errors which are in some way critical. - * It is useful to sample these errors even if other errors - * should be ignored. 
*/ -const int32_t ios_hard_error_list[] = { - EIO, - EROFS, - ENOSPC, - ENOTCONN, - ESTALE, -}; - -#define IOS_HARD_ERROR_LIST_SIZE (sizeof(ios_hard_error_list) / sizeof(int32_t)) - -const char *errno_to_name[IOS_MAX_ERRORS] = { - "success", /* 0 */ - "eperm", - "enoent", - "esrch", - "eintr", - "eio", - "enxio", - "e2big", - "enoexec", - "ebadf", - "echild", - "eagain", - "enomem", - "eacces", - "efault", - "enotblk", - "ebusy", - "eexist", - "exdev", - "enodev", - "enotdir", - "eisdir", - "einval", - "enfile", - "emfile", - "enotty", - "etxtbsy", - "efbig", - "enospc", - "espipe", - "erofs", - "emlink", - "epipe", - "edom", - "erange", - "edeadlk", - "enametoolong", - "enolck", - "enosys", - "enotempty", - "eloop", - "ewouldblock", - "enomsg", - "eidrm", - "echrng", - "el2nsync", - "el3hlt", - "el3rst", - "elnrng", - "eunatch", - "enocsi", - "el2hlt", - "ebade", - "ebadr", - "exfull", - "enoano", - "ebadrqc", - "ebadslt", - "edeadlock", - "ebfont", - "enostr", - "enodata", - "etime", - "enosr", - "enonet", - "enopkg", - "eremote", - "enolink", - "eadv", - "esrmnt", - "ecomm", - "eproto", - "emultihop", - "edotdot", - "ebadmsg", - "eoverflow", - "enotuniq", - "ebadfd", - "eremchg", - "elibacc", - "elibbad", - "elibscn", - "elibmax", - "elibexec", - "eilseq", - "erestart", - "estrpipe", - "eusers", - "enotsock", - "edestaddrreq", - "emsgsize", - "eprototype", - "enoprotoopt", - "eprotonosupport", - "esocktnosupport", - "eopnotsupp", - "epfnosupport", - "eafnosupport", - "eaddrinuse", - "eaddrnotavail", - "enetdown", - "enetunreach", - "enetreset", - "econnaborted", - "econnreset", - "enobufs", - "eisconn", - "enotconn", - "eshutdown", - "etoomanyrefs", - "etimedout", - "econnrefused", - "ehostdown", - "ehostunreach", - "ealready", - "einprogress", - "estale", - "euclean", - "enotnam", - "enavail", - "eisnam", - "eremoteio", - "edquot", - "enomedium", - "emediumtype", - "ecanceled", - "enokey", - "ekeyexpired", - "ekeyrevoked", - "ekeyrejected", - "eownerdead", - 
"enotrecoverable" -}; - -struct ios_conf { - gf_lock_t lock; - struct ios_global_stats cumulative; - uint64_t increment; - struct ios_global_stats incremental; - gf_boolean_t dump_fd_stats; - gf_boolean_t count_fop_hits; - gf_boolean_t measure_latency; - struct ios_stat_head list[IOS_STATS_TYPE_MAX]; - struct ios_stat_head thru_list[IOS_STATS_THRU_MAX]; - int32_t ios_dump_interval; - pthread_t dump_thread; - gf_boolean_t dump_thread_should_die; - gf_lock_t ios_sampling_lock; - int32_t ios_sample_interval; - int32_t ios_sample_buf_size; - ios_sample_buf_t *ios_sample_buf; - struct dnscache *dnscache; - int32_t ios_dnscache_ttl_sec; - gf_boolean_t iamshd; - gf_boolean_t iamnfsd; - gf_boolean_t iambrickd; - gf_boolean_t iamgfproxyd; - gf_boolean_t audit_creates_and_unlinks; - gf_boolean_t sample_hard_errors; - gf_boolean_t sample_all_errors; - uint32_t outstanding_req; - gf_boolean_t dump_percentile_latencies; -}; - - -struct ios_fd { - char *filename; - uint64_t data_written; - uint64_t data_read; - uint64_t block_count_write[32]; - uint64_t block_count_read[32]; - struct timeval opened_at; -}; +const char *get_ns_from_hash (struct ios_conf *conf, const ns_info_t *info); +int ios_conf_load_namespace_conf (struct ios_conf *conf); -typedef enum { - IOS_DUMP_TYPE_NONE = 0, - IOS_DUMP_TYPE_FILE = 1, - IOS_DUMP_TYPE_DICT = 2, - IOS_DUMP_TYPE_JSON_FILE = 3, - IOS_DUMP_TYPE_SAMPLES = 4, - IOS_DUMP_TYPE_MAX = 5 -} ios_dump_type_t; - -struct ios_dump_args { - ios_dump_type_t type; - union { - FILE *logfp; - dict_t *dict; - } u; -}; +gf_boolean_t should_throttle_fop (call_frame_t *frame, xlator_t *this); -typedef int (*block_dump_func) (xlator_t *, struct ios_dump_args*, - int , int , uint64_t ) ; +int _ios_swap_sample_buf (xlator_t *this, ios_sample_buf_t **dest); -struct ios_local { - inode_t *inode; - loc_t loc; - fd_t *fd; -}; - -static struct ios_local * -ios_local_new() { - return GF_CALLOC (1, sizeof (struct ios_local), - gf_common_mt_char); -} - -static void 
-ios_local_free (struct ios_local *local) +/* A lot of io-stats logging code depends on the io-stats translator needing + * the top xlator's name, which is special. For example, on bricks, the name + * is the path to the backend storage directory. + * + * Fails gracefully if there is no xlator name, returning "Unknown". */ +const char * +get_top_xlator_name (xlator_t *this) { - if (!local) - return; - - inode_unref (local->inode); + const char *name = NULL; - if (local->fd) - fd_unref (local->fd); - - loc_wipe (&local->loc); - memset (local, 0, sizeof (*local)); - GF_FREE (local); -} + while (this->parents && this->parents->xlator) { + this = this->parents->xlator; + } -struct volume_options options[]; + this = FIRST_CHILD (this); -static int -is_fop_latency_started (call_frame_t *frame) -{ - GF_ASSERT (frame); - struct timeval epoch = {0,}; - return memcmp (&frame->begin, &epoch, sizeof (epoch)); + if (this && this->name) { + return this->name; + } else { + gf_log (GF_IO_STATS, GF_LOG_ERROR, + "Cannot retrieve top-of-stack translator name."); + return "Unknown"; + } } -static void -ios_free_local (call_frame_t *frame) +static void ios_free_local (call_frame_t *frame) { struct ios_local *local = frame->local; - ios_local_free (local); - frame->local = NULL; } -static void -ios_track_loc (call_frame_t *frame, loc_t *loc) +static void ios_track_loc (call_frame_t *frame, loc_t *loc) { struct ios_local *local = NULL; - if (loc && loc->path) { /* Check if frame->local is already set (it should * only be set by either ios_track_loc() or @@ -412,11 +104,9 @@ ios_track_loc (call_frame_t *frame, loc_t *loc) } } -static void -ios_track_fd (call_frame_t *frame, fd_t *fd) +static void ios_track_fd (call_frame_t *frame, fd_t *fd) { struct ios_local *local = NULL; - if (fd && fd->inode) { if (frame->local) { local = frame->local; @@ -429,14 +119,21 @@ ios_track_fd (call_frame_t *frame, fd_t *fd) } } - -#define _IOS_SAMP_DIR DEFAULT_LOG_FILE_DIRECTORY "/samples" -#ifdef 
GF_LINUX_HOST_OS -#define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats" +#ifdef HAVE_ATOMIC_BUILTINS +#define INC_COUNTER(counter, inc, lock) \ + (__sync_fetch_and_add (&(counter), (inc))) #else -#define _IOS_DUMP_DIR DATADIR "/db/glusterd/stats" +#define INC_COUNTER(counter, inc, lock) \ + do { \ + LOCK (&(lock)); \ + { \ + counter += (inc); \ + } \ + UNLOCK (&(lock)); \ + } while (0) #endif +#define FOP_THROTTLE(frame, this) frame_set_throttling (frame, should_throttle_fop (frame, this)) #define END_FOP_LATENCY(frame, op) \ do { \ @@ -455,7 +152,7 @@ ios_track_fd (call_frame_t *frame, fd_t *fd) \ conf = this->private; \ if (conf && conf->measure_latency) { \ - STATS_INC (conf->outstanding_req); \ + INC_COUNTER (conf->outstanding_req, 1, conf->lock);\ gettimeofday (&frame->begin, NULL); \ } else { \ memset (&frame->begin, 0, sizeof (frame->begin));\ @@ -474,24 +171,30 @@ ios_track_fd (call_frame_t *frame, fd_t *fd) conf->incremental.fop_hits[GF_FOP_##op]++; \ } while (0) - -#if defined(HAVE_ATOMIC_BUILTINS) -#define STATS_LOCK(x) -#define STATS_UNLOCK(x) -#define STATS_ADD(x,i) __sync_add_and_fetch (&x, i) -#define STATS_SUB(x,i) __sync_sub_and_fetch (&x, i) -#define STATS_INC(x) STATS_ADD(x, 1) -#define STATS_DEC(x) STATS_SUB(x, 1) +#ifdef HAVE_ATOMIC_BUILTINS +#define UPDATE_PROFILE_STATS(frame, op, op_ret, op_errno) \ + do { \ + struct ios_conf *conf = NULL; \ + \ + if (!is_fop_latency_started (frame)) \ + break; \ + conf = this->private; \ + if (conf && conf->measure_latency && \ + conf->count_fop_hits) { \ + BUMP_FOP (op); \ + __sync_fetch_and_sub (&conf->outstanding_req, 1); \ + if (op_ret != 0 && op_errno > 0 \ + && op_errno < IOS_MAX_ERRORS) { \ + __sync_fetch_and_add ( \ + &conf->cumulative.errno_count[op_errno], 1); \ + __sync_fetch_and_add ( \ + &conf->incremental.errno_count[op_errno], 1); \ + } \ + gettimeofday (&frame->end, NULL); \ + update_ios_latency (conf, frame, GF_FOP_##op, op_ret, op_errno); \ + } \ + } while (0) #else -#define STATS_LOCK(x) 
LOCK (x) -#define STATS_UNLOCK(x) UNLOCK (x) -#define STATS_ADD(x,i) (x) += (i) -#define STATS_SUB(x,i) (x) -= (i) -#define STATS_INC(x) (x) += 1 -#define STATS_DEC(x) (x) -= 1 -#endif - - #define UPDATE_PROFILE_STATS(frame, op, op_ret, op_errno) \ do { \ struct ios_conf *conf = NULL; \ @@ -499,94 +202,200 @@ ios_track_fd (call_frame_t *frame, fd_t *fd) if (!is_fop_latency_started (frame)) \ break; \ conf = this->private; \ - STATS_LOCK (&conf->lock); \ + LOCK (&conf->lock); \ { \ if (conf && conf->measure_latency && \ conf->count_fop_hits) { \ BUMP_FOP(op); \ - STATS_DEC (conf->outstanding_req); \ + conf->outstanding_req -= 1; \ if (op_ret != 0 && op_errno > 0 \ && op_errno < IOS_MAX_ERRORS) { \ - STATS_INC(conf->cumulative.errno_count[op_errno]); \ - STATS_INC(conf->incremental.errno_count[op_errno]); \ + conf->cumulative.errno_count[op_errno]++; \ + conf->incremental.errno_count[op_errno]++; \ } \ gettimeofday (&frame->end, NULL); \ - update_ios_latency (conf, frame, GF_FOP_##op, \ - op_ret, op_errno); \ + update_ios_latency (conf, frame, GF_FOP_##op, op_ret, op_errno);\ } \ } \ - STATS_UNLOCK (&conf->lock); \ + UNLOCK (&conf->lock); \ } while (0) +#endif -#define BUMP_READ(fd, len) \ - do { \ - struct ios_conf *conf = NULL; \ - struct ios_fd *iosfd = NULL; \ - int lb2 = 0; \ - \ - conf = this->private; \ - lb2 = log_base2 (len); \ - ios_fd_ctx_get (fd, this, &iosfd); \ - if (!conf) \ - break; \ - \ - STATS_LOCK (&conf->lock); \ - { \ - STATS_ADD (conf->cumulative.data_read, len); \ - STATS_ADD (conf->incremental.data_read, len); \ - STATS_ADD (conf->cumulative.block_count_read[lb2], 1); \ - STATS_ADD (conf->incremental.block_count_read[lb2], 1);\ - \ - if (iosfd) { \ - STATS_ADD (iosfd->data_read, len); \ - STATS_ADD (iosfd->block_count_read[lb2], 1); \ - } \ - } \ - STATS_UNLOCK (&conf->lock); \ +#ifdef HAVE_ATOMIC_BUILTINS +#define BUMP_READ(fd, len) \ + do { \ + struct ios_conf *conf = NULL; \ + struct ios_fd *iosfd = NULL; \ + int lb2 = 0; \ + \ + conf = 
this->private; \ + lb2 = log_base2 (len); \ + ios_fd_ctx_get (fd, this, &iosfd); \ + if (!conf) \ + break; \ + \ + __sync_fetch_and_add (&conf->cumulative.data_read, \ + len); \ + __sync_fetch_and_add (&conf->incremental.data_read, \ + len); \ + __sync_fetch_and_add ( \ + &conf->cumulative.block_count_read[lb2], 1); \ + __sync_fetch_and_add ( \ + &conf->incremental.block_count_read[lb2], 1); \ + if (iosfd) { \ + __sync_fetch_and_add (&iosfd->data_read, len); \ + __sync_fetch_and_add ( \ + &iosfd->block_count_read[lb2], 1); \ + } \ } while (0) - -#define BUMP_WRITE(fd, len) \ - do { \ - struct ios_conf *conf = NULL; \ - struct ios_fd *iosfd = NULL; \ - int lb2 = 0; \ - \ - conf = this->private; \ - lb2 = log_base2 (len); \ - ios_fd_ctx_get (fd, this, &iosfd); \ - if (!conf) \ - break; \ - STATS_LOCK (&conf->lock); \ - { \ - STATS_ADD (conf->cumulative.data_written, len); \ - STATS_ADD (conf->incremental.data_written, len); \ - STATS_ADD (conf->cumulative.block_count_write[lb2], 1);\ - STATS_ADD (conf->incremental.block_count_write[lb2], 1);\ - \ - if (iosfd) { \ - STATS_ADD (iosfd->data_written, len); \ - STATS_ADD (iosfd->block_count_write[lb2], 1); \ - } \ - } \ - STATS_UNLOCK (&conf->lock); \ +#else +#define BUMP_READ(fd, len) \ + do { \ + struct ios_conf *conf = NULL; \ + struct ios_fd *iosfd = NULL; \ + int lb2 = 0; \ + \ + conf = this->private; \ + lb2 = log_base2 (len); \ + ios_fd_ctx_get (fd, this, &iosfd); \ + if (!conf) \ + break; \ + \ + LOCK (&conf->lock); \ + { \ + conf->cumulative.data_read += len; \ + conf->incremental.data_read += len; \ + conf->cumulative.block_count_read[lb2]++; \ + conf->incremental.block_count_read[lb2]++; \ + \ + if (iosfd) { \ + iosfd->data_read += len; \ + iosfd->block_count_read[lb2]++; \ + } \ + } \ + UNLOCK (&conf->lock); \ } while (0) +#endif -#define BUMP_STATS(iosstat, type) \ +#ifdef HAVE_ATOMIC_BUILTINS +#define BUMP_WRITE(fd, len) \ do { \ - struct ios_conf *conf = NULL; \ - uint64_t value = 0; \ + struct ios_conf 
*conf = NULL; \ + struct ios_fd *iosfd = NULL; \ + int lb2 = 0; \ \ conf = this->private; \ + lb2 = log_base2 (len); \ + ios_fd_ctx_get (fd, this, &iosfd); \ + if (!conf) \ + break; \ + __sync_fetch_and_add (&conf->cumulative.data_written, \ + len); \ + __sync_fetch_and_add (&conf->incremental.data_written, \ + len); \ + __sync_fetch_and_add ( \ + &conf->cumulative.block_count_write[lb2], 1); \ + __sync_fetch_and_add ( \ + &conf->incremental.block_count_write[lb2], 1); \ + \ + if (iosfd) { \ + __sync_fetch_and_add (&iosfd->data_written, \ + len); \ + __sync_fetch_and_add ( \ + &iosfd->block_count_write[lb2], 1); \ + } \ + } while (0) +#else +#define BUMP_WRITE(fd, len) \ + do { \ + struct ios_conf *conf = NULL; \ + struct ios_fd *iosfd = NULL; \ + int lb2 = 0; \ \ - LOCK(&iosstat->lock); \ + conf = this->private; \ + lb2 = log_base2 (len); \ + ios_fd_ctx_get (fd, this, &iosfd); \ + if (!conf) \ + break; \ + LOCK (&conf->lock); \ { \ - value = STATS_ADD (iosstat->counters[type], 1); \ + conf->cumulative.data_written += len; \ + conf->incremental.data_written += len; \ + conf->cumulative.block_count_write[lb2]++; \ + conf->incremental.block_count_write[lb2]++; \ + \ + if (iosfd) { \ + iosfd->data_written += len; \ + iosfd->block_count_write[lb2]++; \ + } \ } \ - UNLOCK (&iosstat->lock); \ - ios_stat_add_to_list (&conf->list[type], \ - value, iosstat); \ + UNLOCK (&conf->lock); \ } while (0) +#endif +#ifdef HAVE_ATOMIC_BUILTINS +#define BUMP_STATS(iosstat, type) \ + do { \ + struct ios_conf *conf = NULL; \ + uint64_t value = 0; \ + \ + conf = this->private; \ + value = __sync_add_and_fetch (&iosstat->counters[type], 1); \ + ios_stat_add_to_list (&conf->list[type], \ + value, iosstat); \ + \ + } while (0) +#else +#define BUMP_STATS(iosstat, type) \ + do { \ + struct ios_conf *conf = NULL; \ + uint64_t value = 0; \ + \ + conf = this->private; \ + \ + LOCK(&iosstat->lock); \ + { \ + iosstat->counters[type]++; \ + value = iosstat->counters[type]; \ + } \ + UNLOCK 
(&iosstat->lock); \ + ios_stat_add_to_list (&conf->list[type], \ + value, iosstat); \ + \ + } while (0) +#endif + +#ifdef HAVE_ATOMIC_BUILTINS +#define BUMP_THROUGHPUT(iosstat, type) \ + do { \ + struct ios_conf *conf = NULL; \ + double elapsed; \ + struct timeval *begin, *end; \ + double throughput; \ + int flag = 0; \ + \ + begin = &frame->begin; \ + end = &frame->end; \ + \ + elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \ + + (end->tv_usec - begin->tv_usec); \ + throughput = op_ret / elapsed; \ + \ + conf = this->private; \ + \ + if (iosstat->thru_counters[type].throughput \ + <= throughput) { \ + iosstat->thru_counters[type].throughput = \ + throughput; \ + gettimeofday (&iosstat-> \ + thru_counters[type].time, NULL); \ + flag = 1; \ + } \ + if (flag) \ + ios_stat_add_to_list (&conf->thru_list[type], \ + throughput, iosstat); \ + } while (0) +#else #define BUMP_THROUGHPUT(iosstat, type) \ do { \ struct ios_conf *conf = NULL; \ @@ -603,7 +412,7 @@ ios_track_fd (call_frame_t *frame, fd_t *fd) throughput = op_ret / elapsed; \ \ conf = this->private; \ - STATS_LOCK (&iosstat->lock); \ + LOCK (&iosstat->lock); \ { \ if (iosstat->thru_counters[type].throughput \ <= throughput) { \ @@ -614,11 +423,79 @@ ios_track_fd (call_frame_t *frame, fd_t *fd) flag = 1; \ } \ } \ - STATS_UNLOCK (&iosstat->lock); \ - if (flag) \ + UNLOCK (&iosstat->lock); \ + if (flag) \ ios_stat_add_to_list (&conf->thru_list[type], \ throughput, iosstat); \ } while (0) +#endif + + +/* + * So why goto all this trouble? Why not just queue up some samples in + * a big list and malloc away? Well malloc is expensive relative + * to what we are measuring, so cannot have any malloc's (or worse + * callocs) in our measurement code paths. Instead, we are going to + * pre-allocate a circular buffer and collect a maximum number of samples. + * Prior to dumping them all we'll create a new buffer and swap the + * old buffer with the new, and then proceed to dump the statistics + * in our dump thread. 
+ * + */ +ios_sample_buf_t * +ios_create_sample_buf (size_t buf_size) +{ + ios_sample_buf_t *ios_sample_buf = NULL; + ios_sample_t *ios_samples = NULL; + + ios_sample_buf = GF_CALLOC (1, + sizeof (*ios_sample_buf), + gf_io_stats_mt_ios_sample_buf); + if (!ios_sample_buf) + goto err; + + ios_samples = GF_CALLOC (buf_size, + sizeof (*ios_samples), + gf_io_stats_mt_ios_sample); + + if (!ios_samples) + goto err; + + ios_sample_buf->ios_samples = ios_samples; + ios_sample_buf->size = buf_size; + ios_sample_buf->pos = 0; + ios_sample_buf->observed = 0; + ios_sample_buf->collected = 0; + + return ios_sample_buf; +err: + GF_FREE (ios_sample_buf); + return NULL; +} + +void +ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf) +{ + GF_FREE (ios_sample_buf->ios_samples); + GF_FREE (ios_sample_buf); +} + +static int +ios_init_sample_buf (struct ios_conf *conf) +{ + int32_t ret = -1; + + GF_ASSERT (conf); + LOCK (&conf->lock); + conf->ios_sample_buf = ios_create_sample_buf ( + conf->ios_sample_buf_size); + if (!conf->ios_sample_buf) + goto out; + ret = 0; +out: + UNLOCK (&conf->lock); + return ret; +} int ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd) @@ -715,72 +592,6 @@ ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat) } -/* - * So why goto all this trouble? Why not just queue up some samples in - * a big list and malloc away? Well malloc is expensive relative - * to what we are measuring, so cannot have any malloc's (or worse - * callocs) in our measurement code paths. Instead, we are going to - * pre-allocate a circular buffer and collect a maximum number of samples. - * Prior to dumping them all we'll create a new buffer and swap the - * old buffer with the new, and then proceed to dump the statistics - * in our dump thread. 
- * - */ -ios_sample_buf_t * -ios_create_sample_buf (size_t buf_size) -{ - ios_sample_buf_t *ios_sample_buf = NULL; - ios_sample_t *ios_samples = NULL; - - ios_sample_buf = GF_CALLOC (1, - sizeof (*ios_sample_buf), - gf_io_stats_mt_ios_sample_buf); - if (!ios_sample_buf) - goto err; - - ios_samples = GF_CALLOC (buf_size, - sizeof (*ios_samples), - gf_io_stats_mt_ios_sample); - - if (!ios_samples) - goto err; - - ios_sample_buf->ios_samples = ios_samples; - ios_sample_buf->size = buf_size; - ios_sample_buf->pos = 0; - ios_sample_buf->observed = 0; - ios_sample_buf->collected = 0; - - return ios_sample_buf; -err: - GF_FREE (ios_sample_buf); - return NULL; -} - -void -ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf) -{ - GF_FREE (ios_sample_buf->ios_samples); - GF_FREE (ios_sample_buf); -} - -static int -ios_init_sample_buf (struct ios_conf *conf) -{ - int32_t ret = -1; - - GF_ASSERT (conf); - LOCK (&conf->lock); - conf->ios_sample_buf = ios_create_sample_buf ( - conf->ios_sample_buf_size); - if (!conf->ios_sample_buf) - goto out; - ret = 0; -out: - UNLOCK (&conf->lock); - return ret; -} - int ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value, struct ios_stat *iosstat) @@ -843,13 +654,11 @@ ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value, new->value = value; ios_stat_ref (iosstat); list_add_tail (&new->list, &tmp->list); - if (last) { - stat = last->iosstat; - last->iosstat = NULL; - ios_stat_unref (stat); - list_del (&last->list); - GF_FREE (last); - } + stat = last->iosstat; + last->iosstat = NULL; + ios_stat_unref (stat); + list_del (&last->list); + GF_FREE (last); if (reposition == MAX_LIST_MEMBERS) list_head->min_cnt = value; else if (min_count) { @@ -876,7 +685,7 @@ out: return 0; } -static int +static inline int ios_stats_cleanup (xlator_t *this, inode_t *inode) { @@ -918,27 +727,24 @@ io_stats_log_px_stat (xlator_t *this, ios_sample_buf_t *sample_buf, int i = 0; int px = 0; struct ios_conf *conf = NULL; - + 
collected = sample_buf->collected; - + px = (int)(pval * 100); pn_idx = (int)(pval * collected); pn_val = global_ios_latency[pn_idx]; - ios_log (this, logfp, "\"%s.%s.p%d_latency_usec\":\"%0.4lf\",", desc, - "global", px, pn_val); + ios_log (this, logfp, "\"%s.%s.p%d_latency_usec\":\"%0.4lf\",", desc, "global", px, pn_val); for (i = 0; i < GF_FOP_MAXVALUE; i++) { qsort (ios_latencies[i], num_fop[i], sizeof (double), gf_compare_double); pn_idx = (int)(pval*num_fop[i]); pn_val = ios_latencies[i][pn_idx]; - ios_log (this, logfp, - "\"%s.%s.p%d_latency_usec\":\"%0.4lf\",", desc, gf_fop_list[i], px, pn_val); + ios_log (this, logfp, "\"%s.%s.p%d_latency_usec\":\"%0.4lf\",", desc, gf_fop_list[i], px, pn_val); } } - double io_stats_prepare_latency_samples (ios_sample_buf_t *sample_buf, int *num_fop, double **ios_latencies, @@ -970,7 +776,8 @@ io_stats_prepare_latency_samples (ios_sample_buf_t *sample_buf, int *num_fop, double io_stats_dump_pn_latencies (xlator_t *this, ios_sample_buf_t *sample_buf, - FILE* logfp, char * key_prefix, char * str_prefix) + FILE* logfp, char * key_prefix, + char * str_prefix) { double *ios_latencies[GF_FOP_MAXVALUE] = {NULL, }; double *global_ios_latency = NULL; @@ -991,8 +798,6 @@ io_stats_dump_pn_latencies (xlator_t *this, ios_sample_buf_t *sample_buf, } for (i = 0; i < GF_FOP_MAXVALUE; i++) { - qsort (ios_latencies[i], num_fop[i], sizeof (double), - gf_compare_double); ios_latencies[i] = GF_CALLOC (collected, sizeof (double), 0); @@ -1028,7 +833,9 @@ io_stats_dump_pn_latencies (xlator_t *this, ios_sample_buf_t *sample_buf, num_fop, ios_latencies, global_ios_latency); out: + GF_FREE (prefix); + GF_FREE (global_ios_latency); for (j = 0; j < i; j++) { GF_FREE (ios_latencies[j]); @@ -1038,8 +845,7 @@ out: } int -ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this, - FILE *logfp) +ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this, FILE* logfp) { struct ios_stat_list *entry = NULL; @@ -1056,7 +862,7 @@ 
ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this, int ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this, - FILE *logfp, ios_stats_thru_t type) + FILE* logfp, ios_stats_type_t type) { struct ios_stat_list *entry = NULL; struct timeval time = {0, }; @@ -1081,6 +887,7 @@ ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this, int _io_stats_get_key_prefix (xlator_t *this, char **key_prefix) { + struct ios_conf *conf = NULL; char *key_root = "storage.gluster"; char *xlator_name = NULL; char *instance_name = NULL; @@ -1088,9 +895,10 @@ _io_stats_get_key_prefix (xlator_t *this, char **key_prefix) { int bytes_written = 0; int i = 0; int ret = 0; - struct ios_conf *conf = this->private; - xlator_name = strdupa (this->name); + conf = this->private; + xlator_name = strdupa (get_top_xlator_name (this)); + for (i = 0; i < strlen (xlator_name); i++) { if (xlator_name[i] == '/') xlator_name[i] = '_'; @@ -1100,15 +908,14 @@ _io_stats_get_key_prefix (xlator_t *this, char **key_prefix) { if (conf->iamshd) { xlator_name = "shd"; } else if (conf->iamnfsd) { + instance_name = xlator_name; xlator_name = "nfsd"; - instance_name = strdupa (this->name); } else if (conf->iamgfproxyd) { + instance_name = xlator_name; xlator_name = "gfproxyd"; - instance_name = strdupa (this->name); - } - - if (strcmp (__progname, "glusterfsd") == 0) + } else if (conf->iambrickd) { key_root = "storage.gluster.brick"; + } if (instance_name) { /* +3 for 2 x "." 
+ NULL */ @@ -1221,8 +1028,9 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, ios_log (this, logfp, "\"%s.%s.inodelk_count\": \"%u\",", key_prefix, str_prefix, inodelk_count); - ret = syncop_getxattr (FIRST_CHILD (this), &unused_loc, &xattr, - GLUSTERFS_ENTRYLK_COUNT, NULL, NULL); + ret = syncop_getxattr (FIRST_CHILD (this), &unused_loc, + &xattr, GLUSTERFS_ENTRYLK_COUNT, + NULL, NULL); if (ret == 0 && xattr) { if (dict_get_int32 (xattr, GLUSTERFS_ENTRYLK_COUNT, &entrylk_count) != 0) { @@ -1319,12 +1127,11 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, fop_lat_max = stats->latency[i].max; } } - if (interval == -1) { - ios_log (this, logfp, - "\"%s.%s.fop.%s.count\": \"%"PRId64"\",", - key_prefix, str_prefix, lc_fop_name, - fop_hits); - } else { + ios_log (this, logfp, + "\"%s.%s.fop.%s.count\": \"%"PRId64"\",", + key_prefix, str_prefix, lc_fop_name, + fop_hits); + if (interval != -1) { ios_log (this, logfp, "\"%s.%s.fop.%s.per_sec\": \"%0.2lf\",", key_prefix, str_prefix, lc_fop_name, @@ -1355,6 +1162,7 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, "\"%s.%s.fop.weighted_latency_ave_usec_nozerofill\": \"%0.4lf\",", key_prefix, str_prefix, weighted_fop_ave_usec); } + ios_log (this, logfp, "\"%s.%s.fop.weighted_latency_ave_usec\": \"%0.4lf\",", key_prefix, str_prefix, weighted_fop_ave_usec); @@ -1371,8 +1179,8 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, "\"%s.%s.fop.GOT1\":\"%0.4lf\",", key_prefix, str_prefix, fop_ave_usec); if (conf->dump_percentile_latencies) { - io_stats_dump_pn_latencies (this, sample_buf, logfp, - key_prefix, str_prefix); + io_stats_dump_pn_latencies (this, sample_buf, logfp, key_prefix, + str_prefix); ios_log (this, logfp, "\"%s.%s.fop.GOT2\":\"%0.4lf\",", key_prefix, str_prefix, fop_ave_usec); @@ -1381,29 +1189,32 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, "\"%s.%s.fop.GOT3\":\"%0.4lf\",", key_prefix, str_prefix, fop_ave_usec); - if (conf->iamnfsd) { + if (conf->iamnfsd || conf->iambrickd) { dict_t 
*xattr = NULL; - ret = syncop_getxattr (this, &unused_loc, &xattr, + ret = syncop_getxattr (FIRST_CHILD (this), &unused_loc, &xattr, IO_THREADS_QUEUE_SIZE_KEY, NULL, NULL); if (xattr) { - // Iterate over the dictionary returned to us by io-threads and - // dump the results to the stats file. + /* + * Iterate over the dictionary returned to us by + * io-threads and dump the results to the stats file. + */ data_pair_t *curr = NULL; - dict_for_each (xattr, curr) { + dict_foreach_inline (xattr, curr) { ios_log (this, logfp, - "\"%s.%s.%s.queue_size\": \"%d\",", - key_prefix, str_prefix, curr->key, - data_to_int32 (curr->value)); + "\"%s.%s.%s.queue_size\": \"%d\",", + key_prefix, str_prefix, curr->key, + data_to_int32 (curr->value)); } - // Free the dictionary + /* Free the dictionary! */ dict_unref (xattr); } else { - gf_log (this->name, GF_LOG_WARNING, - "Unable to get queue size counts from " - "the io-threads translator!"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get queue size counts " + "from the io-threads translator!"); } } + if (interval == -1) { ios_log (this, logfp, "\"%s.%s.uptime\": \"%"PRId64"\",", key_prefix, str_prefix, @@ -1427,7 +1238,19 @@ io_stats_dump_global_to_json_logfp (xlator_t *this, (double)(stats->data_written / interval_sec)); } +#if OFFSET_WRITE_DETECTOR + if (conf->iamnfsd) { + ios_log (this, logfp, + ",\"%s.%s.offset_writes\": \"%"PRId64"\",", + key_prefix, str_prefix, stats->offset_write_ops); + ios_log (this, logfp, + "\"%s.%s.total_writes\": \"%"PRId64"\"", + key_prefix, str_prefix, stats->total_write_ops); + } +#endif + ios_log (this, logfp, "}"); + ret = 0; out: GF_FREE (key_prefix); @@ -1443,14 +1266,14 @@ _resolve_username (xlator_t *this, uid_t uid) char *pwd_buf = NULL; char *ret = NULL; - /* Prepare our buffer for the uid->username translation */ + // Prepare our buffer for the uid->username translation #ifdef _SC_GETGR_R_SIZE_MAX pwd_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); #else pwd_buf_len = -1; #endif if 
(pwd_buf_len == -1) { - pwd_buf_len = DEFAULT_PWD_BUF_SZ; /* per the man page */ + pwd_buf_len = DEFAULT_PWD_BUF_SZ; // per the man page } pwd_buf = alloca (pwd_buf_len); @@ -1482,14 +1305,14 @@ _resolve_group_name (xlator_t *this, gid_t gid) char *grp_buf = NULL; char *ret = NULL; - /* Prepare our buffer for the gid->group name translation */ + // Prepare our buffer for the gid->group name translation #ifdef _SC_GETGR_R_SIZE_MAX grp_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); #else grp_buf_len = -1; #endif if (grp_buf_len == -1) { - grp_buf_len = DEFAULT_GRP_BUF_SZ; /* per the man page */ + grp_buf_len = DEFAULT_GRP_BUF_SZ; // per the man page } grp_buf = alloca (grp_buf_len); @@ -1520,7 +1343,7 @@ err: */ void _io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, - FILE *logfp) + FILE* logfp) { double epoch_time = 0.00; char *xlator_name = NULL; @@ -1531,8 +1354,12 @@ _io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, char *port_pos = NULL; char *group_name = NULL; char *username = NULL; - char *path = NULL; struct ios_conf *conf = NULL; + const char *pri_str = NULL; + const char *ns_name = NULL; + const char *path = NULL; + const char *name = NULL; + loc_t *loc = NULL; const char *error_string = NULL; int32_t op_errno = 0; @@ -1541,7 +1368,7 @@ _io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, epoch_time = (sample->timestamp).tv_sec + ((sample->timestamp).tv_usec / 1000000.0); - if (strlen (sample->identifier) == 0) { + if (!sample->identifier || (strlen (sample->identifier) == 0)) { hostname = "Unknown"; port = "Unknown"; } else { @@ -1553,16 +1380,12 @@ _io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, if (!port) goto err; *port_pos = '\0'; - hostname = gf_rev_dns_lookup_cached (identifier, - conf->dnscache); + hostname = gf_rev_dns_lookup (identifier); if (!hostname) - hostname = "Unknown"; + hostname ="Unknown"; } - xlator_name = this->name; - if (!xlator_name || strlen (xlator_name) == 
0) - xlator_name = "Unknown"; - + xlator_name = strdupa (get_top_xlator_name (this)); instance_name = this->instance_name; if (!instance_name || strlen (instance_name) == 0) instance_name = "N/A"; @@ -1581,6 +1404,24 @@ _io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, sprintf (group_name, "%d", (int32_t)sample->gid); } + /* + * If the priority was unspecified, print out actual priority of the FOP, + * otherwise print the priority given on the frame. + */ + if (sample->fop_pri != GF_FOP_PRI_UNSPEC) { + pri_str = fop_pri_to_string (sample->fop_pri); + } else { + pri_str = fop_enum_to_pri_string (sample->fop_type); + } + + ns_name = get_ns_from_hash (conf, &sample->ns_info); + if (!ns_name) { + ns_name = "Unknown"; + } else if (strcmp (ns_name, "/") == 0) { + /* Give the root a readable name in the samples. */ + ns_name = "Root"; + } + path = "Unknown"; if (sample->have_path) path = sample->path; @@ -1592,11 +1433,11 @@ _io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, } ios_log (this, logfp, - "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s,%s,%d,%s", - epoch_time, fop_enum_to_pri_string (sample->fop_type), + "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s,%s,%s,%d,%s", + epoch_time, pri_str, fop_enum_to_string (sample->fop_type), sample->elapsed, xlator_name, instance_name, username, - group_name, hostname, port, path, op_errno, error_string); + group_name, hostname, port, ns_name, path, op_errno, error_string); goto out; err: gf_log (this->name, GF_LOG_ERROR, @@ -1606,6 +1447,426 @@ out: GF_FREE (username); } +/** + * get_ns_from_hash + * + * Given a hash (produced by the namespace hashing xlator), retrieve a string + * that can be used as a key for a few other dictionaries to retrieve crucial + * info for NS throttling. + * + * @conf: Translator configuration struct. + * @info: Namespace hash information. 
+ */ +const char * +get_ns_from_hash (struct ios_conf *conf, const ns_info_t *info) +{ + char ns_key[DICT_UINT32_KEY_SIZE]; + char *ns_name; + int ret; + + /* Handle the special case hashes. */ + if (info->found == _gf_false) { + gf_log (GF_IO_STATS, GF_LOG_DEBUG, "NS > H: NO HASH."); + return NULL; + } + + dict_uint32_to_key (info->hash, ns_key); + ret = dict_get_str (conf->hash_to_ns, ns_key, &ns_name); + + if (ret == 0) { + gf_log (GF_IO_STATS, GF_LOG_DEBUG, + "NS > H: %u -> %s.", + info->hash, ns_name); + return ns_name; + } else { + gf_log (GF_IO_STATS, GF_LOG_DEBUG, + "NS > H: HASH %u NOT FOUND.", + info->hash); + return NULL; + } +} + +/** + * get_fop_hits_for_ns + * + * Given a namespace, returns the ios_fop_hits_t structure tracking + * fop averages for this namespace. + * + * @this: Pointer to the io-stats xlator. + * @ns_name: Namespace we want to find the fop_hits structure for. + */ +ios_fop_hits_t * +get_fop_hits_for_ns (const xlator_t *this, const char *ns_name) +{ + struct ios_conf *conf = this->private; + data_t *fop_hits_dict_entry_data; + ios_fop_hits_t *fop_hits; + + fop_hits_dict_entry_data = dict_get (conf->fop_hits_dict, (char *)ns_name); + + if (fop_hits_dict_entry_data) { + fop_hits = (ios_fop_hits_t *)fop_hits_dict_entry_data->data; + + if (fop_hits) { + return fop_hits; + } + } + return NULL; +} + +/** + * get_fop_rate_for_ns + * + * Returns the currently computed fop rate for the given namespace. + * + * @this: Pointer to the io-stats xlator. + * @ns_name: Namespace we want to find the fop rate for. + */ +double +get_fop_rate_for_ns (const xlator_t *this, const char *ns_name) +{ + ios_fop_hits_t *fop_hits = get_fop_hits_for_ns (this, ns_name); + return fop_hits ? 
fop_hits->avg_fops : -1; +} + +int64_t get_allowed_fop_rate_for_ns (struct ios_conf *conf, const char *ns_name) +{ + int64_t rate; + + if (dict_get_int64 (conf->throttling_rates, + (char *)ns_name, &rate) == 0) { + return rate; + } else { + return -1; + } +} + +gf_boolean_t +should_throttle_fop (call_frame_t *frame, xlator_t *this) +{ + struct ios_conf *conf = this->private; + const char *ns_name = NULL; + double fop_rate = 0; + int64_t allowed_fop_rate = 0; + int ret = 0; + + /* + * If throttling is not enabled, of course, do not throttle. + */ + if (!conf->throttling_enabled) { + goto no_throttle; + } + + /* + * If no throttling rates are defined, default to no throttling. + */ + if (!conf->throttling_rates) { + goto no_throttle; + } + + /* + * Attempt to find the namespace directory name from the id, and if + * cannot find it, default to no throttling. + */ + ns_name = get_ns_from_hash (conf, &frame->root->ns_info); + if (!ns_name) { + goto no_throttle; + } + + /* + * Attempt to find the current fop rate for the directory. + * If for some reason we cannot find it, default to no throttling. + */ + fop_rate = get_fop_rate_for_ns (this, ns_name); + + if (fop_rate < 0) { + goto no_throttle; + } + + allowed_fop_rate = get_allowed_fop_rate_for_ns (conf, ns_name); + + /* + * If we can't find an allowed fop rate for the namespace, or if it + * is set to (-1), then default to no throttling. + */ + if (allowed_fop_rate < 0) { + goto no_throttle; + } + + /* + * Finally, if the share has exceeded the fop rate, + * we should throttle it. + */ + if (fop_rate <= allowed_fop_rate) { + goto no_throttle; + } + + /* If we are here after all of that, finally throttle the namespace. 
*/ + gf_log (this->name, GF_LOG_DEBUG, + "%f > %ld | Throttling FOP to namespace '%s'", + fop_rate, allowed_fop_rate, ns_name); + return _gf_true; + +no_throttle: + return _gf_false; +} + +static inline ios_fop_hits_t *ios_fop_hits_create () +{ + return GF_CALLOC (1, sizeof (ios_fop_hits_t), gf_io_stats_mt_ios_sample_buf); +} + +int +io_stats_process_ios_sample (const xlator_t *this, const ios_sample_t *sample) +{ + struct ios_conf *conf = this->private; + ios_fop_hits_t *fop_hits = NULL; + const char *ns_name = NULL; + int ret = 0; + data_t *fop_hits_dict_entry_data = NULL; + + if (!conf) { + ret = -1; + goto out; + } + + /* Silently skip if ns_info isn't initialized yet. Sometimes samples + * get dumped before the dict can be enabled, and while it's not bad, + * it's quite noisy on the brick logs. */ + if (!conf->measure_namespace_rates || !conf->fop_hits_dict) { + return 0; + } + + ns_name = get_ns_from_hash (conf, &sample->ns_info); + if (!ns_name) { + /* It is still useful to collect statistics on "unknown" samples + * so we know how many FOPs are untagged by the namespace xlator. */ + ns_name = "unknown"; + } + + fop_hits_dict_entry_data = dict_get (conf->fop_hits_dict, + (char *)ns_name); + + if (!fop_hits_dict_entry_data) { + /* + * If no FOP hits are detected for a namespace, lets create + * one and place it in the dictionary + */ + fop_hits = ios_fop_hits_create (); + fop_hits_dict_entry_data = bin_to_data (fop_hits, + sizeof (*fop_hits)); + ret = dict_set (conf->fop_hits_dict, (char *)ns_name, + fop_hits_dict_entry_data); + if (ret != 0) { + goto out; + } + } + + /* + * Let's now take the fop_hits and increment the appropriate counter. 
+ */ + fop_hits = (ios_fop_hits_t *)fop_hits_dict_entry_data->data; + fop_hits->hits[fop_hits->idx]++; + fop_hits->elapsed[fop_hits->idx] += sample->elapsed; + +out: + return ret; +} + +int +io_stats_compute_sma_for_ns (struct ios_dump_args *args, const char *ns_name, + ios_fop_hits_t *fop_hits, dict_t *dict, uint64_t idx) +{ + xlator_t *this = args->this; + struct ios_conf *conf = this->private; + int ret = -1; + int hit_idx, i; + int num_samples = 0; + double avg_fops = 0; + double elapsed = 0; + double avg_latency = 0; + char *key_prefix = NULL; + + GF_VALIDATE_OR_GOTO (this->name, this, out); + GF_VALIDATE_OR_GOTO (this->name, ns_name, out); + GF_VALIDATE_OR_GOTO (this->name, fop_hits, out); + + ret = _io_stats_get_key_prefix (this, &key_prefix); + if (ret) { + goto out; + } + + /* For each i'th window before our current sliding index (e.g. the + * 0th window, 1st window, 2nd window preceding the current index), + * we want to try to sum the counted hits */ + for (i = 0; i < conf->ns_rate_window; i++) { + /* This gets the window at `idx - i` and wraps correctly around + * the FOP_HITS_SIZE-sized array. */ + hit_idx = (fop_hits->idx - i + FOP_HITS_SIZE) % FOP_HITS_SIZE; + gf_log (this->name, GF_LOG_DEBUG, "%s.hits[%d] = %lu * %d", + ns_name, hit_idx, fop_hits->hits[hit_idx], + conf->ios_sample_interval); + gf_log (this->name, GF_LOG_DEBUG, "%s.latency[%d] = %lf", + ns_name, hit_idx, fop_hits->elapsed[hit_idx]); + /* Accumulate the number of samples that we've seen. */ + num_samples += fop_hits->hits[hit_idx]; + /* We add the total elapsed time for all samples in our window, + * which we will divide later by the total number of processed + * samples. */ + elapsed += fop_hits->elapsed[hit_idx]; + } + + /* The number of fops is given by the samples multiplied by the sample + * interval, divided by the total length in time of the window. 
*/ + avg_fops = (double)num_samples * conf->ios_sample_interval / + ((double)conf->ns_rate_window * conf->ios_dump_interval); + gf_log (this->name, GF_LOG_DEBUG, + "Average fop rate for %s = %lf / sec", ns_name, avg_fops); + + /* The average latency is given by the total elapsed time for all of + * the samples divided by the total number of samples. If we have 0 + * samples, just return 0. */ + avg_latency = (num_samples > 0) ? (elapsed / (double)num_samples) : 0; + gf_log (this->name, GF_LOG_DEBUG, + "Average fop latency for %s = %lf usec / fop", ns_name, avg_latency); + + if (strcmp ("/", ns_name) == 0) { + /* Give the root namespace a readable name. */ + ns_name = "root"; + } + + /* Include a ',' unless we're at the end of the dict. */ + ios_log (this, args->u.logfp, + "\"%s.namespaces.%s.fop_rate\": \"%lf\",", + key_prefix, ns_name, avg_fops); + ios_log (this, args->u.logfp, + "\"%s.namespaces.%s.avg_fop_latency\": \"%lf\"%s", + key_prefix, ns_name, avg_latency, (idx == dict->count - 1) ? 
"" : ","); + + fop_hits->avg_fops = avg_fops; + fop_hits->avg_latency = avg_latency; + ret = 0; +out: + if (key_prefix) { + GF_FREE (key_prefix); + } + + return ret; +} + +int +__io_stats_compute_moving_average_foreach (dict_t *this, char *key, + data_t *value, void *data, uint64_t idx) +{ + struct ios_dump_args *args = data; + xlator_t *x_this = args->this; + if (key && value) { + gf_log (x_this->name, GF_LOG_TRACE, "Will compute averages for %s", key); + ios_fop_hits_t *fop_hits = (ios_fop_hits_t *)value->data; + io_stats_compute_sma_for_ns (args, key, fop_hits, this, idx); + } + return 0; +} + +int +ios_compute_moving_average (struct ios_dump_args *args) +{ + xlator_t *this = args->this; + struct ios_conf *conf = this->private; + + if (!conf->measure_namespace_rates) { + return 0; + } + + if (!conf->fop_hits_dict) { + return 0; + } + + if (conf->fop_hits_dict->count > 0) { + ios_log (args->this, args->u.logfp, "{"); + gf_log (this->name, GF_LOG_TRACE, __func__); + dict_foreach_with_idx (conf->fop_hits_dict, __io_stats_compute_moving_average_foreach, args); + ios_log (args->this, args->u.logfp, "}"); + } + + return 0; +} + + +int +io_stats_compute_ema_for_dir (xlator_t *this, const char *dirname, + ios_fop_hits_t *fop_hits) +{ + return 0; +} + +int +__io_stats_bump_ctr_foreach (dict_t *this, char *key, data_t *value, void *data) +{ + xlator_t *x_this = (xlator_t *)data; + ios_fop_hits_t *fop_hits = (ios_fop_hits_t *)value->data; + + /* Increment counter (and wrap), then reset hits for that index. 
*/ + fop_hits->idx = (fop_hits->idx + 1) % FOP_HITS_SIZE; + fop_hits->hits[fop_hits->idx] = 0; + fop_hits->elapsed[fop_hits->idx] = 0; + + gf_log (x_this->name, GF_LOG_TRACE, "Moved counters for %s to %ld", + key, fop_hits->idx); + + return 0; +} + +void +io_stats_bump_ctrs (xlator_t *this) +{ + struct ios_conf *conf = this->private; + + if (!conf->measure_namespace_rates) { + return; + } + + gf_log (this->name, GF_LOG_TRACE, __func__); + dict_foreach (conf->fop_hits_dict, __io_stats_bump_ctr_foreach, + this); +} + +int +__io_stats_destroy_fop_hits_dict_foreach (dict_t *this, char *key, + data_t *value, void *data) +{ + if (key && value) { + gf_log (GF_IO_STATS, GF_LOG_DEBUG, "Will destroy values for %s", key); + GF_FREE (value->data); + value->data = NULL; + } + return 0; +} + +static inline void +io_stats_destroy_fop_hits_dict (dict_t **fop_hits_dict) +{ + if (!fop_hits_dict) return; + + /** + * Loop through each of the entries in the dict and destroy + * allocated values. + */ + dict_foreach (*fop_hits_dict, __io_stats_destroy_fop_hits_dict_foreach, NULL); + dict_destroy (*fop_hits_dict); + *fop_hits_dict = NULL; +} + +void +io_stats_dump_foprate_stats (struct ios_dump_args *args) +{ + /* Snapshot a moving average for this interval */ + ios_compute_moving_average (args); + + /* Bump some counters */ + io_stats_bump_ctrs (args->this); +} + /* * Takes in our sample buffer and then dumps out the contents */ @@ -1618,36 +1879,39 @@ io_stats_dump_latency_samples_logfp (xlator_t *this, FILE* logfp, /* Empty case, nothing to do, exit. 
*/ if (sample_buf == NULL || sample_buf->collected == 0) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_TRACE, "No samples, dump not required."); - ret = 0; + ret =0; goto out; } - /* Wrap-around case, dump from pos to sample_buf->size -1 - * and then from 0 to sample_buf->pos (covered off by - * "simple case") - */ - if (sample_buf->collected > sample_buf->pos + 1) { + /* If we've collected more samples than the buffer can hold, then we + * should dump the whole buffer, starting from the oldest entries which + * begin at sample_buf->pos. */ + if (sample_buf->collected > sample_buf->size) { for (i = sample_buf->pos; i < sample_buf->size; i++) { - _io_stats_write_latency_sample (this, - &(sample_buf->ios_samples[i]), logfp); + ios_sample_t *sample = &(sample_buf->ios_samples[i]); + io_stats_process_ios_sample (this, sample); + _io_stats_write_latency_sample (this, sample, logfp); } } - /* Simple case: Dump from 0 to sample_buf->pos */ + /* Simple case: Dump from 0 to sample_buf->pos. These are the newest + * entries if we have wrapped around our buffer, as well. 
*/ for (i = 0; i < sample_buf->pos; i++) { - _io_stats_write_latency_sample (this, - &(sample_buf->ios_samples[i]), logfp); + ios_sample_t *sample = &(sample_buf->ios_samples[i]); + io_stats_process_ios_sample (this, sample); + _io_stats_write_latency_sample (this, sample, logfp); } out: return ret; } + int io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats, - struct timeval *now, int interval, FILE *logfp) + struct timeval *now, int interval, FILE* logfp) { int i = 0; int per_line = 0; @@ -1789,15 +2053,13 @@ io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats, ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)" "\tFILE NAME"); list_head = &conf->thru_list[IOS_STATS_THRU_READ]; - ios_dump_throughput_stats(list_head, this, logfp, - IOS_STATS_THRU_READ); + ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_TYPE_READ); ios_log (this, logfp, "\n======Write Throughput File Stats======"); ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)" "\tFILE NAME"); list_head = &conf->thru_list[IOS_STATS_THRU_WRITE]; - ios_dump_throughput_stats (list_head, this, logfp, - IOS_STATS_THRU_WRITE); + ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_TYPE_WRITE); } return 0; } @@ -2001,58 +2263,6 @@ ios_global_stats_clear (struct ios_global_stats *stats, struct timeval *now) stats->started_at = *now; } -static int io_stats_dump_quorum (xlator_t *this, struct ios_dump_args *args) { - FILE *logf = args->u.logfp; - loc_t root_loc = {0}; - dict_t *dict = NULL; - xlator_list_t *child = NULL; - const char *leading_comma = ""; - - if (args->type != IOS_DUMP_TYPE_JSON_FILE) { - return -EINVAL; - } - - if (!this->itable->root) { - return -ENOENT; - } - - // If we don't build a valid 'loc', dht_getxattr swallows our request - // instead of passing it down to AFR. 
- root_loc.path = "/"; - root_loc.name = ""; - root_loc.inode = inode_ref (this->itable->root); - gf_uuid_copy (root_loc.gfid, root_loc.inode->gfid); - - ios_log (this, logf, "{"); - - for (child = this->children; child; child = child->next) { - dict = NULL; - - syncop_getxattr (child->xlator, &root_loc, &dict, - GF_AFR_QUORUM_CHECK, NULL, NULL); - - if (dict) { - const data_pair_t *e; - - dict_for_each (dict, e) { - ios_log (this, logf, - "%s\"storage.gluster.nfsd.%s\": \"%d\"", - leading_comma, - e->key, data_to_int32 (e->value)); - leading_comma = ","; - } - - dict_unref (dict); - } - } - - ios_log (this, logf, "}"); - - inode_unref (root_loc.inode); - - return 0; -} - int io_stats_dump (xlator_t *this, ios_sample_buf_t *sample_buf, struct ios_dump_args *args, gf1_cli_info_op op, @@ -2093,19 +2303,15 @@ io_stats_dump (xlator_t *this, ios_sample_buf_t *sample_buf, } UNLOCK (&conf->lock); - if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_CUMULATIVE) { + if (op == GF_CLI_INFO_ALL || + op == GF_CLI_INFO_CUMULATIVE) io_stats_dump_global (this, &cumulative, sample_buf, &now, -1, args); - } - if (op == GF_CLI_INFO_ALL || op == GF_CLI_INFO_INCREMENTAL) { + if (op == GF_CLI_INFO_ALL || + op == GF_CLI_INFO_INCREMENTAL) io_stats_dump_global (this, &incremental, sample_buf, &now, increment, args); - } - - if (conf->iamnfsd) { - io_stats_dump_quorum (this, args); - } return 0; } @@ -2176,6 +2382,50 @@ io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd) return 0; } +gf_boolean_t _should_sample (struct ios_conf *conf, glusterfs_fop_t fop_type, ios_sample_buf_t* ios_sample_buf, + int32_t op_ret, int32_t op_errno) +{ + int i; + + /* If sampling is disabled, return false */ + if (conf->ios_sample_interval == 0) + return _gf_false; + + /* Sometimes it's useful to sample errors. If `fop-sample-all-errors` + * is active, then we should sample ALL errors. 
*/ + if (op_ret < 0 && op_errno != 0 && conf->sample_all_errors) { + return _gf_true; + } + + /* If `fop-sample-hard-errors` is active, we only look through a small + * subset of errno values to sample, those which are critical to Gluster + * functioning. */ + if (op_ret < 0 && op_errno != 0 && conf->sample_hard_errors) { + for (i = 0; i < IOS_HARD_ERROR_LIST_SIZE; i++) { + if (abs (op_errno) == ios_hard_error_list[i]) { + return _gf_true; + } + } + } + + /* If auditing is on, sample TRUNCATE, CREATE, UNLINK, RMDIR, MKDIR 1:1 */ + if (conf->audit_creates_and_unlinks) { + switch (fop_type) { + case GF_FOP_TRUNCATE: + case GF_FOP_CREATE: + case GF_FOP_UNLINK: + case GF_FOP_MKDIR: + case GF_FOP_RMDIR: + return _gf_true; + default: + break; + } + } + + /* Sample only 1 out of ios_sample_interval number of fops. */ + return (ios_sample_buf->observed % conf->ios_sample_interval == 0); +} + void ios_local_get_inode (struct ios_local *local, inode_t **inode) { if (!local) @@ -2215,8 +2465,7 @@ void ios_local_get_path (call_frame_t *frame, const char **path) */ ios_inode_ctx_get (inode, frame->this, &iosstat); if (iosstat) { - gf_log ("io-stats", GF_LOG_DEBUG, - "[%s] Getting path from iostat struct", + gf_log ("io-stats", GF_LOG_DEBUG, "[%s] Getting path from iostat struct", fop_enum_to_string (frame->op)); *path = iosstat->filename; goto out; @@ -2228,8 +2477,7 @@ void ios_local_get_path (call_frame_t *frame, const char **path) * inside the local. */ if (local->loc.path) { - gf_log ("io-stats", GF_LOG_DEBUG, - "[%s] Getting path from loc_t", + gf_log ("io-stats", GF_LOG_DEBUG, "[%s] Getting path from loc_t", fop_enum_to_string (frame->op)); *path = local->loc.path; goto out; @@ -2239,60 +2487,13 @@ out: /* If the inode and the loc don't have the path, we're out of luck. 
*/ if (!*path) { - gf_log ("io-stats", GF_LOG_DEBUG, - "Unable to get path for fop: %s", + gf_log ("io-stats", GF_LOG_DEBUG, "Unable to get path for fop: %s", fop_enum_to_string (frame->op)); } return; } -gf_boolean_t -_should_sample (struct ios_conf *conf, glusterfs_fop_t fop_type, - ios_sample_buf_t* ios_sample_buf, int32_t op_ret, - int32_t op_errno) -{ - int i; - - /* If sampling is disabled, return false */ - if (conf->ios_sample_interval == 0) - return _gf_false; - - /* Sometimes it's useful to sample errors. If `fop-sample-all-errors` - * is active, then we should sample ALL errors. */ - if (op_ret < 0 && op_errno != 0 && conf->sample_all_errors) { - return _gf_true; - } - - /* If `fop-sample-hard-errors` is active, we only look through a small - * subset of errno values to sample, those which are critical to Gluster - * functioning. */ - if (op_ret < 0 && op_errno != 0 && conf->sample_hard_errors) { - for (i = 0; i < IOS_HARD_ERROR_LIST_SIZE; i++) { - if (abs (op_errno) == ios_hard_error_list[i]) { - return _gf_true; - } - } - } - - /* If auditing is on, sample TRUNCATE, CREATE, UNLINK, RMDIR, MKDIR 1:1 */ - if (conf->audit_creates_and_unlinks) { - switch (fop_type) { - case GF_FOP_TRUNCATE: - case GF_FOP_CREATE: - case GF_FOP_UNLINK: - case GF_FOP_MKDIR: - case GF_FOP_RMDIR: - return _gf_true; - default: - break; - } - } - - /* Sample only 1 out of ios_sample_interval number of fops. 
*/ - return (ios_sample_buf->observed % conf->ios_sample_interval == 0); -} - void collect_ios_latency_sample (struct ios_conf *conf, glusterfs_fop_t fop_type, double elapsed, call_frame_t *frame, int32_t op_ret, int32_t op_errno) @@ -2304,12 +2505,11 @@ void collect_ios_latency_sample (struct ios_conf *conf, call_stack_t *root = NULL; const char *path = NULL; - ios_sample_buf = conf->ios_sample_buf; LOCK (&conf->ios_sampling_lock); - if (!_should_sample (conf, fop_type, ios_sample_buf, op_ret, op_errno)) { + + if (!_should_sample (conf, fop_type, ios_sample_buf, op_ret, op_errno)) goto out; - } timestamp = &frame->begin; root = frame->root; @@ -2317,54 +2517,44 @@ void collect_ios_latency_sample (struct ios_conf *conf, ios_sample = &(ios_sample_buf->ios_samples[ios_sample_buf->pos]); ios_sample->elapsed = elapsed; ios_sample->fop_type = fop_type; - ios_sample->op_ret = op_ret; - ios_sample->op_errno = op_errno; - ios_sample->uid = root->uid; - ios_sample->gid = root->gid; - (ios_sample->timestamp).tv_sec = timestamp->tv_sec; - (ios_sample->timestamp).tv_usec = timestamp->tv_usec; - memcpy (&ios_sample->identifier, &root->identifier, - sizeof (root->identifier)); - /* Eventually every FOP will be supported - * (i.e., the frame->local will be + /* Eventually every FOP will be supported (i.e., the frame->local will be * of type struct ios_local), but for now, this is a safety. 
*/ switch (ios_sample->fop_type) { - - case GF_FOP_CREATE: - case GF_FOP_OPEN: - case GF_FOP_STAT: - case GF_FOP_FSTAT: - case GF_FOP_READ: - case GF_FOP_WRITE: - case GF_FOP_OPENDIR: - case GF_FOP_READDIRP: - case GF_FOP_READDIR: - case GF_FOP_FLUSH: - case GF_FOP_ACCESS: - case GF_FOP_UNLINK: - case GF_FOP_TRUNCATE: - case GF_FOP_MKDIR: - case GF_FOP_RMDIR: - case GF_FOP_SETATTR: - case GF_FOP_LOOKUP: - case GF_FOP_INODELK: - case GF_FOP_FINODELK: - case GF_FOP_ENTRYLK: - case GF_FOP_FXATTROP: - case GF_FOP_XATTROP: - case GF_FOP_GETXATTR: - case GF_FOP_FGETXATTR: - case GF_FOP_SETXATTR: - case GF_FOP_FSETXATTR: - case GF_FOP_STATFS: - case GF_FOP_FSYNC: - ios_local_get_path (frame, &path); - break; - default: - path = NULL; - break; + case GF_FOP_CREATE: + case GF_FOP_OPEN: + case GF_FOP_STAT: + case GF_FOP_FSTAT: + case GF_FOP_READ: + case GF_FOP_WRITE: + case GF_FOP_OPENDIR: + case GF_FOP_READDIRP: + case GF_FOP_READDIR: + case GF_FOP_FLUSH: + case GF_FOP_ACCESS: + case GF_FOP_UNLINK: + case GF_FOP_TRUNCATE: + case GF_FOP_MKDIR: + case GF_FOP_RMDIR: + case GF_FOP_SETATTR: + case GF_FOP_LOOKUP: + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + case GF_FOP_ENTRYLK: + case GF_FOP_FXATTROP: + case GF_FOP_XATTROP: + case GF_FOP_GETXATTR: + case GF_FOP_FGETXATTR: + case GF_FOP_SETXATTR: + case GF_FOP_FSETXATTR: + case GF_FOP_STATFS: + case GF_FOP_FSYNC: + ios_local_get_path (frame, &path); + break; + default: + path = NULL; + break; } if (path) { @@ -2372,12 +2562,19 @@ void collect_ios_latency_sample (struct ios_conf *conf, ios_sample->have_path = _gf_true; } - /* We've reached the end of the circular buffer, start from the - * beginning. 
*/ - if (ios_sample_buf->pos == (ios_sample_buf->size - 1)) - ios_sample_buf->pos = 0; - else - ios_sample_buf->pos++; + ios_sample->op_ret = op_ret; + ios_sample->op_errno = op_errno; + ios_sample->fop_pri = frame->pri; + ios_sample->uid = root->uid; + ios_sample->gid = root->gid; + ios_sample->ns_info = frame->root->ns_info; + (ios_sample->timestamp).tv_sec = timestamp->tv_sec; + (ios_sample->timestamp).tv_usec = timestamp->tv_usec; + memcpy (&ios_sample->identifier, &root->identifier, sizeof (root->identifier)); + + /* Increment, making sure that if we've reached the end of the circular + * buffer, then start from the beginning (modulo). */ + ios_sample_buf->pos = (ios_sample_buf->pos + 1) % ios_sample_buf->size; ios_sample_buf->collected++; out: ios_sample_buf->observed++; @@ -2547,8 +2744,7 @@ unlock_list_head: } static int -attach_iosstat_to_inode (xlator_t *this, inode_t *inode, const char *path, - const uuid_t gfid) { +attach_iosstat_to_inode (xlator_t *this, inode_t *inode, const char *path, const uuid_t gfid) { struct ios_stat *iosstat = NULL; if (!inode) { @@ -2557,21 +2753,22 @@ attach_iosstat_to_inode (xlator_t *this, inode_t *inode, const char *path, ios_inode_ctx_get (inode, this, &iosstat); if (!iosstat) { - iosstat = GF_CALLOC (1, sizeof (*iosstat), - gf_io_stats_mt_ios_stat); + iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat); if (!iosstat) { return -ENOMEM; } iosstat->filename = gf_strdup (path); gf_uuid_copy (iosstat->gfid, gfid); LOCK_INIT (&iosstat->lock); +#if OFFSET_WRITE_DETECTOR + iosstat->expected_write_offset = UNKNOWN_WRITE_OFFSET; +#endif ios_inode_ctx_set (inode, this, iosstat); } return 0; } - int ios_build_fd (xlator_t *this, const char *path, fd_t *fd, struct ios_fd **iosfd) { @@ -2613,7 +2810,6 @@ free_and_out: return ret; } - int io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, @@ -2650,12 +2846,12 @@ io_stats_create_cbk (call_frame_t *frame, 
void *cookie, xlator_t *this, } UNLOCK (&conf->lock); - attach_iosstat_to_inode (this, local->loc.inode, local->loc.path, - buf->ia_gfid); - + attach_iosstat_to_inode (this, local->loc.inode, local->loc.path, buf->ia_gfid); unwind: UPDATE_PROFILE_STATS (frame, CREATE, op_ret, op_errno); + ios_free_local (frame); + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, preparent, postparent, xdata); return 0; @@ -2681,6 +2877,7 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } conf = this->private; + ios_build_fd (this, local->loc.path, fd, &iosfd); if (!iosfd) { goto unwind; @@ -2701,16 +2898,15 @@ io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, BUMP_STATS (iosstat, IOS_STATS_TYPE_OPEN); } - attach_iosstat_to_inode (this, local->loc.inode, - local->loc.path, - local->loc.inode->gfid); - + attach_iosstat_to_inode (this, local->loc.inode, local->loc.path, local->loc.inode->gfid); unwind: UPDATE_PROFILE_STATS (frame, OPEN, op_ret, op_errno); + ios_free_local (frame); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); - return 0; + return 0; } @@ -2719,7 +2915,9 @@ io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, STAT, op_ret, op_errno); + ios_free_local (frame); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata); return 0; } @@ -2750,7 +2948,6 @@ io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (iosstat) { BUMP_STATS (iosstat, IOS_STATS_TYPE_READ); BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_READ); - } unwind: @@ -2778,39 +2975,34 @@ io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, UPDATE_PROFILE_STATS (frame, WRITE, op_ret, op_errno); ios_inode_ctx_get (local->inode, this, &iosstat); - if (iosstat) { BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE); BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE); } + unwind: 
ios_free_local (frame); STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; - } - - - int io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata) { + struct ios_local *local = NULL; struct ios_stat *iosstat = NULL; - inode_t *inode = frame->local; - frame->local = NULL; + local = frame->local; UPDATE_PROFILE_STATS (frame, READDIRP, op_ret, op_errno); - ios_inode_ctx_get (inode, this, &iosstat); - + ios_inode_ctx_get (local->inode, this, &iosstat); if (iosstat) { - BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP); - iosstat = NULL; + BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP); } + ios_free_local (frame); STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata); return 0; } @@ -2829,7 +3021,6 @@ io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ios_free_local (frame); - UPDATE_PROFILE_STATS (frame, READDIR, op_ret, op_errno); STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata); return 0; } @@ -2842,8 +3033,7 @@ io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { UPDATE_PROFILE_STATS (frame, FSYNC, op_ret, op_errno); ios_free_local (frame); - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, - xdata); + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; } @@ -2894,7 +3084,6 @@ io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *sbuf, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, READLINK, op_ret, op_errno); - ios_free_local (frame); STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata); return 0; } @@ -2907,11 +3096,10 @@ io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dict_t *xdata, struct iatt *postparent) { struct ios_local *local = frame->local; - if (local && local->loc.path && inode && op_ret >= 0) { - attach_iosstat_to_inode (this, inode, 
local->loc.path, - inode->gfid); + attach_iosstat_to_inode (this, inode, local->loc.path, inode->gfid); } + UPDATE_PROFILE_STATS (frame, LOOKUP, op_ret, op_errno); ios_free_local (frame); STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata, @@ -2956,9 +3144,11 @@ io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct ios_local *local = frame->local; if (local && local->loc.path) { - local->inode = inode_ref (inode); - attach_iosstat_to_inode (this, inode, local->loc.path, - buf->ia_gfid); + local->inode = inode_ref (inode); /* Just for consistency's + * sake. + */ + + attach_iosstat_to_inode (this, inode, local->loc.path, buf->ia_gfid); } UPDATE_PROFILE_STATS (frame, MKDIR, op_ret, op_errno); @@ -3008,14 +3198,12 @@ io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0) goto unwind; - attach_iosstat_to_inode (this, local->inode, local->loc.path, - local->inode->gfid); + attach_iosstat_to_inode (this, local->inode, local->loc.path, local->inode->gfid); ios_fd_ctx_set (local->fd, this, 0); ios_inode_ctx_get (local->fd->inode, this, &iosstat); if (iosstat) BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR); - unwind: UPDATE_PROFILE_STATS (frame, OPENDIR, op_ret, op_errno); ios_free_local (frame); @@ -3029,7 +3217,6 @@ io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - UPDATE_PROFILE_STATS (frame, RMDIR, op_ret, op_errno); ios_free_local (frame); STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, @@ -3121,7 +3308,6 @@ io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, FREMOVEXATTR, op_ret, op_errno); - ios_free_local (frame); STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); return 0; } @@ -3132,7 +3318,6 @@ io_stats_fsyncdir_cbk (call_frame_t *frame, 
void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, FSYNCDIR, op_ret, op_errno); - ios_free_local (frame); STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata); return 0; } @@ -3148,15 +3333,14 @@ io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * in NFS. We need to make sure that we are attaching the * data correctly to this inode. */ - if (local->loc.inode && local->loc.path) { - attach_iosstat_to_inode (this, local->loc.inode, - local->loc.path, - local->loc.inode->gfid); + if (local && local->loc.inode && local->loc.path) { + attach_iosstat_to_inode (this, local->loc.inode, local->loc.path, local->loc.inode->gfid); } UPDATE_PROFILE_STATS (frame, ACCESS, op_ret, op_errno); ios_free_local (frame); STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata); + return 0; } @@ -3167,7 +3351,6 @@ io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, FTRUNCATE, op_ret, op_errno); - ios_free_local (frame); STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; @@ -3191,8 +3374,7 @@ io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, FALLOCATE, op_ret, op_errno); - ios_free_local (frame); - STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf, + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; } @@ -3204,8 +3386,7 @@ io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, DISCARD, op_ret, op_errno); - ios_free_local (frame); - STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf, + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; } @@ -3216,7 
+3397,6 @@ io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, ZEROFILL, op_ret, op_errno); - ios_free_local (frame); STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata); return 0; @@ -3227,7 +3407,6 @@ io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { UPDATE_PROFILE_STATS (frame, LK, op_ret, op_errno); - ios_free_local (frame); STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata); return 0; } @@ -3285,6 +3464,8 @@ io_stats_entrylk (call_frame_t *frame, xlator_t *this, START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_entrylk_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->entrylk, @@ -3301,6 +3482,8 @@ io_stats_inodelk (call_frame_t *frame, xlator_t *this, START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_inodelk_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->inodelk, @@ -3325,8 +3508,11 @@ io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_finodelk_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->finodelk, @@ -3340,8 +3526,11 @@ io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_xattrop_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, @@ -3355,8 +3544,11 @@ io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + 
STACK_WIND (frame, io_stats_fxattrop_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fxattrop, @@ -3370,8 +3562,11 @@ io_stats_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, @@ -3384,8 +3579,11 @@ int io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, @@ -3398,7 +3596,6 @@ int io_stats_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata) { - ios_track_loc (frame, loc); START_FOP_LATENCY (frame); STACK_WIND (frame, io_stats_readlink_cbk, @@ -3413,9 +3610,10 @@ int io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) { - ios_track_loc (frame, loc); START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, @@ -3429,8 +3627,11 @@ io_stats_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_mkdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, @@ -3444,9 +3645,12 @@ io_stats_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); - STACK_WIND (frame, io_stats_unlink_cbk, + FOP_THROTTLE (frame, this); + + STACK_WIND_COOKIE (frame, io_stats_unlink_cbk, loc, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); @@ -3459,9 +3663,12 @@ io_stats_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int 
flags, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); - STACK_WIND (frame, io_stats_rmdir_cbk, + FOP_THROTTLE (frame, this); + + STACK_WIND_COOKIE (frame, io_stats_rmdir_cbk, loc, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata); @@ -3475,6 +3682,8 @@ io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, { START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_symlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, @@ -3489,6 +3698,8 @@ io_stats_rename (call_frame_t *frame, xlator_t *this, { START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_rename_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, @@ -3503,6 +3714,8 @@ io_stats_link (call_frame_t *frame, xlator_t *this, { START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_link_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, @@ -3516,8 +3729,11 @@ io_stats_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_setattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, @@ -3531,8 +3747,11 @@ io_stats_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, @@ -3550,6 +3769,8 @@ io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc, START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_open_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->open, @@ -3562,13 +3783,14 @@ int io_stats_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, 
dict_t *xdata) - { ios_track_loc (frame, loc); ios_track_fd (frame, fd); START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, @@ -3582,8 +3804,11 @@ io_stats_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, @@ -3607,13 +3832,36 @@ io_stats_writev (call_frame_t *frame, xlator_t *this, len = iov_length (vector, count); +#if OFFSET_WRITE_DETECTOR + if (conf->iamnfsd && fd->inode) { + struct ios_stat *iosstat = NULL; + + ios_inode_ctx_get (fd->inode, this, &iosstat); + if (iosstat) { + int is_offset = iosstat->expected_write_offset != UNKNOWN_WRITE_OFFSET && + iosstat->expected_write_offset != offset; + + if (is_offset) { + ++conf->cumulative.offset_write_ops; + ++conf->incremental.offset_write_ops; + } + ++conf->cumulative.total_write_ops; + ++conf->incremental.total_write_ops; + iosstat->expected_write_offset = offset + len; + } + } +#endif + BUMP_WRITE (fd, len); START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, flags, iobref, xdata); + return 0; } @@ -3624,6 +3872,7 @@ io_stats_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); STACK_WIND (frame, io_stats_statfs_cbk, @@ -3639,8 +3888,11 @@ io_stats_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_flush_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush, @@ -3654,8 +3906,11 @@ io_stats_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, 
int32_t flags, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_fsync_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, @@ -3686,37 +3941,44 @@ conditional_dump (dict_t *dict, char *key, data_t *value, void *data) memset (filename, 0, value->len + 1); memcpy (filename, data_to_str (value), value->len); - pid = getpid (); + if (fnmatch ("*io*stat*dump", key, 0) == 0) { + pid = getpid (); - if (!strncmp (filename, "", 1)) { - gf_log (this->name, GF_LOG_ERROR, "No filename given"); - return -1; - } - logfp = fopen (filename, "w+"); - if (!logfp) { - gf_log (this->name, GF_LOG_ERROR, "failed to open %s " + + if (!strncmp (filename, "", 1)) { + gf_log (this->name, GF_LOG_ERROR, "No filename given"); + return -1; + } + logfp = fopen (filename, "w+"); + if (!logfp) { + gf_log (this->name, GF_LOG_ERROR, "failed to open %s " "for writing", filename); - return -1; - } - sprintf (dump_key, "*io*stat*%d_json_dump", pid); - if (fnmatch (dump_key, key, 0) == 0) { - (void) ios_dump_args_init ( - &args, IOS_DUMP_TYPE_JSON_FILE, - logfp); - } else { - (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE, - logfp); + return -1; + } + sprintf (dump_key, "*io*stat*%d_json_dump", pid); + if (fnmatch (dump_key, key, 0) == 0) { + (void) ios_dump_args_init ( + &args, IOS_DUMP_TYPE_JSON_FILE, + logfp); + } else { + (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE, + logfp); + } + + io_stats_dump (this, NULL, &args, GF_CLI_INFO_ALL, + _gf_false); + + fclose (logfp); } - io_stats_dump (this, NULL, &args, GF_CLI_INFO_ALL, _gf_false); - fclose (logfp); + return 0; } int _ios_destroy_dump_thread (struct ios_conf *conf) { conf->dump_thread_should_die = _gf_true; - if (conf->ios_dump_interval > 0) { - (void) pthread_cancel (conf->dump_thread); + + if (conf->dump_thread) { (void) pthread_join (conf->dump_thread, NULL); } return 0; @@ -3768,60 +4030,55 @@ _ios_dump_thread (xlator_t *this) { struct ios_conf 
*conf = NULL; FILE *stats_logfp = NULL; FILE *samples_logfp = NULL; + FILE *foprate_logfp = NULL; struct ios_dump_args args = {0}; ios_sample_buf_t *sample_buf = NULL; int i; int stats_bytes_written = 0; int samples_bytes_written = 0; + int foprate_bytes_written = 0; char stats_filename[PATH_MAX]; char samples_filename[PATH_MAX]; - char *xlator_name; - char *instance_name; - gf_boolean_t log_stats_fopen_failure = _gf_true; - gf_boolean_t log_samples_fopen_failure = _gf_true; - int old_cancel_type; - int ret; + char foprate_filename[PATH_MAX]; + char *xlator_name = NULL; + char *instance_name = NULL; + time_t namespace_conf_mtime; + int ret = 0; conf = this->private; + xlator_name = strdupa (get_top_xlator_name (this)); + gf_log (this->name, GF_LOG_INFO, "IO stats dump thread started, " "polling IO stats every %d seconds", conf->ios_dump_interval); - xlator_name = strdupa (this->name); + for (i = 0; i < strlen (xlator_name); i++) { if (xlator_name[i] == '/') xlator_name[i] = '_'; } + instance_name = this->instance_name; if (conf->iamshd) { xlator_name = "shd"; } else if (conf->iamnfsd) { + instance_name = xlator_name; xlator_name = "nfsd"; - instance_name = strdupa (this->name); - } else if (conf->iamgfproxyd == _gf_true) { + } else if (conf->iamgfproxyd) { + instance_name = xlator_name; xlator_name = "gfproxyd"; - instance_name = strdupa (this->name); - } - if (sys_mkdir (_IOS_DUMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) { - if (errno != EEXIST) { - gf_log (this->name, GF_LOG_ERROR, - "could not create stats-dump directory %s", - _IOS_DUMP_DIR); - goto out; - } - } - if (sys_mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) { - if (errno != EEXIST) { - gf_log (this->name, GF_LOG_ERROR, - "could not create stats-sample directory %s", - _IOS_SAMP_DIR); - goto out; - } } + + mkdir (_IOS_DUMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG); + mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG); + if (instance_name) { stats_bytes_written = snprintf (stats_filename, 
PATH_MAX, "%s/%s_%s_%s.dump", _IOS_DUMP_DIR, __progname, xlator_name, instance_name); - samples_bytes_written = snprintf (samples_filename, PATH_MAX, - "%s/%s_%s_%s.samp", _IOS_SAMP_DIR, + samples_bytes_written = snprintf (samples_filename, + PATH_MAX, "%s/%s_%s_%s.samp", _IOS_SAMP_DIR, + __progname, xlator_name, instance_name); + foprate_bytes_written = snprintf (foprate_filename, + PATH_MAX, "%s/%s_%s_%s_namespace_stats.dump", _IOS_DUMP_DIR, __progname, xlator_name, instance_name); } else { stats_bytes_written = snprintf (stats_filename, PATH_MAX, @@ -3830,22 +4087,40 @@ _ios_dump_thread (xlator_t *this) { samples_bytes_written = snprintf (samples_filename, PATH_MAX, "%s/%s_%s.samp", _IOS_SAMP_DIR, __progname, xlator_name); + foprate_bytes_written = snprintf (foprate_filename, PATH_MAX, + "%s/%s_%s_namespace_stats.dump", _IOS_DUMP_DIR, __progname, + xlator_name); } - if ((stats_bytes_written >= PATH_MAX) || - (samples_bytes_written >= PATH_MAX)) { + if (stats_bytes_written >= PATH_MAX || + samples_bytes_written >= PATH_MAX) { gf_log (this->name, GF_LOG_ERROR, "Invalid path for stats dump (%s) and/or latency " "samples (%s)", stats_filename, samples_filename); goto out; } - while (1) { - if (conf->dump_thread_should_die) + + gf_log (this->name, GF_LOG_INFO, + "Writing to files: stats - %s, samples - %s, foprate - %s", + stats_filename, samples_filename, foprate_filename); + + while (_gf_true) { + if (conf->dump_thread_should_die) { break; - (void) pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, - &old_cancel_type); + } + sleep (conf->ios_dump_interval); - (void) pthread_setcanceltype (PTHREAD_CANCEL_DEFERRED, - &old_cancel_type); + + /* First try and load the throttling conf file */ + if (conf->measure_namespace_rates) { + ret = ios_conf_load_namespace_conf (conf); + + if (ret != 0) { + gf_log (GF_IO_STATS, GF_LOG_WARNING, + "Failed to re-load & parse \"%s\", error=%s.", + _IOS_NAMESPACE_CONF, strerror (ret)); + } + } + /* Swap buffers */ ret = 
_ios_swap_sample_buf (this, &sample_buf); if (ret != 0) { @@ -3854,41 +4129,45 @@ _ios_dump_thread (xlator_t *this) { goto out; } - /* - * It's not clear whether we should reopen this each time, or - * just hold it open and rewind/truncate on each iteration. - * Leaving it alone for now. - */ stats_logfp = fopen (stats_filename, "w+"); + + /* Dump statistics */ if (stats_logfp) { - (void) ios_dump_args_init (&args, - IOS_DUMP_TYPE_JSON_FILE, - stats_logfp); - io_stats_dump (this, sample_buf, &args, - GF_CLI_INFO_ALL, _gf_false); + (void) ios_dump_args_init ( + &args, IOS_DUMP_TYPE_JSON_FILE, + stats_logfp); + io_stats_dump (this, sample_buf, &args, GF_CLI_INFO_ALL, _gf_false); fclose (stats_logfp); - log_stats_fopen_failure = _gf_true; - } else if (log_stats_fopen_failure) { - gf_log (this->name, GF_LOG_ERROR, - "could not open stats-dump file %s (%s)", - stats_filename, strerror(errno)); - log_stats_fopen_failure = _gf_false; + } else { + gf_log (this->name, GF_LOG_ERROR, "%s", + strerror (errno)); } + + /* Dump latency samples */ samples_logfp = fopen (samples_filename, "a"); if (samples_logfp) { io_stats_dump_latency_samples_logfp (this, - samples_logfp, - sample_buf); + samples_logfp, sample_buf); fclose (samples_logfp); - log_samples_fopen_failure = _gf_true; - } else if (log_samples_fopen_failure) { - gf_log (this->name, GF_LOG_ERROR, - "could not open samples-dump file %s (%s)", - samples_filename, strerror(errno)); - log_samples_fopen_failure = _gf_false; + } else { + gf_log (this->name, GF_LOG_ERROR, "%s", + strerror (errno)); } - /* cleanup sample buf */ + /* Log throttling stats only if we were asked to measure namespace rates */ + if (conf->measure_namespace_rates) { + foprate_logfp = fopen (foprate_filename, "w+"); + if (foprate_logfp) { + (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_JSON_FILE, foprate_logfp); + args.this = this; + io_stats_dump_foprate_stats (&args); + fclose (foprate_logfp); + } else { + gf_log (this->name, GF_LOG_ERROR, "%s", 
strerror (errno)); + } + } + + // cleanup sample buf ios_destroy_sample_buf (sample_buf); } out: @@ -3896,23 +4175,11 @@ out: return NULL; } -static gf_boolean_t -match_special_xattr (dict_t *d, char *k, data_t *val, void *mdata) -{ - gf_boolean_t ret = _gf_false; - if (fnmatch ("*io*stat*dump", k, 0) == 0) { - ret = _gf_true; - } - - return ret; -} - int io_stats_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - int ret = 0; struct { xlator_t *this; inode_t *inode; @@ -3923,13 +4190,7 @@ io_stats_setxattr (call_frame_t *frame, xlator_t *this, stub.inode = loc->inode; stub.path = loc->path; - ret = dict_foreach_match (dict, match_special_xattr, NULL, - conditional_dump, &stub); - if (ret > 0) { - /* Setxattr was on key 'io-stat-dump', hence dump and unwind - * from here */ - goto out; - } + dict_foreach (dict, conditional_dump, &stub); ios_track_loc (frame, loc); @@ -3940,10 +4201,6 @@ io_stats_setxattr (call_frame_t *frame, xlator_t *this, FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); return 0; - -out: - STACK_UNWIND_STRICT (setxattr, frame, 0, 0, NULL); - return 0; } @@ -3952,6 +4209,7 @@ io_stats_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); STACK_WIND (frame, io_stats_getxattr_cbk, @@ -3967,6 +4225,7 @@ io_stats_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); STACK_WIND (frame, io_stats_removexattr_cbk, @@ -3983,6 +4242,7 @@ io_stats_fsetxattr (call_frame_t *frame, xlator_t *this, int32_t flags, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); STACK_WIND (frame, io_stats_fsetxattr_cbk, @@ -3998,6 +4258,7 @@ io_stats_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); 
STACK_WIND (frame, io_stats_fgetxattr_cbk, @@ -4012,7 +4273,6 @@ int io_stats_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - ios_track_fd (frame, fd); START_FOP_LATENCY (frame); STACK_WIND (frame, io_stats_fremovexattr_cbk, @@ -4027,9 +4287,13 @@ int io_stats_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { + ios_track_loc (frame, loc); + ios_track_fd (frame, fd); START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_opendir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir, @@ -4041,9 +4305,12 @@ int io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *dict) { - frame->local = fd->inode; + ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_readdirp_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, @@ -4056,8 +4323,12 @@ int io_stats_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { + ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_readdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, @@ -4072,6 +4343,8 @@ io_stats_fsyncdir (call_frame_t *frame, xlator_t *this, { START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_fsyncdir_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsyncdir, @@ -4085,8 +4358,11 @@ io_stats_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata) { ios_track_loc (frame, loc); + START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_access_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->access, @@ -4101,6 +4377,8 @@ io_stats_ftruncate (call_frame_t *frame, xlator_t *this, { START_FOP_LATENCY (frame); + FOP_THROTTLE (frame, this); + STACK_WIND (frame, io_stats_ftruncate_cbk, 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, @@ -4128,6 +4406,7 @@ io_stats_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ios_track_fd (frame, fd); + START_FOP_LATENCY (frame); STACK_WIND (frame, io_stats_fstat_cbk, @@ -4144,6 +4423,8 @@ io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, { START_FOP_LATENCY(frame); + FOP_THROTTLE (frame, this); + STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, xdata); @@ -4411,13 +4692,73 @@ io_priv (xlator_t *this) } int +init_namespace_conf (struct ios_conf *conf) +{ + int ret; + + gf_log (GF_IO_STATS, GF_LOG_INFO, + "Initializing structures for namespace stats tracking."); + + /* Load namespace conf for the first time. */ + ret = ios_conf_load_namespace_conf (conf); + if (ret < 0) { + goto out; + } + + ret = 0; +out: + return ret; +} + +void +init_namespace_options (struct ios_conf *conf) +{ + if (!conf->iambrickd) { + conf->throttling_enabled = _gf_false; + conf->measure_namespace_rates = _gf_false; + gf_log (GF_IO_STATS, GF_LOG_DEBUG, + "Disabling throttling because I'm not a brick."); + return; + } + + /* Can't throttle anything without measuring namespace rates */ + /* TODO: is there a way to do this same enforcement with the namespace + * collection option? 
*/ + if (conf->throttling_enabled) { + gf_log (GF_IO_STATS, GF_LOG_DEBUG, "Throttling enabled."); + conf->measure_namespace_rates = _gf_true; + } + + if (conf->measure_namespace_rates) { + if (init_namespace_conf (conf) != 0) { + gf_log (GF_IO_STATS, GF_LOG_WARNING, + "Disabling namespace measurements even though " + "it was marked as enabled, due to failures " + "during initialization!"); + conf->throttling_enabled = _gf_false; + conf->measure_namespace_rates = _gf_false; + } else { + gf_log (GF_IO_STATS, GF_LOG_DEBUG, "Namespace rates enabled."); + } + + if (conf->audit_creates_and_unlinks || conf->sample_all_errors || + conf->sample_hard_errors) { + gf_log (GF_IO_STATS, GF_LOG_WARNING, + "Fop sample auditing or 1:1 error sampling is " + "enabled, which means that namespace rates might " + "be reported incorrectly in some cases!"); + } + } +} + +int reconfigure (xlator_t *this, dict_t *options) { struct ios_conf *conf = NULL; int ret = -1; char *sys_log_str = NULL; char *log_format_str = NULL; - char *logger_str = NULL; + char *logger_str = NULL; int sys_log_level = -1; char *log_str = NULL; int log_level = -1; @@ -4425,7 +4766,6 @@ reconfigure (xlator_t *this, dict_t *options) int logger = -1; uint32_t log_buf_size = 0; uint32_t log_flush_timeout = 0; - int32_t old_dump_interval; if (!this || !this->private) goto out; @@ -4441,13 +4781,11 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("latency-measurement", conf->measure_latency, options, bool, out); - old_dump_interval = conf->ios_dump_interval; GF_OPTION_RECONF ("ios-dump-interval", conf->ios_dump_interval, options, int32, out); - if ((old_dump_interval <= 0) && (conf->ios_dump_interval > 0)) { - pthread_create (&conf->dump_thread, NULL, - (void *) &_ios_dump_thread, this); - } + + GF_OPTION_RECONF ("ns-rate-window", conf->ns_rate_window, options, + int32, out); GF_OPTION_RECONF ("ios-sample-interval", conf->ios_sample_interval, options, int32, out); @@ -4484,22 +4822,25 @@ reconfigure 
(xlator_t *this, dict_t *options) time, out); gf_log_set_log_flush_timeout (log_flush_timeout); - GF_OPTION_RECONF ("fop-sample-enable-audit", - conf->audit_creates_and_unlinks, options, bool, out); + GF_OPTION_RECONF ("fop-sample-enable-audit", conf->audit_creates_and_unlinks, options, bool, out); + + GF_OPTION_RECONF ("fop-sample-all-errors", conf->sample_all_errors, options, bool, out); + + GF_OPTION_RECONF ("fop-sample-hard-errors", conf->sample_hard_errors, options, bool, out); + + GF_OPTION_RECONF ("dump-percentile-latencies", conf->dump_percentile_latencies, options, bool, out); - GF_OPTION_RECONF ("fop-sample-all-errors", - conf->sample_all_errors, options, bool, out); + GF_OPTION_RECONF ("iam-gfproxy-daemon", conf->iamgfproxyd, options, bool, out); - GF_OPTION_RECONF ("fop-sample-hard-errors", - conf->sample_hard_errors, options, bool, out); + GF_OPTION_RECONF ("fop-throttling-enable", conf->throttling_enabled, options, bool, out); - GF_OPTION_RECONF ("dump-percentile-latencies", - conf->dump_percentile_latencies, options, bool, out); + GF_OPTION_RECONF ("measure-namespace-rates", conf->measure_namespace_rates, options, bool, out); + + init_namespace_options (conf); ret = 0; out: - gf_log (this ? 
this->name : "io-stats", - GF_LOG_DEBUG, "reconfigure returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret); return ret; } @@ -4529,10 +4870,149 @@ ios_conf_destroy (struct ios_conf *conf) if (!conf) return; + dict_destroy (conf->hash_to_ns); + dict_destroy (conf->throttling_rates); + io_stats_destroy_fop_hits_dict (&conf->fop_hits_dict); ios_destroy_top_stats (conf); _ios_destroy_dump_thread (conf); LOCK_DESTROY (&conf->lock); - GF_FREE(conf); + GF_FREE (conf); +} + +int +ios_conf_parse_namespace_conf_line (char *file_line, dict_t *hash_to_ns, + dict_t *throttle_rates) +{ + int ret = 0; + int scanned = 0; + uint32_t ns_hash = 0; + int64_t fop_rate = 0; + char *ns_name = NULL; + char ns_key[DICT_UINT32_KEY_SIZE]; + + ns_name = GF_CALLOC (strlen (file_line), sizeof (char), 0); + + if (!ns_name) { + gf_log (GF_IO_STATS, GF_LOG_CRITICAL, + "Memory allocation error!"); + ret = ENOMEM; + goto out; + } + + scanned = sscanf (file_line, "%s %ld", ns_name, &fop_rate); + + if (scanned < 1 || strlen (ns_name) < 1) { + /* TODO(mgoulet): Log this line as skipped. */ + goto out; + } + + ns_hash = SuperFastHash (ns_name, strlen (ns_name)); + dict_uint32_to_key (ns_hash, ns_key); + ret = dict_set_dynstr_with_alloc (hash_to_ns, ns_key, ns_name); + + if (ret != 0) { + gf_log (GF_IO_STATS, GF_LOG_INFO, + "Failed to set hash/namespace pair %u -> \'%s\'", + ns_hash, ns_name); + goto out; + } else { + gf_log (GF_IO_STATS, GF_LOG_INFO, + "Loaded namespace %u -> \'%s\'", + ns_hash, ns_name); + } + + if (scanned == 1) { + /* We only detected a namespace, but no throttle rate, + * so let's quit now. 
*/ + goto out; + } + + ret = dict_set_int64 (throttle_rates, ns_name, fop_rate); + + if (ret != 0) { + gf_log (GF_IO_STATS, GF_LOG_INFO, + "Failed to set fop rate \'%s\' -> %ld", + ns_name, fop_rate); + goto out; + } else { + gf_log (GF_IO_STATS, GF_LOG_INFO, + "Loaded fop rate \'%s\' -> %ld", + ns_name, fop_rate); + } + +out: + if (ns_name) { + GF_FREE (ns_name); + } + + return ret; +} + +int +ios_conf_load_namespace_conf (struct ios_conf *conf) +{ + int ret = 0; + FILE *fp = NULL; + char *line = NULL; + size_t len = 0; + time_t curr_mtime = {0}; + + get_file_mtime (_IOS_NAMESPACE_CONF, &curr_mtime); + if (conf->namespace_conf_mtime == curr_mtime) { + gf_log (GF_IO_STATS, GF_LOG_DEBUG, + "%s has not changed, not reloading.", + _IOS_NAMESPACE_CONF); + return 0; + } + + if (!conf->fop_hits_dict) { + conf->fop_hits_dict = dict_new (); + if (!conf->fop_hits_dict) { + ret = ENOMEM; + goto done; + } + } + + if (!conf->hash_to_ns) { + conf->hash_to_ns = dict_new (); + if (!conf->hash_to_ns) { + ret = ENOMEM; + goto done; + } + } + + if (!conf->throttling_rates) { + conf->throttling_rates = dict_new (); + if (!conf->throttling_rates) { + ret = ENOMEM; + goto done; + } + } + + fp = fopen (_IOS_NAMESPACE_CONF, "r"); + if (!fp) { + ret = ENOENT; + goto done; + } + + gf_log (GF_IO_STATS, GF_LOG_INFO, + "Reloading %s from disk.", + _IOS_NAMESPACE_CONF); + + while (getline (&line, &len, fp) != -1) { + ios_conf_parse_namespace_conf_line (line, conf->hash_to_ns, + conf->throttling_rates); + } + + free (line); + conf->namespace_conf_mtime = curr_mtime; + +done: + if (fp) { + fclose (fp); + } + + return ret; } int @@ -4550,6 +5030,7 @@ init (xlator_t *this) int ret = -1; uint32_t log_buf_size = 0; uint32_t log_flush_timeout = 0; + glusterfs_ctx_t *ctx = NULL; if (!this) return -1; @@ -4568,11 +5049,19 @@ init (xlator_t *this) "dangling volume. 
check volfile "); } + ctx = this->ctx; + GF_ASSERT (ctx); + + if (ctx->cmd_args.stats_instance_name) { + this->instance_name = ctx->cmd_args.stats_instance_name; + } + conf = GF_CALLOC (1, sizeof(*conf), gf_io_stats_mt_ios_conf); if (!conf) goto out; + /* * Init it just after calloc, so that we are sure the lock is inited * in case of error paths. @@ -4595,14 +5084,19 @@ init (xlator_t *this) GF_OPTION_INIT ("iam-gfproxy-daemon", conf->iamgfproxyd, bool, out); - GF_OPTION_INIT ("fop-sample-hard-errors", conf->sample_hard_errors, - bool, out); + GF_OPTION_INIT ("fop-throttling-enable", conf->throttling_enabled, bool, out); - GF_OPTION_INIT ("fop-sample-all-errors", conf->sample_all_errors, - bool, out); + GF_OPTION_INIT ("measure-namespace-rates", conf->measure_namespace_rates, bool, out); + + GF_OPTION_INIT ("fop-sample-enable-audit", conf->audit_creates_and_unlinks, bool, out); + + GF_OPTION_INIT ("fop-sample-all-errors", conf->sample_all_errors, bool, out); - GF_OPTION_INIT ("fop-sample-enable-audit", - conf->audit_creates_and_unlinks, bool, out); + GF_OPTION_INIT ("fop-sample-hard-errors", conf->sample_hard_errors, bool, out); + + GF_OPTION_INIT ("dump-percentile-latencies", conf->dump_percentile_latencies, bool, out); + + init_namespace_options (conf); GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out); @@ -4614,6 +5108,9 @@ init (xlator_t *this) GF_OPTION_INIT ("ios-dump-interval", conf->ios_dump_interval, int32, out); + GF_OPTION_INIT ("ns-rate-window", conf->ns_rate_window, + int32, out); + GF_OPTION_INIT ("ios-sample-interval", conf->ios_sample_interval, int32, out); @@ -4639,8 +5136,7 @@ init (xlator_t *this) GF_OPTION_INIT ("log-level", log_str, str, out); if (log_str) { log_level = glusterd_check_log_level (log_str); - if (DEFAULT_LOG_LEVEL != log_level) - gf_log_set_loglevel (log_level); + gf_log_set_loglevel (log_level); } GF_OPTION_INIT ("logger", logger_str, str, out); @@ -4661,14 +5157,10 @@ init (xlator_t *this) GF_OPTION_INIT 
("log-flush-timeout", log_flush_timeout, time, out); gf_log_set_log_flush_timeout (log_flush_timeout); - GF_OPTION_INIT ("dump-percentile-latencies", - conf->dump_percentile_latencies, bool, out); - + this->private = conf; - if (conf->ios_dump_interval > 0) { - pthread_create (&conf->dump_thread, NULL, - (void *) &_ios_dump_thread, this); - } + pthread_create (&conf->dump_thread, NULL, + (void *) &_ios_dump_thread, this); ret = 0; out: if (!this->private) { @@ -4688,9 +5180,8 @@ fini (xlator_t *this) return; conf = this->private; - - ios_conf_destroy (conf); this->private = NULL; + ios_conf_destroy (conf); gf_log (this->name, GF_LOG_INFO, "io-stats translator unloaded"); return; @@ -4708,6 +5199,7 @@ notify (xlator_t *this, int32_t event, void *data, ...) double throughput = 0; double time = 0; gf_boolean_t is_peek = _gf_false; + va_list ap; dict = data; @@ -4800,8 +5292,8 @@ notify (xlator_t *this, int32_t event, void *data, ...) (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_DICT, output); - ret = io_stats_dump (this, NULL, &args, op, - is_peek); + ret = io_stats_dump (this, NULL, &args, + op, is_peek); } } break; @@ -4876,21 +5368,13 @@ struct volume_options options[] = { .description = "If on stats related to file-operations would be " "tracked inside GlusterFS data-structures." }, - { .key = { "ios-dump-interval" }, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = 3600, - .default_value = "0", - .description = "Interval (in seconds) at which to auto-dump " - "statistics. Zero disables automatic dumping." - }, { .key = { "ios-sample-interval" }, .type = GF_OPTION_TYPE_INT, .min = 0, .max = 65535, - .default_value = "0", + .default_value = "100", .description = "Interval in which we want to collect FOP latency " - "samples. 2 means collect a sample every 2nd FOP." + "samples. 2 means collect a sample every 2nd FOP." 
}, { .key = { "ios-sample-buf-size" }, .type = GF_OPTION_TYPE_INT, @@ -4899,13 +5383,27 @@ struct volume_options options[] = { .default_value = "65535", .description = "The maximum size of our FOP sampling ring buffer." }, + { .key = { "ios-dump-interval" }, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 3600, + .default_value = "5", + .description = "Interval in which we want to auto-dump statistics." + }, + { .key = { "ns-rate-window" }, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = FOP_HITS_SIZE, + .default_value = "5", + .description = "Number of ios-dump-interval length windows to calculate " + "our namespace FOP sma with." + }, { .key = { "ios-dnscache-ttl-sec" }, .type = GF_OPTION_TYPE_INT, .min = 1, .max = 3600 * 72, - .default_value = "86400", - .description = "The interval after wish a cached DNS entry will be " - "re-validated. Default: 24 hrs" + .default_value = "43200", + .description = "Interval in which we want to auto-dump statistics." }, { .key = { "latency-measurement" }, .type = GF_OPTION_TYPE_BOOL, @@ -5026,58 +5524,71 @@ struct volume_options options[] = { "log messages that can be buffered for a time equal to" " the value of the option brick-log-flush-timeout." }, - { .key = {"iam-self-heal-daemon"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option differentiates if the io-stats " - "translator is running as part of an self-heal " - "daemon or not." - }, - { .key = {"iam-nfs-daemon"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "This option differentiates if the io-stats " - "translator is running as part of an NFS daemon " - "or not." + { .key = {"iam-self-heal-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option differentiates if the io-stats " + "translator is running as part of self-heal-daemon " + "or not." 
}, - { .key = {"iam-brick-daemon"}, + { .key = {"iam-nfs-daemon"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", .description = "This option differentiates if the io-stats " - "translator is running as part of brick daemon " + "translator is running as part of an NFS daemon " "or not." }, + { + .key = {"iam-brick-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option differentiates if the io-stats " + "translator is running as part of brick daemon " + "or not." + }, { .key = {"iam-gfproxy-daemon"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", .description = "This option differentiates if the io-stats " - "translator is running as part of an GFProxy daemon " + "translator is running as part of an NFS daemon " "or not." }, + { .key = {"fop-throttling-enable"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option enables whether certain namespaces " + "(exports) will be throttled when they exceed their " + "allowed FOPs/sec rate." + }, + { .key = {"measure-namespace-rates"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option enables tracking of FOP rates for individual namespaces. " + "It is automatically set to true if fop-throttling-enable is enabled." + }, { .key = {"fop-sample-enable-audit"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", - .description = "This option samples the following FOPs 1:1: " - "CREATE, UNLINK, MKDIR, RMDIR, TRUNCATE. " + .description = "This option samples the following FOPs 1:1: CREATE, UNLINK, MKDIR, RMDIR, TRUNCATE." + " fop-sample-interval must be set to something non-zero." }, { .key = {"fop-sample-all-errors"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", - .description = "This option samples all fops that failed." + .description = "This option samples all fops with a non-zero op_errno " + "value 1:1." 
}, { .key = {"fop-sample-hard-errors"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "on", - .description = "This option samples all fops with \"hard errors\"" + .description = "This option samples all fops with \"hard errors\" 1:1," "including EROFS, ENOSPC, etc." }, { .key = { "dump-percentile-latencies" }, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", - .description = "If on the p50, p90, p95, and p99 latency of each " - "operation and all types is dumped at each sample " - "interval." + .description = "If on the p50, p90, p95, and p99 latency of each operation " + "and all types is dumped at each sample interval. " }, { .key = {NULL} }, - }; diff --git a/xlators/debug/io-stats/src/io-stats.h b/xlators/debug/io-stats/src/io-stats.h new file mode 100644 index 00000000000..678f67929f4 --- /dev/null +++ b/xlators/debug/io-stats/src/io-stats.h @@ -0,0 +1,381 @@ +#ifndef _IO_STATS_H_ +#define _IO_STATS_H_ + +#define OFFSET_WRITE_DETECTOR 0 +#define UNKNOWN_WRITE_OFFSET ((off_t)-1) + +#define FOP_HITS_SIZE 32 +struct _ios_fop_hits_s { + uint64_t idx; + uint64_t hits[FOP_HITS_SIZE]; + double elapsed[FOP_HITS_SIZE]; + double avg_fops; + double avg_latency; +}; +typedef struct _ios_fop_hits_s ios_fop_hits_t; + +ios_fop_hits_t *ios_fop_hits_init (); + +#define GF_IO_STATS "io-stats" + +#define MAX_LIST_MEMBERS 100 +#define DEFAULT_PWD_BUF_SZ 16384 +#define DEFAULT_GRP_BUF_SZ 16384 +#define IOS_MAX_ERRORS 132 + +typedef enum { + IOS_STATS_TYPE_NONE, + IOS_STATS_TYPE_OPEN, + IOS_STATS_TYPE_READ, + IOS_STATS_TYPE_WRITE, + IOS_STATS_TYPE_OPENDIR, + IOS_STATS_TYPE_READDIRP, + IOS_STATS_TYPE_READ_THROUGHPUT, + IOS_STATS_TYPE_WRITE_THROUGHPUT, + IOS_STATS_TYPE_MAX +}ios_stats_type_t; + +typedef enum { + IOS_STATS_THRU_READ, + IOS_STATS_THRU_WRITE, + IOS_STATS_THRU_MAX, +}ios_stats_thru_t; + +struct ios_stat_lat { + struct timeval time; + double throughput; +}; + +struct ios_stat { + gf_lock_t lock; + uuid_t gfid; + char *filename; + uint64_t counters 
[IOS_STATS_TYPE_MAX]; + struct ios_stat_lat thru_counters [IOS_STATS_THRU_MAX]; + int refcnt; +#if OFFSET_WRITE_DETECTOR + off_t expected_write_offset; +#endif +}; + +struct ios_stat_list { + struct list_head list; + struct ios_stat *iosstat; + double value; +}; + +struct ios_stat_head { + gf_lock_t lock; + double min_cnt; + uint64_t members; + struct ios_stat_list *iosstats; +}; + +typedef struct _ios_sample_t { + uid_t uid; + gid_t gid; + char identifier[UNIX_PATH_MAX]; + glusterfs_fop_t fop_type; + gf_fop_pri_t fop_pri; + struct timeval timestamp; + double elapsed; + ns_info_t ns_info; + char path[4096]; + gf_boolean_t have_path; + int32_t op_ret; + int32_t op_errno; +} ios_sample_t; + + +typedef struct _ios_sample_buf_t { + uint64_t pos; /* Position in write buffer */ + uint64_t size; /* Size of ring buffer */ + uint64_t collected; /* Number of samples we've collected */ + uint64_t observed; /* Number of FOPs we've observed */ + ios_sample_t *ios_samples; /* Our list of samples */ +} ios_sample_buf_t; + + +struct ios_lat { + double min; + double max; + double avg; + uint64_t total; +}; + +struct ios_global_stats { + uint64_t data_written; + uint64_t data_read; + uint64_t block_count_write[32]; + uint64_t block_count_read[32]; + uint64_t fop_hits[GF_FOP_MAXVALUE]; + struct timeval started_at; + struct ios_lat latency[GF_FOP_MAXVALUE]; + uint64_t errno_count[IOS_MAX_ERRORS]; + uint64_t nr_opens; + uint64_t max_nr_opens; + struct timeval max_openfd_time; +#if OFFSET_WRITE_DETECTOR + uint64_t total_write_ops; + uint64_t offset_write_ops; +#endif +}; + +struct ios_conf { + gf_lock_t lock; + struct ios_global_stats cumulative; + uint64_t increment; + struct ios_global_stats incremental; + gf_boolean_t dump_fd_stats; + gf_boolean_t count_fop_hits; + gf_boolean_t measure_latency; + struct ios_stat_head list[IOS_STATS_TYPE_MAX]; + struct ios_stat_head thru_list[IOS_STATS_THRU_MAX]; + int32_t ios_dump_interval; + int32_t ns_rate_window; + pthread_t dump_thread; + 
gf_boolean_t dump_thread_should_die; + gf_lock_t ios_sampling_lock; + int32_t ios_sample_interval; + int32_t ios_sample_buf_size; + ios_sample_buf_t *ios_sample_buf; + struct dnscache *dnscache; + int32_t ios_dnscache_ttl_sec; + dict_t *hash_to_ns; /* Hash -> NS name */ + dict_t *fop_hits_dict; /* NS -> Real rate */ + dict_t *throttling_rates; /* NS -> Max rate */ + gf_boolean_t iamgfproxyd; + gf_boolean_t iamnfsd; + gf_boolean_t iamshd; + gf_boolean_t iambrickd; + gf_boolean_t throttling_enabled; + time_t namespace_conf_mtime; + gf_boolean_t measure_namespace_rates; + gf_boolean_t audit_creates_and_unlinks; + gf_boolean_t sample_hard_errors; + gf_boolean_t dump_percentile_latencies; + gf_boolean_t sample_all_errors; + uint32_t outstanding_req; +}; + + +struct ios_fd { + char *filename; + uint64_t data_written; + uint64_t data_read; + uint64_t block_count_write[32]; + uint64_t block_count_read[32]; + struct timeval opened_at; +}; + +typedef enum { + IOS_DUMP_TYPE_NONE = 0, + IOS_DUMP_TYPE_FILE = 1, + IOS_DUMP_TYPE_DICT = 2, + IOS_DUMP_TYPE_JSON_FILE = 3, + IOS_DUMP_TYPE_SAMPLES = 4, + IOS_DUMP_TYPE_MAX = 5 +} ios_dump_type_t; + +struct ios_dump_args { + xlator_t *this; + ios_dump_type_t type; + union { + FILE *logfp; + dict_t *dict; + } u; +}; + +typedef int (*block_dump_func) (xlator_t *, struct ios_dump_args*, + int, int, uint64_t); + +// Will be present in frame->local +struct ios_local { + inode_t *inode; + loc_t loc; + fd_t *fd; +}; + +#define ios_local_new() GF_CALLOC (1, sizeof (struct ios_local), gf_common_mt_char); + +void +ios_local_free (struct ios_local *local) +{ + if (!local) + return; + + inode_unref (local->inode); + + if (local->fd) + fd_unref (local->fd); + + loc_wipe (&local->loc); + memset (local, 0, sizeof (*local)); + GF_FREE (local); +} + +struct volume_options options[]; + +inline static int +is_fop_latency_started (call_frame_t *frame) +{ + GF_ASSERT (frame); + struct timeval epoch = {0, }; + return memcmp (&frame->begin, &epoch, sizeof 
(epoch)); +} + +const char *_IOS_DUMP_DIR = "/var/lib/glusterd/stats"; +const char *_IOS_SAMP_DIR = "/var/log/glusterfs/samples"; +const char *_IOS_NAMESPACE_CONF = TOSTRING (GLUSTERD_WORKDIR) "/namespaces.conf"; + +extern const char *__progname; + +/* This is a list of errors which are in some way critical. + * It is useful to sample these errors even if other errors + * should be ignored. */ +const int32_t ios_hard_error_list[] = { + EIO, + EROFS, + ENOSPC, + ENOTCONN, + ESTALE, +}; + +#define IOS_HARD_ERROR_LIST_SIZE (sizeof(ios_hard_error_list) / sizeof(int32_t)) + +const char *errno_to_name[IOS_MAX_ERRORS] = { + "success", /* 0 */ + "eperm", + "enoent", + "esrch", + "eintr", + "eio", + "enxio", + "e2big", + "enoexec", + "ebadf", + "echild", + "eagain", + "enomem", + "eacces", + "efault", + "enotblk", + "ebusy", + "eexist", + "exdev", + "enodev", + "enotdir", + "eisdir", + "einval", + "enfile", + "emfile", + "enotty", + "etxtbsy", + "efbig", + "enospc", + "espipe", + "erofs", + "emlink", + "epipe", + "edom", + "erange", + "edeadlk", + "enametoolong", + "enolck", + "enosys", + "enotempty", + "eloop", + "ewouldblock", + "enomsg", + "eidrm", + "echrng", + "el2nsync", + "el3hlt", + "el3rst", + "elnrng", + "eunatch", + "enocsi", + "el2hlt", + "ebade", + "ebadr", + "exfull", + "enoano", + "ebadrqc", + "ebadslt", + "edeadlock", + "ebfont", + "enostr", + "enodata", + "etime", + "enosr", + "enonet", + "enopkg", + "eremote", + "enolink", + "eadv", + "esrmnt", + "ecomm", + "eproto", + "emultihop", + "edotdot", + "ebadmsg", + "eoverflow", + "enotuniq", + "ebadfd", + "eremchg", + "elibacc", + "elibbad", + "elibscn", + "elibmax", + "elibexec", + "eilseq", + "erestart", + "estrpipe", + "eusers", + "enotsock", + "edestaddrreq", + "emsgsize", + "eprototype", + "enoprotoopt", + "eprotonosupport", + "esocktnosupport", + "eopnotsupp", + "epfnosupport", + "eafnosupport", + "eaddrinuse", + "eaddrnotavail", + "enetdown", + "enetunreach", + "enetreset", + "econnaborted", + 
"econnreset", + "enobufs", + "eisconn", + "enotconn", + "eshutdown", + "etoomanyrefs", + "etimedout", + "econnrefused", + "ehostdown", + "ehostunreach", + "ealready", + "einprogress", + "estale", + "euclean", + "enotnam", + "enavail", + "eisnam", + "eremoteio", + "edquot", + "enomedium", + "emediumtype", + "ecanceled", + "enokey", + "ekeyexpired", + "ekeyrevoked", + "ekeyrejected", + "eownerdead", + "enotrecoverable" +}; + +#endif /* _IO_STATS_H_ */ diff --git a/xlators/features/namespace/src/namespace.c b/xlators/features/namespace/src/namespace.c index 639dfa33fe2..4585087025e 100644 --- a/xlators/features/namespace/src/namespace.c +++ b/xlators/features/namespace/src/namespace.c @@ -143,7 +143,7 @@ ns_inode_ctx_put (inode_t *inode, xlator_t *this, ns_info_t *info) int ret = -1; if (!inode || !this) { - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_DEBUG, "Need a valid inode and xlator to cache ns_info."); ret = -1; goto out; @@ -253,10 +253,10 @@ get_path_resume_cbk (call_frame_t *frame, void *cookie, xlator_t *this, "G>P %s %10u namespace found %s", uuid_utoa (local->loc.inode->gfid), info->hash, path); } else if (ret == PATH_PARSE_RESULT_NO_PATH) { - gf_log (this->name, GF_LOG_WARNING, "G>P %s has no path", + gf_log (this->name, GF_LOG_DEBUG, "G>P %s has no path", uuid_utoa (local->loc.inode->gfid)); } else if (ret == PATH_PARSE_RESULT_IS_GFID) { - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_DEBUG, "G>P %s winding failed, still have gfid", uuid_utoa (local->loc.inode->gfid)); } @@ -303,16 +303,26 @@ set_ns_from_loc (const char *fn, call_frame_t *frame, xlator_t *this, loc_t *loc * from the inode context, then from the loc's path itself. 
*/ if (!loc || !loc->path || !loc->inode) { ret = PATH_PARSE_RESULT_NO_PATH; - } else if (!ns_inode_ctx_get (loc->inode, this, info)) { + goto out; + } + + if (!ns_inode_ctx_get (loc->inode, this, info)) { ret = PATH_PARSE_RESULT_FOUND; - } else { - ret = parse_path (info, loc->path); - gf_log (this->name, GF_LOG_DEBUG, "%s: LOC retrieved path %s", - fn, loc->path); + goto out; + } - if (ret == PATH_PARSE_RESULT_FOUND) { - ns_inode_ctx_put (loc->inode, this, info); - } + if (gf_uuid_is_null (loc->inode->gfid) && gf_uuid_is_null (loc->gfid)) { + ret = PATH_PARSE_RESULT_NO_PATH; + goto out; + } + + ret = parse_path (info, loc->path); + gf_log (this->name, GF_LOG_DEBUG, "%s: LOC retrieved path %s", + fn, loc->path); + + if (ret == PATH_PARSE_RESULT_FOUND) { + ns_inode_ctx_put (loc->inode, this, info); + goto out; } /* Keep trying by calling inode_path next, making sure to copy @@ -337,6 +347,7 @@ set_ns_from_loc (const char *fn, call_frame_t *frame, xlator_t *this, loc_t *loc } } +out: /* Report our status, and if we have a GFID, we'll eventually try a * GET_ANCESTRY_PATH_KEY wind when we return from this function. */ if (ret == PATH_PARSE_RESULT_FOUND) { @@ -344,7 +355,7 @@ set_ns_from_loc (const char *fn, call_frame_t *frame, xlator_t *this, loc_t *loc "%s: LOC %s %10u namespace found for %s", fn, uuid_utoa (loc->inode->gfid), info->hash, loc->path); } else if (ret == PATH_PARSE_RESULT_NO_PATH) { - gf_log (this->name, GF_LOG_WARNING, "%s: LOC has no path", fn); + gf_log (this->name, GF_LOG_DEBUG, "%s: LOC has no path", fn); } else if (ret == PATH_PARSE_RESULT_IS_GFID) { /* Make sure to copy the inode's gfid for the eventual wind. */ if (gf_uuid_is_null (loc->inode->gfid)) { @@ -381,9 +392,20 @@ set_ns_from_fd (const char *fn, call_frame_t *frame, xlator_t *this, fd_t *fd) * from the inode context, then inode_path. 
*/ if (!fd || !fd->inode) { ret = PATH_PARSE_RESULT_NO_PATH; - } else if (!ns_inode_ctx_get (fd->inode, this, info)) { + goto out; + } + + if (!ns_inode_ctx_get (fd->inode, this, info)) { ret = PATH_PARSE_RESULT_FOUND; - } else if (inode_path (fd->inode, NULL, &path) >= 0 && path) { + goto out; + } + + if (gf_uuid_is_null (fd->inode->gfid)) { + ret = PATH_PARSE_RESULT_NO_PATH; + goto out; + } + + if (inode_path (fd->inode, NULL, &path) >= 0 && path) { ret = parse_path (info, path); gf_log (this->name, GF_LOG_DEBUG, "%s: FD retrieved path %s", fn, path); @@ -397,13 +419,14 @@ set_ns_from_fd (const char *fn, call_frame_t *frame, xlator_t *this, fd_t *fd) GF_FREE (path); } +out: /* Report our status, and if we have a GFID, we'll eventually try a * GET_ANCESTRY_PATH_KEY wind when we return from this function. */ if (ret == PATH_PARSE_RESULT_FOUND) { gf_log (this->name, GF_LOG_DEBUG, "%s: FD %s %10u namespace found", fn, uuid_utoa (fd->inode->gfid), info->hash); } else if (ret == PATH_PARSE_RESULT_NO_PATH) { - gf_log (this->name, GF_LOG_WARNING, "%s: FD has no path", fn); + gf_log (this->name, GF_LOG_DEBUG, "%s: FD has no path", fn); } else if (ret == PATH_PARSE_RESULT_IS_GFID) { gf_log (this->name, GF_LOG_DEBUG, "%s: FD %s winding, looking for path", @@ -442,6 +465,12 @@ set_ns_from_fd (const char *fn, call_frame_t *frame, xlator_t *this, fd_t *fd) goto wind; \ } \ \ + if (gf_uuid_is_null (inode->gfid)) { \ + gf_log (this->name, GF_LOG_DEBUG, \ + "Cannot wind on a NULL GFID."); \ + goto wind; \ + } \ + \ new_frame->root->uid = new_frame->root->gid = 0; \ /* Put a phony "not found" NS info into this call. 
*/ \ new_frame->root->ns_info = *info; \ @@ -750,8 +779,10 @@ wind: int32_t ns_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { + ns_private_t *priv = (ns_private_t *)this->private; + path_parse_result_t ret = set_ns_from_loc (__FUNCTION__, frame, this, loc); - if (ret == PATH_PARSE_RESULT_IS_GFID) { + if (ret == PATH_PARSE_RESULT_IS_GFID && priv->wind_lookups) { GET_ANCESTRY_PATH_WIND (lookup, loc->inode, loc, xdata); return 0; } @@ -1229,6 +1260,7 @@ init (xlator_t *this) } GF_OPTION_INIT ("tag-namespaces", priv->tag_namespaces, bool, out); + GF_OPTION_INIT ("wind-lookups", priv->wind_lookups, bool, out); gf_log (this->name, GF_LOG_INFO, "Namespace xlator loaded"); this->private = priv; @@ -1262,6 +1294,8 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("tag-namespaces", priv->tag_namespaces, options, bool, out); + GF_OPTION_RECONF ("wind-lookups", priv->wind_lookups, options, + bool, out); ret = 0; out: @@ -1331,5 +1365,16 @@ struct volume_options options[] = { "that tags every fop with a namespace hash for later " "throttling, stats collection, logging, etc.", }, + { + .key = { "wind-lookups" }, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "If enabled, a GET_ANCESTRY_PATH_KEY getxattr wind " + "will be scheduled for a LOOKUP. 
Since we have many " + "LOOKUP requests which are sent for files that don't " + "exist, it's often benificial to disable this option, " + "so we don't have a high percentage of bad GETXATTRs " + "in io-stats.", + }, {.key = { NULL } }, }; diff --git a/xlators/features/namespace/src/namespace.h b/xlators/features/namespace/src/namespace.h index 3b9f782cb1a..affaba04681 100644 --- a/xlators/features/namespace/src/namespace.h +++ b/xlators/features/namespace/src/namespace.h @@ -13,6 +13,7 @@ typedef struct { gf_boolean_t tag_namespaces; + gf_boolean_t wind_lookups; } ns_private_t; typedef struct { diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 6fcf6892d27..a07fd9c3232 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -2447,8 +2447,8 @@ out: * the topology of the brick graph */ static volgen_brick_xlator_t server_graph_table[] = { {brick_graph_add_server, NULL}, - {brick_graph_add_decompounder, "decompounder"}, {brick_graph_add_io_stats, "NULL"}, + {brick_graph_add_decompounder, "decompounder"}, {brick_graph_add_namespace, "namespace"}, {brick_graph_add_cdc, NULL}, {brick_graph_add_quota, "quota"}, diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am index 1d09eace2ed..bdbd0290fd6 100644 --- a/xlators/performance/io-threads/src/Makefile.am +++ b/xlators/performance/io-threads/src/Makefile.am @@ -1,15 +1,16 @@ xlator_LTLIBRARIES = io-threads.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -io_threads_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) +io_threads_la_LDFLAGS = -module -avoid-version io_threads_la_SOURCES = io-threads.c io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = io-threads.h iot-mem-types.h io-threads-messages.h +noinst_HEADERS = io-threads.h iot-mem-types.h -AM_CPPFLAGS = $(GF_CPPFLAGS) 
-I$(top_srcdir)/libglusterfs/src +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -DGLUSTERD_WORKDIR=$(GLUSTERD_WORKDIR) AM_CFLAGS = -Wall $(GF_CFLAGS) -CLEANFILES = +CLEANFILES = diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 04d94691ba3..c0ebdd33caf 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -8,6 +8,11 @@ cases as published by the Free Software Foundation. */ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + #include "call-stub.h" #include "defaults.h" #include "glusterfs.h" @@ -20,12 +25,14 @@ #include <sys/time.h> #include <time.h> #include "locking.h" -#include "io-threads-messages.h" #include "timespec.h" +#include "hashfn.h" void *iot_worker (void *arg); int iot_workers_scale (iot_conf_t *conf); int __iot_workers_scale (iot_conf_t *conf); +int32_t iot_reinit_clock (iot_conf_t *conf, int i, uint32_t total_weight); +int32_t __iot_reinit_clock (iot_conf_t *conf, int i, uint32_t total_weight); struct volume_options options[]; #define IOT_FOP(name, frame, this, args ...) \ @@ -50,84 +57,622 @@ struct volume_options options[]; } \ } while (0) +gf_boolean_t +__iot_ns_queue_empty (iot_ns_queue_t *queue) +{ + return (queue->size == 0); +} + +/* Fetch a namespace queue for the given ns_info struct. If the hash was not + * found or the clock is not enabled, then return NULL which will be substituted + * by the unknown queue. 
*/ +iot_ns_queue_t * +__iot_get_ns_queue (iot_conf_t *conf, ns_info_t *info) +{ + char ns_key[DICT_UINT32_KEY_SIZE]; + int32_t ret = -1; + iot_ns_queue_t *queue = NULL; + + if (!conf->ns_weighted_queueing || !info->found) { + gf_log (GF_IO_THREADS, GF_LOG_TRACE, + "Namespace not found for %u", info->hash); + return NULL; + } + + dict_uint32_to_key (info->hash, ns_key); + ret = dict_get_ptr (conf->ns_queues, ns_key, (void **)&queue); + + if (ret) { + gf_log (GF_IO_THREADS, GF_LOG_TRACE, + "Namespace not found for %u", info->hash); + } + + /* If successful, return `queue`. */ + return (!ret && queue) ? queue : NULL; +} + +/* When we parse a new namespace conf file, we create a whole new set of queue + * structs; however, old requests may be sitting on old queues. This function + * drains the old requests lists into the new queue, or alternatively appends + * it onto the unknown queue if there is no corresponding new queue. + * (i.e. if it was removed from the conf file) */ +int +__iot_drain_ns_queue_foreach (dict_t *this, char *key, data_t *value, void *data) +{ + int ret = 0; + iot_conf_t *conf = data; + dict_t *new_dict = conf->ns_queues; + iot_ns_queue_t *old_queue = data_to_ptr (value); + iot_ns_queue_t *new_queue = NULL; + int i; + + ret = dict_get_ptr (new_dict, key, (void **)&new_queue); + + /* Don't drain the unknown queue. */ + if (old_queue == conf->ns_unknown_queue) { + return 0; + } + + for (i = 0; i < IOT_PRI_MAX; i++) { + /* If we didn't find a "new queue" corresponding to the old, + * then drain into the unknown queue of that priority level. */ + if (!ret && new_queue) { + list_append_init (&old_queue[i].reqs, + &new_queue[i].reqs); + new_queue[i].size += old_queue[i].size; + } else { + list_append_init (&old_queue[i].reqs, + &conf->ns_unknown_queue[i].reqs); + conf->ns_unknown_queue[i].size += old_queue[i].size; + } + } + + return 0; +} + +/* Drain the namespace queues in old_dict, if there are any. Then free the dict + * and clear the clock structs. 
*/ +void +__iot_drain_and_clear_clock (iot_conf_t *conf, dict_t *old_dict) +{ + int i; + + if (old_dict) { + dict_foreach (old_dict, __iot_drain_ns_queue_foreach, conf); + dict_destroy (old_dict); + } + + for (i = 0; i < IOT_PRI_MAX; i++) { + GF_FREE (conf->ns_clocks[i].slots); + conf->ns_clocks[i].slots = NULL; + conf->ns_clocks[i].idx = 0; + conf->ns_clocks[i].size = 0; + } +} + +/* Parse a single namespace conf line. This constructs a new queue and puts it + * into the namespace dictionary as well, skipping duplicated namespaces with + * a warning. */ +int32_t +__iot_ns_parse_conf_line (iot_conf_t *conf, char *file_line, uint32_t *total_weight) +{ + char ns_key[DICT_UINT32_KEY_SIZE]; + char *ns_name = NULL; + iot_ns_queue_t *queue = NULL; + int32_t queue_weight = 1; + uint32_t ns_hash = 0; + int i, ret = -1, scanned = -1; + + ns_name = GF_CALLOC (strlen (file_line), sizeof (char), 0); + if (!ns_name) { + gf_log (GF_IO_THREADS, GF_LOG_WARNING, + "Memory allocation error!"); + ret = ENOMEM; + goto out; + } + + /* Scan the line, skipping the second column which corresponds to a + * throttling rate, which we don't particularly care about. */ + scanned = sscanf (file_line, "%s %*d %d", ns_name, &queue_weight); + if (scanned < 1 || strlen (ns_name) < 1) { + gf_log (GF_IO_THREADS, GF_LOG_WARNING, + "Empty or malformatted line \"%s\" while parsing", file_line); + goto out; + } + + /* Hash the namespace name, convert it to a key, then search the dict + * for an entry matching this key. */ + ns_hash = SuperFastHash (ns_name, strlen (ns_name)); + + gf_log (GF_IO_THREADS, GF_LOG_INFO, + "Parsed namespace \'%s\' (%u)", ns_name, ns_hash); + + dict_uint32_to_key (ns_hash, ns_key); + ret = dict_get_ptr (conf->ns_queues, ns_key, (void **)&queue); + if (!ret && queue) { + gf_log (GF_IO_THREADS, GF_LOG_WARNING, + "Duplicate-hashed queue found for namespace %s", ns_name); + /* Since ret == 0, we won't free the queue inadvertently. 
*/ + goto out; + } + + queue = GF_CALLOC (IOT_PRI_MAX, sizeof (iot_ns_queue_t), 0); + if (!queue) { + gf_log (GF_IO_THREADS, GF_LOG_WARNING, + "Memory allocation error!"); + ret = -(ENOMEM); + goto out; + } + + /* Init queues. */ + for (i = 0; i < IOT_PRI_MAX; i++) { + INIT_LIST_HEAD (&queue[i].reqs); + queue[i].hash = ns_hash; + queue[i].weight = conf->ns_default_weight; + queue[i].size = 0; + } + + ret = dict_set_ptr (conf->ns_queues, ns_key, queue); + if (ret) { + goto out; + } + + ret = dict_set_dynstr_with_alloc (conf->hash_to_ns, ns_key, ns_name); + if (ret) { + goto out; + } + + if (scanned < 2 || queue_weight < 1) { + gf_log (GF_IO_THREADS, GF_LOG_WARNING, + "No weight (or too low) found in config line for namespace %s, " + "defaulting to weight of %u.", ns_name, conf->ns_default_weight); + } else { + gf_log (GF_IO_THREADS, GF_LOG_INFO, + "Parsed weight \'%s\' = %u", ns_name, queue_weight); + for (i = 0; i < IOT_PRI_MAX; i++) { + queue[i].weight = (uint32_t) queue_weight; + } + } + + *total_weight += queue->weight; + +out: + if (ns_name) { + GF_FREE (ns_name); + } + + if (ret && queue) { + GF_FREE (queue); + } + + return ret; +} + +/* This function (re)initializes the clock that is used by the en/de-queue + * operations. */ +void +__iot_reinit_ns_conf (iot_conf_t *conf) +{ + char ns_unknown_key[DICT_UINT32_KEY_SIZE]; + dict_t *old_dict, *new_dict; + FILE *fp = NULL; + char *line = NULL; + size_t len = 0; + uint32_t total_weight; + int i, ret = 0; + + if (!conf) { + return; + } + + if (conf->ns_weighted_queueing) { + gf_log (GF_IO_THREADS, GF_LOG_INFO, + "Loading %s from disk.", + _IOT_NAMESPACE_CONF); + + fp = fopen (_IOT_NAMESPACE_CONF, "r"); + if (!fp) { + gf_log (GF_IO_THREADS, GF_LOG_INFO, + "Cannot open file for reading."); + ret = ENOENT; + goto out; + } + + /* Save the old queues; we will need to drain old requests out + * of it once we make new queues. 
*/ + old_dict = conf->ns_queues; + conf->ns_queues = new_dict = get_new_dict (); + + /* Include the unknown queue weight, which isn't a parsed line. */ + total_weight = conf->ns_default_weight; + + /* Parse the new config file line-by-line, making a new queue + * for each namespace that is parsed. */ + while (getline (&line, &len, fp) != -1) { + __iot_ns_parse_conf_line (conf, line, &total_weight); + } + free (line); + + /* Drain old queues into new queues, or into unknown queue. */ + __iot_drain_and_clear_clock (conf, old_dict); + + /* We will add the unknown queue manually into the dictionaries. */ + dict_uint32_to_key (0, ns_unknown_key); + ret = dict_set_dynstr_with_alloc (conf->hash_to_ns, + ns_unknown_key, "unknown"); + if (ret) { + goto out; + } + + /* Set the ns_unknown_queue as static pointer so it's not freed + * in the queue drain step next time the automation. */ + ret = dict_set_static_ptr (conf->ns_queues, ns_unknown_key, + conf->ns_unknown_queue); + if (ret) { + goto out; + } + + for (i = 0; i < IOT_PRI_MAX; i++) { + ret = __iot_reinit_clock (conf, i, total_weight); + if (ret) { + goto out; + } + } + + /* Finally, keep our conf struct updated so we don't spuriously + * reconfigure the clock. */ + get_file_mtime (_IOT_NAMESPACE_CONF, &conf->ns_conf_mtime); + } + + ret = 0; +out: + if (fp) { + fclose (fp); + } + + if (ret) { + gf_log (GF_IO_THREADS, GF_LOG_INFO, + "There was an error loading the namespaces conf file, " + "disabling clock."); + conf->ns_weighted_queueing = _gf_false; + } + + /* If our clock isn't enabled (or it was disabled because of an error) + * then drain the queues if there are any. */ + if (!conf->ns_weighted_queueing) { + old_dict = conf->ns_queues; + /* This NULL signals to the drain that we're not populating any + * new queues... 
*/ + conf->ns_queues = NULL; + __iot_drain_and_clear_clock (conf, old_dict); + } +} + +/* This is a simple iterative algorithm which tries to allocate the lowest + * amount of slots that maintains the same proportional amount of work given + * to each namespace. + * + * Each namespace has a weight, and the proportion of "weight / total weight" + * is something we'll call the "ideal" work ratio. If we have no latency and + * infinite queues, this ratio is the amount of requests we serve (out of total) + * for the given namespace. + * + * For a given namespace idx i, let the namespace's weight be W_i, and the total + * weight be TW. The "ideal" percentage is thus given by W_i/TW. Now if we + * choose some arbitrary total number of slots TS, the number of slots we + * should give to the namespace is given by S_i = TS*(W_i/TW). Since this is + * probably not an integer, we'll actually floor the number, meaning that the + * sum of S_i for each namespace most likely doesn't equal the TS we chose + * earlier. Let this "real" total sum of slots be RS. + * + * Now we have the concept of an "realized" percentage given by the ratio of + * _allocated slots_ instead of just ideal weights, given by S_i/RS. We consider + * this set of slot allocations to be ideal if these two ratios (slots and + * weights) are "close enough", given by our ns-weight-tolerance option. + * + * We then use a loop to distribute the shares, using some modulo magic to + * get a good, semi-even distribution in the slots array. The main concern here + * is trying to make sure that no single share is bunched up. + * If we have namespaces A and B with 2 and 8 slots respectively, we should + * shoot for a distribution like [A B B B B A B B B] unstead of like + * [A A B B B B B B B B]. This loop tries its best to do that. We don't want + * to shuffle randomly either, since there is still a risk of having a bad + * bunching if our RNG is bad or we're unlucky... 
*/ +int32_t +__iot_reinit_clock (iot_conf_t *conf, int i, uint32_t total_weight) +{ + int32_t ret = -1; + uint32_t try_slots, total_slots, slots_idx, rep; + data_pair_t *curr = NULL; + iot_ns_queue_t *curr_queue = NULL; + iot_ns_queue_t **slots = NULL; + char *ns_name = NULL; + gf_boolean_t fail; + double real_percentage; + + /* Initialize the "ideal" percentage each queue should have. */ + dict_foreach_inline (conf->ns_queues, curr) { + curr_queue = data_to_ptr (curr->value); + curr_queue = &curr_queue[i]; + + curr_queue->percentage = ((double) curr_queue->weight) + / total_weight; + } + + /* We try to satisfy each percentage within some margin, first trying + * 1 slot, until we get up to the total sum of all weights, which will + * obviously satisfy each percentage but in many cases is far too large + * for a slots matrix. */ + for (try_slots = 1; try_slots <= total_weight; try_slots++) { + fail = _gf_false; + total_slots = 0; + + /* Calculate how many slots each namespace much get. Since we're + * rounding integers, we keep track of the actual total number + * of slots in `total_slots`. */ + dict_foreach_inline (conf->ns_queues, curr) { + curr_queue = data_to_ptr (curr->value); + curr_queue = &curr_queue[i]; + + curr_queue->slots = (int) (curr_queue->percentage * try_slots); + total_slots += curr_queue->slots; + + /* If we've allocated less than 1 slot for the queue, + * we should find a larger size. */ + if (curr_queue->slots < 1) { + fail = _gf_true; + break; + } + } + + if (fail) { + continue; + } + + dict_foreach_inline (conf->ns_queues, curr) { + curr_queue = data_to_ptr (curr->value); + curr_queue = &curr_queue[i]; + + real_percentage = ((double) curr_queue->slots) / total_slots; + /* If the realized percentage is more than ns_weight_tolerance + * percent away from the ideal percentage, then let's try + * another number. 
*/ + if (abs (real_percentage - curr_queue->percentage) * 100.0 + > conf->ns_weight_tolerance) { + fail = _gf_true; + break; + } + } + + if (!fail) { + break; + } + } + + /* Report the fits that we have found. */ + dict_foreach_inline (conf->ns_queues, curr) { + curr_queue = data_to_ptr (curr->value); + curr_queue = &curr_queue[i]; + + real_percentage = ((double) curr_queue->slots) / total_slots; + ret = dict_get_str (conf->hash_to_ns, curr->key, &ns_name); + + if (ret || !ns_name) { + continue; + } + + gf_log (GF_IO_THREADS, GF_LOG_INFO, + "Initializing namespace \'%s\' (weight: %d) with %d slots. " + "Ideal percentage: %0.2f%%, real percentage: %0.2f%%.", + ns_name, curr_queue->weight, curr_queue->slots, + curr_queue->percentage * 100.0, real_percentage * 100.0); + } + + /* At this point, we've either found a good fit, or gotten all the way to + * total_weight. In either case, we can start allocating slots. */ + slots = GF_CALLOC (total_slots, sizeof (iot_ns_queue_t *), 0); + slots_idx = 0; + rep = 0; + + if (!slots) { + ret = -(ENOMEM); + goto out; + } + + /* Allocate slots, with some fun modulo-math to make sure that they're + * well distributed. */ + while (total_slots != slots_idx) { + dict_foreach_inline (conf->ns_queues, curr) { + curr_queue = data_to_ptr (curr->value); + curr_queue = &curr_queue[i]; + + if (curr_queue->slots == 0) { + continue; + } + + if (rep % (total_slots / curr_queue->slots) == 0) { + slots[slots_idx++] = curr_queue; + curr_queue->slots--; + } + } + + rep++; + } + + /* Set the slots into the queue, and we're ready to go! 
*/ + conf->ns_clocks[i].slots = slots; + conf->ns_clocks[i].size = total_slots; + conf->ns_clocks[i].idx = 0; + + ret = 0; +out: + if (ret && slots) { + GF_FREE (slots); + } + + return ret; +} + + +void * +iot_reinit_ns_conf_thread (void *arg) +{ + xlator_t *this = arg; + iot_conf_t *conf = this->private; + time_t curr_mtime = {0, }; + + while (_gf_true) { + sleep (conf->ns_conf_reinit_secs); + + get_file_mtime (_IOT_NAMESPACE_CONF, &curr_mtime); + if (conf->ns_weighted_queueing && curr_mtime != conf->ns_conf_mtime) { + pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); + pthread_mutex_lock (&conf->mutex); + { + __iot_reinit_ns_conf (conf); + } + pthread_mutex_unlock (&conf->mutex); + pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); + } else { + gf_log (GF_IO_THREADS, GF_LOG_DEBUG, + "Config file %s not modified, skipping.", + _IOT_NAMESPACE_CONF); + } + } +} + +static void +start_iot_reinit_ns_conf_thread (xlator_t *this) +{ + iot_conf_t *priv = this->private; + int ret; + + if (!priv) { + return; + } + + if (priv->reinit_ns_conf_thread_running) { + gf_log (GF_IO_THREADS, GF_LOG_INFO, "reinit_ns_conf_thread already started."); + return; + } + + gf_log (GF_IO_THREADS, GF_LOG_INFO, "Starting reinit_ns_conf_thread."); + + ret = pthread_create (&priv->reinit_ns_conf_thread, NULL, iot_reinit_ns_conf_thread, this); + if (ret == 0) { + priv->reinit_ns_conf_thread_running = _gf_true; + } else { + gf_log (this->name, GF_LOG_WARNING, + "pthread_create(iot_reinit_ns_conf_thread) failed"); + } +} + +static void +stop_iot_reinit_ns_conf_thread (xlator_t *this) +{ + iot_conf_t *priv = this->private; + + if (!priv) { + return; + } + + if (!priv->reinit_ns_conf_thread_running) { + gf_log (GF_IO_THREADS, GF_LOG_INFO, "reinit_ns_conf_thread already stopped."); + return; + } + + gf_log (GF_IO_THREADS, GF_LOG_INFO, "Stopping reinit_ns_conf_thread."); + + if (pthread_cancel (priv->reinit_ns_conf_thread) != 0) { + gf_log (this->name, GF_LOG_WARNING, + 
"pthread_cancel(iot_reinit_ns_conf_thread) failed"); + } + + if (pthread_join (priv->reinit_ns_conf_thread, NULL) != 0) { + gf_log (this->name, GF_LOG_WARNING, + "pthread_join(iot_reinit_ns_conf_thread) failed"); + } + + /* Failure probably means it's already dead. */ + priv->reinit_ns_conf_thread_running = _gf_false; +} + call_stub_t * -__iot_dequeue (iot_conf_t *conf, int *pri, struct timespec *sleep) +__iot_dequeue (iot_conf_t *conf, int *pri) { - call_stub_t *stub = NULL; - int i = 0; - struct timeval curtv = {0,}, difftv = {0,}; + call_stub_t *stub = NULL; + iot_ns_clock_t *curr_clock; + iot_ns_queue_t *queue = NULL; + int i, start_idx; *pri = -1; - sleep->tv_sec = 0; - sleep->tv_nsec = 0; + for (i = 0; i < IOT_PRI_MAX; i++) { - if (list_empty (&conf->reqs[i]) || - (conf->ac_iot_count[i] >= conf->ac_iot_limit[i])) + if (conf->ac_iot_count[i] >= conf->ac_iot_limit[i] || + conf->queue_sizes[i] == 0) { + /* If we have too many workers currently serving this + * priority level, or no reqs at this level, skip. */ continue; + } - if (i == IOT_PRI_LEAST) { - pthread_mutex_lock(&conf->throttle.lock); - if (!conf->throttle.sample_time.tv_sec) { - /* initialize */ - gettimeofday(&conf->throttle.sample_time, NULL); - } else { - /* - * Maintain a running count of least priority - * operations that are handled over a particular - * time interval. The count is provided via - * state dump and is used as a measure against - * least priority op throttling. - */ - gettimeofday(&curtv, NULL); - timersub(&curtv, &conf->throttle.sample_time, - &difftv); - if (difftv.tv_sec >= IOT_LEAST_THROTTLE_DELAY) { - conf->throttle.cached_rate = - conf->throttle.sample_cnt; - conf->throttle.sample_cnt = 0; - conf->throttle.sample_time = curtv; - } - - /* - * If we're over the configured rate limit, - * provide an absolute time to the caller that - * represents the soonest we're allowed to - * return another least priority request. 
- */ - if (conf->throttle.rate_limit && - conf->throttle.sample_cnt >= - conf->throttle.rate_limit) { - struct timeval delay; - delay.tv_sec = IOT_LEAST_THROTTLE_DELAY; - delay.tv_usec = 0; - - timeradd(&conf->throttle.sample_time, - &delay, &curtv); - TIMEVAL_TO_TIMESPEC(&curtv, sleep); - - pthread_mutex_unlock( - &conf->throttle.lock); - break; - } - } - conf->throttle.sample_cnt++; - pthread_mutex_unlock(&conf->throttle.lock); - } - - stub = list_entry (conf->reqs[i].next, call_stub_t, list); + if (conf->ns_weighted_queueing) { + /* Get the clock for this priority level, and keep track + * of where the search started, so we know if we've + * searched through the whole clock. */ + curr_clock = &conf->ns_clocks[i]; + start_idx = curr_clock->idx; + + do { + /* Get the queue for the current index (modulo + * size), and increment the clock forward. */ + queue = curr_clock->slots[curr_clock->idx]; + curr_clock->idx = (curr_clock->idx + 1) % curr_clock->size; + + /* If we have a request to serve, then we're done. */ + if (!__iot_ns_queue_empty (queue)) { + break; + } + + /* Otherwise, keep searching until we've looped + * back to the start. */ + queue = NULL; + } while (curr_clock->idx != start_idx); + + /* If we have no queue, we must've not found a req to + * serve. Let's try the next priority. */ + if (!queue) { + continue; + } + } else { + /* If our unknown queue is empty, we have no other + * queues to serve. */ + if (__iot_ns_queue_empty (&conf->ns_unknown_queue[i])) { + continue; + } + + /* Select the unknown queue as the next queue to + * serve a request from. */ + queue = &conf->ns_unknown_queue[i]; + } + + /* Otherwise take a request off of the queue. */ + stub = list_first_entry (&queue->reqs, call_stub_t, list); + list_del_init (&stub->list); + + /* Increment the number of workers serving this priority, + * and record which priority we are serving. Update queue + * sizes and set `pri` variable for the caller. 
*/ conf->ac_iot_count[i]++; conf->queue_marked[i] = _gf_false; + + conf->queue_size--; + conf->queue_sizes[i]--; + queue->size--; + *pri = i; break; } - if (!stub) - return NULL; - - conf->queue_size--; - conf->queue_sizes[*pri]--; - list_del_init (&stub->list); - return stub; } @@ -135,15 +680,29 @@ __iot_dequeue (iot_conf_t *conf, int *pri, struct timespec *sleep) void __iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri) { - if (pri < 0 || pri >= IOT_PRI_MAX) - pri = IOT_PRI_MAX-1; + ns_info_t *info = &stub->frame->root->ns_info; + iot_ns_queue_t *queue = 0; + + /* If we have an invalid priority, set it to LEAST. */ + if (pri < 0 || pri >= IOT_PRI_MAX) { + pri = IOT_PRI_MAX - 1; + } - list_add_tail (&stub->list, &conf->reqs[pri]); + /* Get the queue for this namespace. If we don't have one, + * use the unknown queue that always exists in the conf struct. */ + queue = __iot_get_ns_queue (conf, info); + if (!queue) { + queue = conf->ns_unknown_queue; + } + + /* Get the (pri)'th level queue, and add the request to the queue. */ + queue = &queue[pri]; + list_add_tail (&stub->list, &queue->reqs); + /* Update size records. 
*/ conf->queue_size++; conf->queue_sizes[pri]++; - - return; + queue->size++; } @@ -156,28 +715,24 @@ iot_worker (void *data) struct timespec sleep_till = {0, }; int ret = 0; int pri = -1; - struct timespec sleep = {0,}; - gf_boolean_t bye = _gf_false; + char timeout = 0; + char bye = 0; conf = data; this = conf->this; THIS = this; + gf_log (GF_IO_THREADS, GF_LOG_DEBUG, "IOT worker spawned."); - for (;;) { + while (_gf_true) { pthread_mutex_lock (&conf->mutex); { if (pri != -1) { conf->ac_iot_count[pri]--; pri = -1; } - while (conf->queue_size == 0) { - if (conf->down) { - bye = _gf_true;/*Avoid sleep*/ - break; - } - clock_gettime (CLOCK_REALTIME_COARSE, - &sleep_till); + while (conf->queue_size == 0) { + clock_gettime (CLOCK_REALTIME_COARSE, &sleep_till); sleep_till.tv_sec += conf->idle_time; conf->sleep_count++; @@ -186,55 +741,52 @@ iot_worker (void *data) &sleep_till); conf->sleep_count--; - if (conf->down || ret == ETIMEDOUT) { - bye = _gf_true; + if (ret == ETIMEDOUT) { + timeout = 1; break; } } - if (bye) { - if (conf->down || - conf->curr_count > IOT_MIN_THREADS) { + if (timeout) { + if (conf->curr_count > IOT_MIN_THREADS) { conf->curr_count--; - if (conf->curr_count == 0) - pthread_cond_broadcast (&conf->cond); - gf_msg_debug (conf->this->name, 0, - "terminated. " - "conf->curr_count=%d", - conf->curr_count); + bye = 1; + gf_log (conf->this->name, GF_LOG_DEBUG, + "timeout, terminated. 
conf->curr_count=%d", + conf->curr_count); } else { - bye = _gf_false; + timeout = 0; } } - if (!bye) { - stub = __iot_dequeue (conf, &pri, &sleep); - if (!stub && (sleep.tv_sec || sleep.tv_nsec)) { - pthread_cond_timedwait(&conf->cond, - &conf->mutex, - &sleep); - pthread_mutex_unlock(&conf->mutex); - continue; - } - } + stub = __iot_dequeue (conf, &pri); } pthread_mutex_unlock (&conf->mutex); if (stub) { /* guard against spurious wakeups */ if (stub->poison) { gf_log (this->name, GF_LOG_INFO, - "Dropping poisoned request %p.", stub); + "dropping poisoned request %p", stub); call_stub_destroy (stub); } else { call_resume (stub); } } - stub = NULL; - if (bye) + if (bye) { break; + } + } + + if (pri != -1) { + pthread_mutex_lock (&conf->mutex); + { + conf->ac_iot_count[pri]--; + } + pthread_mutex_unlock (&conf->mutex); } + gf_log (GF_IO_THREADS, GF_LOG_DEBUG, "IOT worker died."); return NULL; } @@ -273,6 +825,9 @@ iot_get_pri_meaning (iot_pri_t pri) { char *name = NULL; switch (pri) { + case IOT_PRI_UNSPEC: + name = "unspecified"; + break; case IOT_PRI_HI: name = "fast"; break; @@ -288,13 +843,11 @@ iot_get_pri_meaning (iot_pri_t pri) case IOT_PRI_MAX: name = "invalid"; break; - case IOT_PRI_UNSPEC: - name = "unspecified"; - break; } return name; } + int iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) { @@ -307,75 +860,17 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) goto out; } - switch (stub->fop) { - case GF_FOP_OPEN: - case GF_FOP_STAT: - case GF_FOP_FSTAT: - case GF_FOP_LOOKUP: - case GF_FOP_ACCESS: - case GF_FOP_READLINK: - case GF_FOP_OPENDIR: - case GF_FOP_STATFS: - case GF_FOP_READDIR: - case GF_FOP_READDIRP: - case GF_FOP_GETACTIVELK: - case GF_FOP_SETACTIVELK: - pri = IOT_PRI_HI; - break; - - case GF_FOP_CREATE: - case GF_FOP_FLUSH: - case GF_FOP_LK: - case GF_FOP_INODELK: - case GF_FOP_FINODELK: - case GF_FOP_ENTRYLK: - case GF_FOP_FENTRYLK: - case GF_FOP_LEASE: - case GF_FOP_UNLINK: - case GF_FOP_SETATTR: 
- case GF_FOP_FSETATTR: - case GF_FOP_MKNOD: - case GF_FOP_MKDIR: - case GF_FOP_RMDIR: - case GF_FOP_SYMLINK: - case GF_FOP_RENAME: - case GF_FOP_LINK: - case GF_FOP_SETXATTR: - case GF_FOP_GETXATTR: - case GF_FOP_FGETXATTR: - case GF_FOP_FSETXATTR: - case GF_FOP_REMOVEXATTR: - case GF_FOP_FREMOVEXATTR: - pri = IOT_PRI_NORMAL; - break; + if (frame->pri != IOT_PRI_UNSPEC) { + pri = frame->pri; + goto out; + } - case GF_FOP_READ: - case GF_FOP_WRITE: - case GF_FOP_FSYNC: - case GF_FOP_TRUNCATE: - case GF_FOP_FTRUNCATE: - case GF_FOP_FSYNCDIR: - case GF_FOP_XATTROP: - case GF_FOP_FXATTROP: - case GF_FOP_RCHECKSUM: - case GF_FOP_FALLOCATE: - case GF_FOP_DISCARD: - case GF_FOP_ZEROFILL: - pri = IOT_PRI_LO; - break; + // Retrieve the fop priority + pri = iot_fop_to_pri (stub->fop); - case GF_FOP_FORGET: - case GF_FOP_RELEASE: - case GF_FOP_RELEASEDIR: - case GF_FOP_GETSPEC: - break; - case GF_FOP_IPC: - default: - return -EINVAL; - } out: - gf_msg_debug (this->name, 0, "%s scheduled as %s fop", - gf_fop_list[stub->fop], iot_get_pri_meaning (pri)); + gf_log (this->name, GF_LOG_DEBUG, "%s scheduled as %s fop", + gf_fop_list[stub->fop], iot_get_pri_meaning (pri)); ret = do_iot_schedule (this->private, stub, pri); return ret; } @@ -619,34 +1114,115 @@ iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, } +/* Populate all queue size keys for a specific priority level. */ int -iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +__iot_populate_ns_queue_sizes (iot_conf_t *conf, dict_t *depths, int pri) +{ + int ret = 0; + data_pair_t *curr = NULL; + iot_ns_queue_t *curr_queue = NULL; + char *temp_key = NULL; + char *ns_name = NULL; + const char *pri_str = fop_pri_to_string (pri);; + + /* For each namespace at this priority level, we try to grab the n + * namespace name and record a key like `namespaces.NAME.PRI_LEVEL`. 
*/ + dict_foreach_inline (conf->ns_queues, curr) { + curr_queue = data_to_ptr (curr->value); + curr_queue = &curr_queue[pri]; + + /* Try to retrieve the namespace's un-hashed real name. */ + ret = dict_get_str (conf->hash_to_ns, curr->key, &ns_name); + if (ret || !ns_name) { + continue; + } + + /* Give the root namespace a readable name. */ + if (strncmp (ns_name, "/", 1) == 0) { + ns_name = "root"; + } + + /* Print a new temporary key for the namespace and priority level. */ + ret = gf_asprintf (&temp_key, "namespaces.%s.%s", ns_name, pri_str); + if (ret == -1 || !temp_key) { + ret = -(ENOMEM); + goto out; + } + + /* Insert the key and queue-size. */ + ret = dict_set_int32 (depths, temp_key, curr_queue->size); + GF_FREE (temp_key); + temp_key = NULL; + + if (ret) { + goto out; + } + } + +out: + return ret; +} + +/* Populate global queue counts (per-priority) and if namespace queueing is + * enabled, then also add those keys to the dictionary as well. */ +int +__iot_populate_queue_sizes (iot_conf_t *conf, dict_t **depths) { - iot_conf_t *conf = NULL; - dict_t *depths = NULL; - int i = 0; + int ret = 0, pri = 0; + const char *pri_str = NULL; - conf = this->private; + /* We explicitly do not want a reference count for this dict + * in this translator, since it will get freed in io_stats later. */ + *depths = get_new_dict (); + if (!*depths) { + return -(ENOMEM); + } - if (conf && name && strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { - // We explicitly do not want a reference count - // for this dict in this translator - depths = get_new_dict (); - if (!depths) - goto unwind_special_getxattr; + for (pri = 0; pri < IOT_PRI_MAX; pri++) { + pri_str = fop_pri_to_string (pri); - for (i = 0; i < IOT_PRI_MAX; i++) { - if (dict_set_int32 (depths, - (char *)fop_pri_to_string (i), - conf->queue_sizes[i]) != 0) { - dict_destroy (depths); - depths = NULL; - goto unwind_special_getxattr; + /* First, let's add an entry for the number of requests + * per prority (globally). 
*/ + ret = dict_set_int32 (*depths, (char *)pri_str, + conf->queue_sizes[pri]); + if (ret) { + goto out; + } + + /* If namespace queueing is enabled, then try to populate + * per-namespace queue keys as well. */ + if (conf->ns_weighted_queueing) { + ret = __iot_populate_ns_queue_sizes (conf, *depths, pri); + if (ret) { + goto out; } } + } + +out: + if (ret) { + dict_destroy (*depths); + *depths = NULL; + } + + return ret; +} + +int +iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + iot_conf_t *conf = this->private; + dict_t *depths = NULL; + + if (name && strcmp (name, IO_THREADS_QUEUE_SIZE_KEY) == 0) { + /* Populate depths dict, or it'll stay NULL if an error occurred. */ + pthread_mutex_lock (&conf->mutex); + { + __iot_populate_queue_sizes (conf, &depths); + } + pthread_mutex_unlock (&conf->mutex); -unwind_special_getxattr: STACK_UNWIND_STRICT (getxattr, frame, 0, 0, depths, xdata); return 0; } @@ -824,10 +1400,9 @@ __iot_workers_scale (iot_conf_t *conf) ret = gf_thread_create (&thread, &conf->w_attr, iot_worker, conf); if (ret == 0) { conf->curr_count++; - gf_msg_debug (conf->this->name, 0, - "scaled threads to %d (queue_size=%d/%d)", - conf->curr_count, - conf->queue_size, scale); + gf_log (conf->this->name, GF_LOG_DEBUG, + "scaled threads to %d (queue_size=%d/%d)", + conf->curr_count, conf->queue_size, scale); } else { break; } @@ -872,13 +1447,11 @@ set_stack_size (iot_conf_t *conf) if (err == EINVAL) { err = pthread_attr_getstacksize (&conf->w_attr, &stacksize); if (!err) - gf_msg (this->name, GF_LOG_WARNING, - 0, IO_THREADS_MSG_SIZE_NOT_SET, + gf_log (this->name, GF_LOG_WARNING, "Using default thread stack size %zd", stacksize); else - gf_msg (this->name, GF_LOG_WARNING, - 0, IO_THREADS_MSG_SIZE_NOT_SET, + gf_log (this->name, GF_LOG_WARNING, "Using default thread stack size"); } @@ -897,9 +1470,8 @@ mem_acct_init (xlator_t *this) ret = xlator_mem_acct_init (this, gf_iot_mt_end + 1); if (ret != 0) { - 
gf_msg (this->name, GF_LOG_ERROR, - ENOMEM, IO_THREADS_MSG_NO_MEMORY, - "Memory accounting init failed"); + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); return ret; } @@ -938,10 +1510,6 @@ iot_priv_dump (xlator_t *this) gf_proc_dump_write("least_priority_threads", "%d", conf->ac_iot_limit[IOT_PRI_LEAST]); - gf_proc_dump_write("cached least rate", "%u", - conf->throttle.cached_rate); - gf_proc_dump_write("least rate limit", "%u", conf->throttle.rate_limit); - return 0; } @@ -1107,13 +1675,16 @@ stop_iot_watchdog (xlator_t *this) int reconfigure (xlator_t *this, dict_t *options) { - iot_conf_t *conf = NULL; - int ret = -1; + iot_conf_t *conf = NULL; + int i, ret = -1; + gf_boolean_t ns_weighted_queueing = _gf_false; conf = this->private; if (!conf) goto out; + GF_OPTION_RECONF ("iam-brick-daemon", conf->iambrickd, options, bool, out); + GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32, out); GF_OPTION_RECONF ("fops-per-thread-ratio", conf->fops_per_thread_ratio, @@ -1132,25 +1703,52 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("least-prio-threads", conf->ac_iot_limit[IOT_PRI_LEAST], options, int32, out); + GF_OPTION_RECONF ("enable-least-priority", conf->least_priority, options, bool, out); - GF_OPTION_RECONF ("least-rate-limit", conf->throttle.rate_limit, - options, int32, out); - GF_OPTION_RECONF ("cleanup-disconnected-reqs", conf->cleanup_disconnected_reqs, options, bool, out); GF_OPTION_RECONF ("watchdog-secs", conf->watchdog_secs, options, int32, out); - if (conf->watchdog_secs > 0) { start_iot_watchdog (this); } else { stop_iot_watchdog (this); } - ret = 0; + GF_OPTION_RECONF ("ns-weighted-queueing", ns_weighted_queueing, options, bool, out); + GF_OPTION_RECONF ("ns-default-weight", conf->ns_default_weight, options, uint32, out); + GF_OPTION_RECONF ("ns-weight-tolerance", conf->ns_weight_tolerance, options, double, out); + GF_OPTION_RECONF ("ns-conf-reinit-secs", conf->ns_conf_reinit_secs, 
options, + uint32, out); + + if (!conf->iambrickd) { + ns_weighted_queueing = _gf_false; + } + + /* Reinit the default weight, which is the weight of the unknown queue. */ + for (i = 0; i < IOT_PRI_MAX; i++) { + conf->ns_unknown_queue[i].weight = conf->ns_default_weight; + } + + if (ns_weighted_queueing != conf->ns_weighted_queueing) { + pthread_mutex_lock (&conf->mutex); + { + conf->ns_weighted_queueing = ns_weighted_queueing; + __iot_reinit_ns_conf (conf); + } + pthread_mutex_unlock (&conf->mutex); + } + + if (conf->ns_weighted_queueing) { + start_iot_reinit_ns_conf_thread (this); + } else { + stop_iot_reinit_ns_conf_thread (this); + } + + ret = 0; out: return ret; } @@ -1160,49 +1758,43 @@ int init (xlator_t *this) { iot_conf_t *conf = NULL; - int ret = -1; - int i = 0; + int i, ret = -1; if (!this->children || this->children->next) { - gf_msg ("io-threads", GF_LOG_ERROR, 0, - IO_THREADS_MSG_XLATOR_CHILD_MISCONFIGURED, - "FATAL: iot not configured " - "with exactly one child"); + gf_log ("io-threads", GF_LOG_ERROR, + "FATAL: iot not configured with exactly one child"); goto out; } if (!this->parents) { - gf_msg (this->name, GF_LOG_WARNING, 0, - IO_THREADS_MSG_VOL_MISCONFIGURED, - "dangling volume. check volfile "); + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); } conf = (void *) GF_CALLOC (1, sizeof (*conf), gf_iot_mt_iot_conf_t); if (conf == NULL) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - IO_THREADS_MSG_NO_MEMORY, "out of memory"); + gf_log (this->name, GF_LOG_ERROR, + "out of memory"); goto out; } - if ((ret = pthread_cond_init(&conf->cond, NULL)) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - IO_THREADS_MSG_INIT_FAILED, + if ((ret = pthread_cond_init (&conf->cond, NULL)) != 0) { + gf_log (this->name, GF_LOG_ERROR, "pthread_cond_init failed (%d)", ret); goto out; } - conf->cond_inited = _gf_true; - if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - IO_THREADS_MSG_INIT_FAILED, + if ((ret = pthread_mutex_init (&conf->mutex, NULL)) != 0) { + gf_log (this->name, GF_LOG_ERROR, "pthread_mutex_init failed (%d)", ret); goto out; } - conf->mutex_inited = _gf_true; set_stack_size (conf); + GF_OPTION_INIT ("iam-brick-daemon", conf->iambrickd, bool, out); + GF_OPTION_INIT ("thread-count", conf->max_count, int32, out); GF_OPTION_INIT ("fops-per-thread-ratio", conf->fops_per_thread_ratio, @@ -1228,33 +1820,45 @@ init (xlator_t *this) GF_OPTION_INIT ("cleanup-disconnected-reqs", conf->cleanup_disconnected_reqs, bool, out); - GF_OPTION_INIT ("least-rate-limit", conf->throttle.rate_limit, int32, - out); + conf->this = this; + + GF_OPTION_INIT ("ns-weighted-queueing", conf->ns_weighted_queueing, bool, out); + GF_OPTION_INIT ("ns-default-weight", conf->ns_default_weight, uint32, out); + GF_OPTION_INIT ("ns-weight-tolerance", conf->ns_weight_tolerance, double, out); + GF_OPTION_INIT ("ns-conf-reinit-secs", conf->ns_conf_reinit_secs, uint32, out); - if ((ret = pthread_mutex_init(&conf->throttle.lock, NULL)) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - IO_THREADS_MSG_INIT_FAILED, - "pthread_mutex_init failed (%d)", ret); - goto out; + for (i = 0; i < IOT_PRI_MAX; i++) { + INIT_LIST_HEAD (&conf->ns_unknown_queue[i].reqs); + conf->ns_unknown_queue[i].hash = 0; + 
conf->ns_unknown_queue[i].weight = conf->ns_default_weight; + conf->ns_unknown_queue[i].size = 0; } - conf->this = this; + if (!conf->iambrickd) { + conf->ns_weighted_queueing = _gf_false; + } - for (i = 0; i < IOT_PRI_MAX; i++) { - INIT_LIST_HEAD (&conf->reqs[i]); + conf->hash_to_ns = dict_new (); + + pthread_mutex_lock (&conf->mutex); + { + __iot_reinit_ns_conf (conf); } + pthread_mutex_unlock (&conf->mutex); ret = iot_workers_scale (conf); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - IO_THREADS_MSG_INIT_FAILED, + gf_log (this->name, GF_LOG_ERROR, "cannot initialize worker threads, exiting init"); goto out; } this->private = conf; + if (conf->ns_weighted_queueing) { + start_iot_reinit_ns_conf_thread (this); + } + conf->watchdog_secs = 0; GF_OPTION_INIT ("watchdog-secs", conf->watchdog_secs, int32, out); if (conf->watchdog_secs > 0) { @@ -1263,57 +1867,24 @@ init (xlator_t *this) ret = 0; out: - if (ret) + if (ret) { GF_FREE (conf); + } return ret; } -static void -iot_exit_threads (iot_conf_t *conf) -{ - pthread_mutex_lock (&conf->mutex); - { - conf->down = _gf_true; - /*Let all the threads know that xl is going down*/ - pthread_cond_broadcast (&conf->cond); - while (conf->curr_count)/*Wait for threads to exit*/ - pthread_cond_wait (&conf->cond, &conf->mutex); - } - pthread_mutex_unlock (&conf->mutex); -} - -int -notify (xlator_t *this, int32_t event, void *data, ...) 
-{ - iot_conf_t *conf = this->private; - - if (GF_EVENT_PARENT_DOWN == event) - iot_exit_threads (conf); - - default_notify (this, event, data); - - return 0; -} void fini (xlator_t *this) { iot_conf_t *conf = this->private; - if (!conf) - return; - - if (conf->mutex_inited && conf->cond_inited) - iot_exit_threads (conf); - - if (conf->cond_inited) - pthread_cond_destroy (&conf->cond); - - if (conf->mutex_inited) - pthread_mutex_destroy (&conf->mutex); - stop_iot_watchdog (this); + stop_iot_reinit_ns_conf_thread (this); + + dict_unref (conf->hash_to_ns); + conf->hash_to_ns = NULL; GF_FREE (conf); @@ -1321,13 +1892,32 @@ fini (xlator_t *this) return; } +/* Clears a queue of requests from the given client (which is assumed to + * have disconnected or otherwise stopped needing these requests...) */ +void +clear_reqs_from_queue (xlator_t *this, client_t *client, struct list_head *queue) +{ + call_stub_t *curr; + call_stub_t *next; + + list_for_each_entry_safe (curr, next, queue, list) { + if (curr->frame->root->client != client) { + continue; + } + gf_log (this->name, GF_LOG_INFO, + "poisoning %s fop at %p for client %s", + gf_fop_list[curr->fop], curr, client->client_uid); + curr->poison = _gf_true; + } +} + static int iot_disconnect_cbk (xlator_t *this, client_t *client) { + iot_conf_t *conf = this->private; + data_pair_t *curr = NULL; + iot_ns_queue_t *curr_queue = NULL; int i; - call_stub_t *curr; - call_stub_t *next; - iot_conf_t *conf = this->private; if (!conf || !conf->cleanup_disconnected_reqs) { goto out; @@ -1335,15 +1925,15 @@ iot_disconnect_cbk (xlator_t *this, client_t *client) pthread_mutex_lock (&conf->mutex); for (i = 0; i < IOT_PRI_MAX; i++) { - list_for_each_entry_safe (curr, next, &conf->reqs[i], list) { - if (curr->frame->root->client != client) { - continue; + /* Clear client reqs from the unknown queue. */ + clear_reqs_from_queue (this, client, &conf->ns_unknown_queue[i].reqs); + /* Clear client reqs from each of the namespace queues. 
*/ + if (conf->ns_weighted_queueing && conf->ns_queues) { + dict_foreach_inline (conf->ns_queues, curr) { + curr_queue = data_to_ptr (curr->value); + curr_queue = &curr_queue[i]; + clear_reqs_from_queue (this, client, &curr_queue->reqs); } - gf_log (this->name, GF_LOG_INFO, - "poisoning %s fop at %p for client %s", - gf_fop_list[curr->fop], curr, - client->client_uid); - curr->poison = _gf_true; } } pthread_mutex_unlock (&conf->mutex); @@ -1412,7 +2002,7 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .min = IOT_MIN_THREADS, .max = IOT_MAX_THREADS, - .default_value = "16", + .default_value = "32", .description = "Number of threads in IO threads translator which " "perform concurrent IO operations" @@ -1435,7 +2025,7 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .min = IOT_MIN_THREADS, .max = IOT_MAX_THREADS, - .default_value = "16", + .default_value = "32", .description = "Max number of threads in IO threads translator which " "perform high priority IO operations at a given time" @@ -1444,7 +2034,7 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .min = IOT_MIN_THREADS, .max = IOT_MAX_THREADS, - .default_value = "16", + .default_value = "32", .description = "Max number of threads in IO threads translator which " "perform normal priority IO operations at a given time" @@ -1453,7 +2043,7 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .min = IOT_MIN_THREADS, .max = IOT_MAX_THREADS, - .default_value = "16", + .default_value = "32", .description = "Max number of threads in IO threads translator which " "perform low priority IO operations at a given time" @@ -1477,14 +2067,6 @@ struct volume_options options[] = { .max = 0x7fffffff, .default_value = "120", }, - { .key = {"least-rate-limit"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = INT_MAX, - .default_value = "0", - .description = "Max number of least priority operations to handle " - "per-second" - }, { .key = {"watchdog-secs"}, .type 
= GF_OPTION_TYPE_INT, .min = 0, @@ -1495,7 +2077,47 @@ struct volume_options options[] = { { .key = {"cleanup-disconnected-reqs"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", - .description = "'Poison' queued requests when a client disconnects" + .description = "Enable/Disable least priority" + }, + { .key = {"ns-weighted-queueing"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Enables the namespace queues clock." + }, + { .key = {"ns-default-weight"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 0x7fffffff, + .default_value = "100", + .description = "The default weight of a queue which doesn't have a " + "weight specified in the namespace conf. This is also " + "the weight of the unknown (default) queue." + }, + { .key = {"ns-weight-tolerance"}, + .type = GF_OPTION_TYPE_DOUBLE, + .default_value = "0.5", + .description = "The tolerance in percentage (out of 100) that the " + "slot-allocation algorithm will tolerate for weight/total " + "and slots/total percentages. This corresponds to " + "ideal and realized workload percentages allocated " + "to the namespace." + }, + { .key = {"ns-conf-reinit-secs"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 0x7fffffff, + .default_value = "5", + .description = "Number of seconds that the ns conf reinit thread " + "sleeps before trying to detect changes in the " + "namespace config file." + }, + { + .key = {"iam-brick-daemon"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "This option differentiates if the io-stats " + "translator is running as part of brick daemon " + "or not." 
}, { .key = {NULL}, }, diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index 4300cf673b2..5c3403a7153 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -11,6 +11,11 @@ #ifndef __IOT_H #define __IOT_H +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + #include "compat-errno.h" #include "glusterfs.h" @@ -25,6 +30,8 @@ #include <semaphore.h> #include "statedump.h" +#define GF_IO_THREADS "io-threads" +const char *_IOT_NAMESPACE_CONF = TOSTRING (GLUSTERD_WORKDIR) "/namespaces.conf"; struct iot_conf; @@ -38,23 +45,33 @@ struct iot_conf; #define IOT_MIN_FOP_PER_THREAD 0 #define IOT_MAX_FOP_PER_THREAD 2000 - -#define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024)) - - -#define IOT_LEAST_THROTTLE_DELAY 1 /* sample interval in seconds */ -struct iot_least_throttle { - struct timeval sample_time; /* timestamp of current sample */ - uint32_t sample_cnt; /* sample count for active interval */ - uint32_t cached_rate; /* the most recently measured rate */ - int32_t rate_limit; /* user-specified rate limit */ - pthread_mutex_t lock; -}; +/* A queue (well, typically a set of IOT_PRI_MAX queues) that corresponds to + * a namespace and a priority level. */ +typedef struct _iot_ns_queue { + uint32_t hash; /* Hash of namespace queue corresponds to. */ + uint32_t weight; /* Weight this queue should have in the clock. */ + double percentage; /* Percentage of total weight given to this queue. */ + uint32_t slots; /* Temp variable for number of slots to allocate to this queue. */ + uint32_t size; /* Number of reqs in the queue currently. */ + struct list_head reqs; /* Queue of fop call-stubs (requests) */ +} iot_ns_queue_t; + +/* A circular buffer which points to multiple queues which approximate a + * weighted round robin serving requests. */ +typedef struct _iot_ns_clock { + unsigned int idx; /* Current idx of position on clock. 
*/ + size_t size; /* Size of clock. */ + iot_ns_queue_t **slots; /* Circular buffer of clock slots. */ +} iot_ns_clock_t; + +#define IOT_THREAD_STACK_SIZE ((size_t)(1024*1024*8)) struct iot_conf { pthread_mutex_t mutex; pthread_cond_t cond; + gf_boolean_t iambrickd; + int32_t max_count; /* configured maximum */ int32_t fops_per_thread_ratio; int32_t curr_count; /* actual number of threads running */ @@ -62,30 +79,39 @@ struct iot_conf { int32_t idle_time; /* in seconds */ - struct list_head reqs[IOT_PRI_MAX]; + gf_boolean_t ns_weighted_queueing; + uint32_t ns_default_weight; + double ns_weight_tolerance; + uint32_t ns_conf_reinit_secs; + time_t ns_conf_mtime; + dict_t *ns_queues; /* Queue for requsts for namespaces that were parsed. */ + dict_t *hash_to_ns; /* Hash key (string) -> Namespace string */ + iot_ns_clock_t ns_clocks[IOT_PRI_MAX]; /* Weighted Round Robin clocks (per priority). */ + iot_ns_queue_t ns_unknown_queue[IOT_PRI_MAX]; /* Queue for untagged requests (per priority). */ + + gf_boolean_t reinit_ns_conf_thread_running; + pthread_t reinit_ns_conf_thread; int32_t ac_iot_limit[IOT_PRI_MAX]; int32_t ac_iot_count[IOT_PRI_MAX]; int queue_sizes[IOT_PRI_MAX]; int queue_size; pthread_attr_t w_attr; - gf_boolean_t least_priority; /*Enable/Disable least-priority */ + gf_boolean_t least_priority; /* Enable/Disable least-priority */ - xlator_t *this; + xlator_t *this; size_t stack_size; - gf_boolean_t down; /*PARENT_DOWN event is notified*/ - gf_boolean_t mutex_inited; - gf_boolean_t cond_inited; - struct iot_least_throttle throttle; - int32_t watchdog_secs; gf_boolean_t watchdog_running; pthread_t watchdog_thread; gf_boolean_t queue_marked[IOT_PRI_MAX]; + gf_boolean_t cleanup_disconnected_reqs; }; typedef struct iot_conf iot_conf_t; +iot_pri_t iot_fop_to_pri (glusterfs_fop_t fop); + #endif /* __IOT_H */ |