1 files changed, 306 insertions, 0 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c
new file mode 100644
index 00000000000..d9543416540
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c
@@ -0,0 +1,306 @@
+/*
+   Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+/**
+ *
+ * Basic token bucket implementation for rate limiting. As of now interfaces
+ * to throttle disk read request, directory entry scan and hash calculation
+ * are available. To throttle a particular request (operation), the call needs
+ * to be wrapped in-between throttling APIs, for e.g.
+ *
+ *  TBF_THROTTLE_BEGIN (...);  <-- induces "delays" if required
+ *  {
+ *      call (...);
+ *  }
+ *  TBF_THROTTLE_END (...);  <-- not used atm, maybe needed later
+ *
+ */
+
+#include "mem-pool.h"
+#include "bit-rot-tbf.h"
+#include "bit-rot-stub-mem-types.h"
+
+typedef struct br_tbf_throttle {
+        char done;
+
+        pthread_mutex_t mutex;
+        pthread_cond_t  cond;
+
+        unsigned long tokens;
+
+        struct list_head list;
+} br_tbf_throttle_t;
+
+/**
+ * OK. Most implementations of TBF I've come across generate tokens
+ * every second (UML, etc..) and some chose sub-second granularity
+ * (blk-iothrottle cgroups). TBF algorithm itself does not enforce
+ * any logic for choosing generation interval and it seems pretty
+ * logical as one could jack up token count per interval w.r.t.
+ * generation rate.
+ *
+ * Value used here is chosen based on a series of test(s) performed
+ * to balance object signing time and not maxing out on all available
+ * CPU cores. It's obvious to have seconds granularity and jack up
+ * token count per interval, thereby achieving close to similar
+ * results. Let's stick to this as it seems to be working fine for
+ * the set of ops that are throttled.
+ */
+#define BR_TBF_TOKENGEN_INTERVAL_USEC  600000
+
+static inline br_tbf_throttle_t *
+br_tbf_init_throttle (unsigned long tokens_required)
+{
+        br_tbf_throttle_t *throttle = NULL;
+
+        throttle = GF_CALLOC (1, sizeof (*throttle),
+                              gf_br_mt_br_tbf_throttle_t);
+        if (!throttle)
+                return NULL;
+
+        throttle->done = 0;
+        throttle->tokens = tokens_required;
+        INIT_LIST_HEAD (&throttle->list);
+
+        (void) pthread_mutex_init (&throttle->mutex, NULL);
+        (void) pthread_cond_init (&throttle->cond, NULL);
+
+        return throttle;
+}
+
+void
+_br_tbf_dispatch_queued (br_tbf_bucket_t *bucket)
+{
+        gf_boolean_t xcont = _gf_false;
+        br_tbf_throttle_t *tmp = NULL;
+        br_tbf_throttle_t *throttle = NULL;
+
+        list_for_each_entry_safe (throttle, tmp, &bucket->queued, list) {
+
+                pthread_mutex_lock (&throttle->mutex);
+                {
+                        if (bucket->tokens < throttle->tokens) {
+                                xcont = _gf_true;
+                                goto unblock;
+                        }
+
+                        /* this request can now be serviced */
+                        throttle->done = 1;
+                        list_del_init (&throttle->list);
+
+                        bucket->tokens -= throttle->tokens;
+                        pthread_cond_signal (&throttle->cond);
+                }
+        unblock:
+                pthread_mutex_unlock (&throttle->mutex);
+                if (xcont)
+                        break;
+        }
+}
+
+void *br_tbf_tokengenerator (void *arg)
+{
+        unsigned long tokenrate = 0;
+        unsigned long maxtokens = 0;
+        br_tbf_bucket_t *bucket = arg;
+
+        tokenrate = bucket->tokenrate;
+        maxtokens = bucket->maxtokens;
+
+        while (1) {
+                usleep (BR_TBF_TOKENGEN_INTERVAL_USEC);
+
+                LOCK (&bucket->lock);
+                {
+                        bucket->tokens += tokenrate;
+                        if (bucket->tokens > maxtokens)
+                                bucket->tokens = maxtokens;
+
+                        if (!list_empty (&bucket->queued))
+                                _br_tbf_dispatch_queued (bucket);
+                }
+                UNLOCK (&bucket->lock);
+        }
+
+        return NULL;
+}
+
+/**
+ * There is lazy synchronization between this routine (when invoked
+ * under br_tbf_mod() context) and br_tbf_throttle(). *bucket is
+ * updated _after_ all the required variables are initialized.
+ */
+static inline int32_t
+br_tbf_init_bucket (br_tbf_t *tbf, br_tbf_opspec_t *spec)
+{
+        int ret = 0;
+        br_tbf_bucket_t *curr = NULL;
+        br_tbf_bucket_t **bucket = NULL;
+
+        GF_ASSERT (spec->op >= BR_TBF_OP_MIN);
+        GF_ASSERT (spec->op <= BR_TBF_OP_MAX);
+
+        /* no rate? no throttling. */
+        if (!spec->rate)
+                return 0;
+
+        bucket = tbf->bucket + spec->op;
+
+        curr = GF_CALLOC (1, sizeof (*curr), gf_br_mt_br_tbf_bucket_t);
+        if (!curr)
+                goto error_return;
+
+        LOCK_INIT (&curr->lock);
+        INIT_LIST_HEAD (&curr->queued);
+
+        curr->tokens = 0;
+        curr->tokenrate = spec->rate;
+        curr->maxtokens = spec->maxlimit;
+
+        ret = gf_thread_create (&curr->tokener,
+                                NULL, br_tbf_tokengenerator, curr);
+        if (ret != 0)
+                goto freemem;
+
+        *bucket = curr;
+        return 0;
+
+ freemem:
+        LOCK_DESTROY (&curr->lock);
+        GF_FREE (curr);
+ error_return:
+        return -1;
+}
+
+#define BR_TBF_ALLOC_SIZE                                               \
+        (sizeof (br_tbf_t) + (BR_TBF_OP_MAX * sizeof (br_tbf_bucket_t)))
+
+br_tbf_t *
+br_tbf_init (br_tbf_opspec_t *tbfspec, unsigned int count)
+{
+        int32_t i = 0;
+        int32_t ret = 0;
+        br_tbf_t *tbf = NULL;
+        br_tbf_opspec_t *opspec = NULL;
+
+        tbf = GF_CALLOC (1, BR_TBF_ALLOC_SIZE, gf_br_mt_br_tbf_t);
+        if (!tbf)
+                goto error_return;
+
+        tbf->bucket = (br_tbf_bucket_t **) ((char *)tbf + sizeof (*tbf));
+        for (i = 0; i < BR_TBF_OP_MAX; i++) {
+                *(tbf->bucket + i) = NULL;
+        }
+
+        for (i = 0; i < count; i++) {
+                opspec = tbfspec + i;
+
+                ret = br_tbf_init_bucket (tbf, opspec);
+                if (ret)
+                        break;
+        }
+
+        if (ret)
+                goto error_return;
+
+        return tbf;
+
+ error_return:
+        return NULL;
+}
+
+static void
+br_tbf_mod_bucket (br_tbf_bucket_t *bucket, br_tbf_opspec_t *spec)
+{
+        LOCK (&bucket->lock);
+        {
+                bucket->tokens = 0;
+                bucket->tokenrate = spec->rate;
+                bucket->maxtokens = spec->maxlimit;
+        }
+        UNLOCK (&bucket->lock);
+
+        /* next token tick would unqueue pending operations */
+}
+
+int
+br_tbf_mod (br_tbf_t *tbf, br_tbf_opspec_t *tbfspec)
+{
+        int              ret    = 0;
+        br_tbf_bucket_t *bucket = NULL;
+        br_tbf_ops_t     op     = BR_TBF_OP_MIN;
+
+        if (!tbf || !tbfspec)
+                return -1;
+
+        op = tbfspec->op;
+
+        GF_ASSERT (op >= BR_TBF_OP_MIN);
+        GF_ASSERT (op <= BR_TBF_OP_MAX);
+
+        bucket = *(tbf->bucket + op);
+        if (bucket) {
+                br_tbf_mod_bucket (bucket, tbfspec);
+        } else {
+                ret = br_tbf_init_bucket (tbf, tbfspec);
+        }
+
+        return ret;
+}
+
+void
+br_tbf_throttle (br_tbf_t *tbf, br_tbf_ops_t op, unsigned long tokens_requested)
+{
+        char waitq = 0;
+        br_tbf_bucket_t *bucket = NULL;
+        br_tbf_throttle_t *throttle = NULL;
+
+        GF_ASSERT (op >= BR_TBF_OP_MIN);
+        GF_ASSERT (op <= BR_TBF_OP_MAX);
+
+        bucket = *(tbf->bucket + op);
+        if (!bucket)
+                return;
+
+        LOCK (&bucket->lock);
+        {
+                /**
+                 * if there are enough tokens in the bucket there is no need
+                 * to throttle the request: therefore, consume the required
+                 * number of tokens and continue.
+                 */
+                if (tokens_requested <= bucket->tokens) {
+                        bucket->tokens -= tokens_requested;
+                } else {
+                        throttle = br_tbf_init_throttle (tokens_requested);
+                        if (!throttle) /* let it slip through for now.. */
+                                goto unblock;
+
+                        waitq = 1;
+                        pthread_mutex_lock (&throttle->mutex);
+                        list_add_tail (&throttle->list, &bucket->queued);
+                }
+        }
+ unblock:
+        UNLOCK (&bucket->lock);
+
+        if (waitq) {
+                while (!throttle->done) {
+                        pthread_cond_wait (&throttle->cond, &throttle->mutex);
+                }
+
+                pthread_mutex_unlock (&throttle->mutex);
+
+                pthread_mutex_destroy (&throttle->mutex);
+                pthread_cond_destroy (&throttle->cond);
+
+                GF_FREE (throttle);
+        }
+}