diff options
Diffstat (limited to 'xlators/features/bit-rot')
-rw-r--r-- | xlators/features/bit-rot/src/bitd/Makefile.am | 4 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-scrub.c | 2 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-tbf.c | 306 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-tbf.h | 70 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 30 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h | 4 | ||||
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h | 4 |
7 files changed, 29 insertions, 391 deletions
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am index a915f2d34b8..dfa29fd72d9 100644 --- a/xlators/features/bit-rot/src/bitd/Makefile.am +++ b/xlators/features/bit-rot/src/bitd/Makefile.am @@ -9,12 +9,12 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(CONTRIBDIR)/timer-wheel \ -I$(top_srcdir)/xlators/features/bit-rot/src/stub -bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c bit-rot-ssm.c \ +bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-ssm.c \ bit-rot-scrub-status.c bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la -noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h bit-rot-bitd-messages.h bit-rot-ssm.h \ +noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-bitd-messages.h bit-rot-ssm.h \ bit-rot-scrub-status.h AM_CFLAGS = -Wall -DBR_RATE_LIMIT_SIGNER $(GF_CFLAGS) diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c index a5885642b89..7678d8e6ed2 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c @@ -1938,7 +1938,7 @@ br_scrubber_init (xlator_t *this, br_private_t *priv) struct br_scrubber *fsscrub = NULL; int ret = 0; - priv->tbf = br_tbf_init (NULL, 0); + priv->tbf = tbf_init (NULL, 0); if (!priv->tbf) return -1; diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c deleted file mode 100644 index f8b9b75d575..00000000000 --- a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c +++ /dev/null @@ -1,306 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/** - * - * Basic token bucket implementation for rate limiting. As of now interfaces - * to throttle disk read request, directory entry scan and hash calculation - * are available. To throttle a particular request (operation), the call needs - * to be wrapped in-between throttling APIs, for e.g. - * - * TBF_THROTTLE_BEGIN (...); <-- induces "delays" if required - * { - * call (...); - * } - * TBF_THROTTLE_END (...); <-- not used atm, maybe needed later - * - */ - -#include "mem-pool.h" -#include "bit-rot-tbf.h" -#include "bit-rot-stub-mem-types.h" - -typedef struct br_tbf_throttle { - char done; - - pthread_mutex_t mutex; - pthread_cond_t cond; - - unsigned long tokens; - - struct list_head list; -} br_tbf_throttle_t; - -/** - * OK. Most implementations of TBF I've come across generate tokens - * every second (UML, etc..) and some chose sub-second granularity - * (blk-iothrottle cgroups). TBF algorithm itself does not enforce - * any logic for choosing generation interval and it seems pretty - * logical as one could jack up token count per interval w.r.t. - * generation rate. - * - * Value used here is chosen based on a series of test(s) performed - * to balance object signing time and not maxing out on all available - * CPU cores. It's obvious to have seconds granularity and jack up - * token count per interval, thereby achieving close to similar - * results. Let's stick to this as it seems to be working fine for - * the set of ops that are throttled. - */ -#define BR_TBF_TOKENGEN_INTERVAL_USEC 600000 - -static br_tbf_throttle_t * -br_tbf_init_throttle (unsigned long tokens_required) -{ - br_tbf_throttle_t *throttle = NULL; - - throttle = GF_CALLOC (1, sizeof (*throttle), - gf_br_mt_br_tbf_throttle_t); - if (!throttle) - return NULL; - - throttle->done = 0; - throttle->tokens = tokens_required; - INIT_LIST_HEAD (&throttle->list); - - (void) pthread_mutex_init (&throttle->mutex, NULL); - (void) pthread_cond_init (&throttle->cond, NULL); - - return throttle; -} - -void -_br_tbf_dispatch_queued (br_tbf_bucket_t *bucket) -{ - gf_boolean_t xcont = _gf_false; - br_tbf_throttle_t *tmp = NULL; - br_tbf_throttle_t *throttle = NULL; - - list_for_each_entry_safe (throttle, tmp, &bucket->queued, list) { - - pthread_mutex_lock (&throttle->mutex); - { - if (bucket->tokens < throttle->tokens) { - xcont = _gf_true; - goto unblock; - } - - /* this request can now be serviced */ - throttle->done = 1; - list_del_init (&throttle->list); - - bucket->tokens -= throttle->tokens; - pthread_cond_signal (&throttle->cond); - } - unblock: - pthread_mutex_unlock (&throttle->mutex); - if (xcont) - break; - } -} - -void *br_tbf_tokengenerator (void *arg) -{ - unsigned long tokenrate = 0; - unsigned long maxtokens = 0; - br_tbf_bucket_t *bucket = arg; - - tokenrate = bucket->tokenrate; - maxtokens = bucket->maxtokens; - - while (1) { - usleep (BR_TBF_TOKENGEN_INTERVAL_USEC); - - LOCK (&bucket->lock); - { - bucket->tokens += tokenrate; - if (bucket->tokens > maxtokens) - bucket->tokens = maxtokens; - - if (!list_empty (&bucket->queued)) - _br_tbf_dispatch_queued (bucket); - } - UNLOCK (&bucket->lock); - } - - return NULL; -} - -/** - * There is lazy synchronization between this routine (when invoked - * under br_tbf_mod() context) and br_tbf_throttle(). *bucket is - * updated _after_ all the required variables are initialized. - */ -static int32_t -br_tbf_init_bucket (br_tbf_t *tbf, br_tbf_opspec_t *spec) -{ - int ret = 0; - br_tbf_bucket_t *curr = NULL; - br_tbf_bucket_t **bucket = NULL; - - GF_ASSERT (spec->op >= BR_TBF_OP_MIN); - GF_ASSERT (spec->op <= BR_TBF_OP_MAX); - - /* no rate? no throttling. */ - if (!spec->rate) - return 0; - - bucket = tbf->bucket + spec->op; - - curr = GF_CALLOC (1, sizeof (*curr), gf_br_mt_br_tbf_bucket_t); - if (!curr) - goto error_return; - - LOCK_INIT (&curr->lock); - INIT_LIST_HEAD (&curr->queued); - - curr->tokens = 0; - curr->tokenrate = spec->rate; - curr->maxtokens = spec->maxlimit; - - ret = gf_thread_create (&curr->tokener, - NULL, br_tbf_tokengenerator, curr); - if (ret != 0) - goto freemem; - - *bucket = curr; - return 0; - - freemem: - LOCK_DESTROY (&curr->lock); - GF_FREE (curr); - error_return: - return -1; -} - -#define BR_TBF_ALLOC_SIZE \ - (sizeof (br_tbf_t) + (BR_TBF_OP_MAX * sizeof (br_tbf_bucket_t))) - -br_tbf_t * -br_tbf_init (br_tbf_opspec_t *tbfspec, unsigned int count) -{ - int32_t i = 0; - int32_t ret = 0; - br_tbf_t *tbf = NULL; - br_tbf_opspec_t *opspec = NULL; - - tbf = GF_CALLOC (1, BR_TBF_ALLOC_SIZE, gf_br_mt_br_tbf_t); - if (!tbf) - goto error_return; - - tbf->bucket = (br_tbf_bucket_t **) ((char *)tbf + sizeof (*tbf)); - for (i = 0; i < BR_TBF_OP_MAX; i++) { - *(tbf->bucket + i) = NULL; - } - - for (i = 0; i < count; i++) { - opspec = tbfspec + i; - - ret = br_tbf_init_bucket (tbf, opspec); - if (ret) - break; - } - - if (ret) - goto error_return; - - return tbf; - - error_return: - return NULL; -} - -static void -br_tbf_mod_bucket (br_tbf_bucket_t *bucket, br_tbf_opspec_t *spec) -{ - LOCK (&bucket->lock); - { - bucket->tokens = 0; - bucket->tokenrate = spec->rate; - bucket->maxtokens = spec->maxlimit; - } - UNLOCK (&bucket->lock); - - /* next token tick would unqueue pending operations */ -} - -int -br_tbf_mod (br_tbf_t *tbf, br_tbf_opspec_t *tbfspec) -{ - int ret = 0; - br_tbf_bucket_t *bucket = NULL; - br_tbf_ops_t op = BR_TBF_OP_MIN; - - if (!tbf || !tbfspec) - return -1; - - op = tbfspec->op; - - GF_ASSERT (op >= BR_TBF_OP_MIN); - GF_ASSERT (op <= BR_TBF_OP_MAX); - - bucket = *(tbf->bucket + op); - if (bucket) { - br_tbf_mod_bucket (bucket, tbfspec); - } else { - ret = br_tbf_init_bucket (tbf, tbfspec); - } - - return ret; -} - -void -br_tbf_throttle (br_tbf_t *tbf, br_tbf_ops_t op, unsigned long tokens_requested) -{ - char waitq = 0; - br_tbf_bucket_t *bucket = NULL; - br_tbf_throttle_t *throttle = NULL; - - GF_ASSERT (op >= BR_TBF_OP_MIN); - GF_ASSERT (op <= BR_TBF_OP_MAX); - - bucket = *(tbf->bucket + op); - if (!bucket) - return; - - LOCK (&bucket->lock); - { - /** - * if there are enough tokens in the bucket there is no need - * to throttle the request: therefore, consume the required - * number of tokens and continue. - */ - if (tokens_requested <= bucket->tokens) { - bucket->tokens -= tokens_requested; - } else { - throttle = br_tbf_init_throttle (tokens_requested); - if (!throttle) /* let it slip through for now.. */ - goto unblock; - - waitq = 1; - pthread_mutex_lock (&throttle->mutex); - list_add_tail (&throttle->list, &bucket->queued); - } - } - unblock: - UNLOCK (&bucket->lock); - - if (waitq) { - while (!throttle->done) { - pthread_cond_wait (&throttle->cond, &throttle->mutex); - } - - pthread_mutex_unlock (&throttle->mutex); - - pthread_mutex_destroy (&throttle->mutex); - pthread_cond_destroy (&throttle->cond); - - GF_FREE (throttle); - } -} diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h deleted file mode 100644 index 5a41be4fd95..00000000000 --- a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include "list.h" -#include "xlator.h" -#include "locking.h" - -#ifndef __BIT_ROT_TBF_H__ -#define __BIT_ROT_TBF_H__ - -typedef enum br_tbf_ops { - BR_TBF_OP_MIN = -1, - BR_TBF_OP_HASH = 0, /* checksum calculation */ - BR_TBF_OP_READ = 1, /* inode read(s) */ - BR_TBF_OP_READDIR = 2, /* dentry read(s) */ - BR_TBF_OP_MAX = 3, -} br_tbf_ops_t; - -/** - * Operation rate specification - */ -typedef struct br_tbf_opspec { - br_tbf_ops_t op; - - unsigned long rate; - - unsigned long maxlimit; -} br_tbf_opspec_t; - -/** - * Token bucket for each operation type - */ -typedef struct br_tbf_bucket { - gf_lock_t lock; - - pthread_t tokener; /* token generator thread */ - - unsigned long tokenrate; /* token generation rate */ - - unsigned long tokens; /* number of current tokens */ - - unsigned long maxtokens; /* maximum token in the bucket */ - - struct list_head queued; /* list of non-conformant requests */ -} br_tbf_bucket_t; - -typedef struct br_tbf { - br_tbf_bucket_t **bucket; -} br_tbf_t; - -br_tbf_t * -br_tbf_init (br_tbf_opspec_t *, unsigned int); - -int -br_tbf_mod (br_tbf_t *, br_tbf_opspec_t *); - -void -br_tbf_throttle (br_tbf_t *, br_tbf_ops_t, unsigned long); - -#define TBF_THROTTLE_BEGIN(tbf, op, tokens) (br_tbf_throttle (tbf, op, tokens)) -#define TBF_THROTTLE_END(tbf, op, tokens) (void) - -#endif /** __BIT_ROT_TBF_H__ */ diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 174af2b6a15..ca3fc273e9f 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -278,7 +278,7 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child, off_t offset, size_t size, SHA256_CTX *sha256) { int32_t ret = -1; - br_tbf_t *tbf = NULL; + tbf_t *tbf = NULL; struct iovec *iovec = NULL; struct iobref *iobref = NULL; br_private_t *priv = NULL; @@ -311,12 +311,12 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child, goto out; for (i = 0; i < count; i++) { - TBF_THROTTLE_BEGIN (tbf, BR_TBF_OP_HASH, iovec[i].iov_len); + TBF_THROTTLE_BEGIN (tbf, TBF_OP_HASH, iovec[i].iov_len); { SHA256_Update (sha256, (const unsigned char *) (iovec[i].iov_base), iovec[i].iov_len); } - TBF_THROTTLE_BEGIN (tbf, BR_TBF_OP_HASH, iovec[i].iov_len); + TBF_THROTTLE_BEGIN (tbf, TBF_OP_HASH, iovec[i].iov_len); } out: @@ -1756,14 +1756,32 @@ static int32_t br_rate_limit_signer (xlator_t *this, int child_count, int numbricks) { br_private_t *priv = NULL; - br_tbf_opspec_t spec = {0,}; + tbf_opspec_t spec = {0,}; priv = this->private; - spec.op = BR_TBF_OP_HASH; + spec.op = TBF_OP_HASH; spec.rate = 0; spec.maxlimit = 0; +/** + * OK. Most implementations of TBF I've come across generate tokens + * every second (UML, etc..) and some chose sub-second granularity + * (blk-iothrottle cgroups). TBF algorithm itself does not enforce + * any logic for choosing generation interval and it seems pretty + * logical as one could jack up token count per interval w.r.t. + * generation rate. + * + * Value used here is chosen based on a series of test(s) performed + * to balance object signing time and not maxing out on all available + * CPU cores. It's obvious to have seconds granularity and jack up + * token count per interval, thereby achieving close to similar + * results. Let's stick to this as it seems to be working fine for + * the set of ops that are throttled. + **/ + spec.token_gen_interval = 600000; /* In usec */ + + #ifdef BR_RATE_LIMIT_SIGNER double contribution = 0; @@ -1783,7 +1801,7 @@ br_rate_limit_signer (xlator_t *this, int child_count, int numbricks) "[Rate Limit Info] \"tokens/sec (rate): %lu, " "maxlimit: %lu\"", spec.rate, spec.maxlimit); - priv->tbf = br_tbf_init (&spec, 1); + priv->tbf = tbf_init (&spec, 1); return priv->tbf ? 0 : -1; } diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h index 8e9267048be..b5448f76d52 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.h +++ b/xlators/features/bit-rot/src/bitd/bit-rot.h @@ -21,7 +21,7 @@ #include "changelog.h" #include "timer-wheel.h" -#include "bit-rot-tbf.h" +#include "throttle-tbf.h" #include "bit-rot-ssm.h" #include "bit-rot-common.h" @@ -209,7 +209,7 @@ struct br_private { uint32_t expiry_time; /* objects "wait" time */ - br_tbf_t *tbf; /* token bucket filter */ + tbf_t *tbf; /* token bucket filter */ gf_boolean_t iamscrubber; /* function as a fs scrubber */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h index f70fafbca49..a33577cf598 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h @@ -22,10 +22,6 @@ enum br_mem_types { gf_br_mt_br_child_t, gf_br_mt_br_object_t, gf_br_mt_br_ob_n_wk_t, - gf_br_mt_br_tbf_t, - gf_br_mt_br_tbf_bucket_t, - gf_br_mt_br_tbf_throttle_t, - gf_br_mt_br_tbf_opspec_t, gf_br_mt_br_scrubber_t, gf_br_mt_br_fsscan_entry_t, gf_br_stub_mt_br_stub_fd_t, |