Diffstat (limited to 'xlators/features/bit-rot/src')
-rw-r--r-- | xlators/features/bit-rot/src/bitd/Makefile.am              |   4
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-tbf.c            | 306
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot-tbf.h            |  70
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c                |  47
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.h                |  11
-rw-r--r-- | xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h |   3
6 files changed, 432 insertions, 9 deletions
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
index 160e3b653df..f67fa1a3acd 100644
--- a/xlators/features/bit-rot/src/bitd/Makefile.am
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
@@ -9,11 +9,11 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
 	-I$(CONTRIBDIR)/timer-wheel \
 	-I$(top_srcdir)/xlators/features/bit-rot/src/stub
 
-bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c
+bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-tbf.c
 bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
 	$(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
 
-noinst_HEADERS = bit-rot.h bit-rot-scrub.h
+noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-tbf.h
 
 AM_CFLAGS = -Wall $(GF_CFLAGS)
 
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c
new file mode 100644
index 00000000000..d9543416540
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.c
@@ -0,0 +1,306 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+/**
+ *
+ * Basic token bucket implementation for rate limiting. As of now interfaces
+ * to throttle disk read request, directory entry scan and hash calculation
+ * are available. To throttle a particular request (operation), the call needs
+ * to be wrapped in-between throttling APIs, for e.g.
+ *
+ *         TBF_THROTTLE_BEGIN (...); <-- induces "delays" if required
+ *         {
+ *                 call (...);
+ *         }
+ *         TBF_THROTTLE_END (...);   <-- not used atm, maybe needed later
+ *
+ */
+
+#include "mem-pool.h"
+#include "bit-rot-tbf.h"
+#include "bit-rot-stub-mem-types.h"
+
+typedef struct br_tbf_throttle {
+        char done;
+
+        pthread_mutex_t mutex;
+        pthread_cond_t cond;
+
+        unsigned long tokens;
+
+        struct list_head list;
+} br_tbf_throttle_t;
+
+/**
+ * OK. Most implementations of TBF I've come across generate tokens
+ * every second (UML, etc..) and some chose sub-second granularity
+ * (blk-iothrottle cgroups). TBF algorithm itself does not enforce
+ * any logic for choosing generation interval and it seems pretty
+ * logical as one could jack up token count per interval w.r.t.
+ * generation rate.
+ *
+ * Value used here is chosen based on a series of test(s) performed
+ * to balance object signing time and not maxing out on all available
+ * CPU cores. It's obvious to have seconds granularity and jack up
+ * token count per interval, thereby achieving close to similar
+ * results. Let's stick to this as it seems to be working fine for
+ * the set of ops that are throttled.
+ */
+#define BR_TBF_TOKENGEN_INTERVAL_USEC 600000
+
+static inline br_tbf_throttle_t *
+br_tbf_init_throttle (unsigned long tokens_required)
+{
+        br_tbf_throttle_t *throttle = NULL;
+
+        throttle = GF_CALLOC (1, sizeof (*throttle),
+                              gf_br_mt_br_tbf_throttle_t);
+        if (!throttle)
+                return NULL;
+
+        throttle->done = 0;
+        throttle->tokens = tokens_required;
+        INIT_LIST_HEAD (&throttle->list);
+
+        (void) pthread_mutex_init (&throttle->mutex, NULL);
+        (void) pthread_cond_init (&throttle->cond, NULL);
+
+        return throttle;
+}
+
+void
+_br_tbf_dispatch_queued (br_tbf_bucket_t *bucket)
+{
+        gf_boolean_t xcont = _gf_false;
+        br_tbf_throttle_t *tmp = NULL;
+        br_tbf_throttle_t *throttle = NULL;
+
+        list_for_each_entry_safe (throttle, tmp, &bucket->queued, list) {
+
+                pthread_mutex_lock (&throttle->mutex);
+                {
+                        if (bucket->tokens < throttle->tokens) {
+                                xcont = _gf_true;
+                                goto unblock;
+                        }
+
+                        /* this request can now be serviced */
+                        throttle->done = 1;
+                        list_del_init (&throttle->list);
+
+                        bucket->tokens -= throttle->tokens;
+                        pthread_cond_signal (&throttle->cond);
+                }
+        unblock:
+                pthread_mutex_unlock (&throttle->mutex);
+                if (xcont)
+                        break;
+        }
+}
+
+void *br_tbf_tokengenerator (void *arg)
+{
+        unsigned long tokenrate = 0;
+        unsigned long maxtokens = 0;
+        br_tbf_bucket_t *bucket = arg;
+
+        tokenrate = bucket->tokenrate;
+        maxtokens = bucket->maxtokens;
+
+        while (1) {
+                usleep (BR_TBF_TOKENGEN_INTERVAL_USEC);
+
+                LOCK (&bucket->lock);
+                {
+                        bucket->tokens += tokenrate;
+                        if (bucket->tokens > maxtokens)
+                                bucket->tokens = maxtokens;
+
+                        if (!list_empty (&bucket->queued))
+                                _br_tbf_dispatch_queued (bucket);
+                }
+                UNLOCK (&bucket->lock);
+        }
+
+        return NULL;
+}
+
+/**
+ * There is lazy synchronization between this routine (when invoked
+ * under br_tbf_mod() context) and br_tbf_throttle(). *bucket is
+ * updated _after_ all the required variables are initialized.
+ */
+static inline int32_t
+br_tbf_init_bucket (br_tbf_t *tbf, br_tbf_opspec_t *spec)
+{
+        int ret = 0;
+        br_tbf_bucket_t *curr = NULL;
+        br_tbf_bucket_t **bucket = NULL;
+
+        GF_ASSERT (spec->op >= BR_TBF_OP_MIN);
+        GF_ASSERT (spec->op <= BR_TBF_OP_MAX);
+
+        /* no rate? no throttling. */
+        if (!spec->rate)
+                return 0;
+
+        bucket = tbf->bucket + spec->op;
+
+        curr = GF_CALLOC (1, sizeof (*curr), gf_br_mt_br_tbf_bucket_t);
+        if (!curr)
+                goto error_return;
+
+        LOCK_INIT (&curr->lock);
+        INIT_LIST_HEAD (&curr->queued);
+
+        curr->tokens = 0;
+        curr->tokenrate = spec->rate;
+        curr->maxtokens = spec->maxlimit;
+
+        ret = gf_thread_create (&curr->tokener,
+                                NULL, br_tbf_tokengenerator, curr);
+        if (ret != 0)
+                goto freemem;
+
+        *bucket = curr;
+        return 0;
+
+ freemem:
+        LOCK_DESTROY (&curr->lock);
+        GF_FREE (curr);
+ error_return:
+        return -1;
+}
+
+#define BR_TBF_ALLOC_SIZE \
+        (sizeof (br_tbf_t) + (BR_TBF_OP_MAX * sizeof (br_tbf_bucket_t)))
+
+br_tbf_t *
+br_tbf_init (br_tbf_opspec_t *tbfspec, unsigned int count)
+{
+        int32_t i = 0;
+        int32_t ret = 0;
+        br_tbf_t *tbf = NULL;
+        br_tbf_opspec_t *opspec = NULL;
+
+        tbf = GF_CALLOC (1, BR_TBF_ALLOC_SIZE, gf_br_mt_br_tbf_t);
+        if (!tbf)
+                goto error_return;
+
+        tbf->bucket = (br_tbf_bucket_t **) ((char *)tbf + sizeof (*tbf));
+        for (i = 0; i < BR_TBF_OP_MAX; i++) {
+                *(tbf->bucket + i) = NULL;
+        }
+
+        for (i = 0; i < count; i++) {
+                opspec = tbfspec + i;
+
+                ret = br_tbf_init_bucket (tbf, opspec);
+                if (ret)
+                        break;
+        }
+
+        if (ret)
+                goto error_return;
+
+        return tbf;
+
+ error_return:
+        return NULL;
+}
+
+static void
+br_tbf_mod_bucket (br_tbf_bucket_t *bucket, br_tbf_opspec_t *spec)
+{
+        LOCK (&bucket->lock);
+        {
+                bucket->tokens = 0;
+                bucket->tokenrate = spec->rate;
+                bucket->maxtokens = spec->maxlimit;
+        }
+        UNLOCK (&bucket->lock);
+
+        /* next token tick would unqueue pending operations */
+}
+
+int
+br_tbf_mod (br_tbf_t *tbf, br_tbf_opspec_t *tbfspec)
+{
+        int ret = 0;
+        br_tbf_bucket_t *bucket = NULL;
+        br_tbf_ops_t op = BR_TBF_OP_MIN;
+
+        if (!tbf || !tbfspec)
+                return -1;
+
+        op = tbfspec->op;
+
+        GF_ASSERT (op >= BR_TBF_OP_MIN);
+        GF_ASSERT (op <= BR_TBF_OP_MAX);
+
+        bucket = *(tbf->bucket + op);
+        if (bucket) {
+                br_tbf_mod_bucket (bucket, tbfspec);
+        } else {
+                ret = br_tbf_init_bucket (tbf, tbfspec);
+        }
+
+        return ret;
+}
+
+void
+br_tbf_throttle (br_tbf_t *tbf, br_tbf_ops_t op, unsigned long tokens_requested)
+{
+        char waitq = 0;
+        br_tbf_bucket_t *bucket = NULL;
+        br_tbf_throttle_t *throttle = NULL;
+
+        GF_ASSERT (op >= BR_TBF_OP_MIN);
+        GF_ASSERT (op <= BR_TBF_OP_MAX);
+
+        bucket = *(tbf->bucket + op);
+        if (!bucket)
+                return;
+
+        LOCK (&bucket->lock);
+        {
+                /**
+                 * if there are enough tokens in the bucket there is no need
+                 * to throttle the request: therefore, consume the required
+                 * number of tokens and continue.
+                 */
+                if (tokens_requested <= bucket->tokens) {
+                        bucket->tokens -= tokens_requested;
+                } else {
+                        throttle = br_tbf_init_throttle (tokens_requested);
+                        if (!throttle) /* let it slip through for now.. */
+                                goto unblock;
+
+                        waitq = 1;
+                        pthread_mutex_lock (&throttle->mutex);
+                        list_add_tail (&throttle->list, &bucket->queued);
+                }
+        }
+ unblock:
+        UNLOCK (&bucket->lock);
+
+        if (waitq) {
+                while (!throttle->done) {
+                        pthread_cond_wait (&throttle->cond, &throttle->mutex);
+                }
+
+                pthread_mutex_unlock (&throttle->mutex);
+
+                pthread_mutex_destroy (&throttle->mutex);
+                pthread_cond_destroy (&throttle->cond);
+
+                GF_FREE (throttle);
+        }
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h
new file mode 100644
index 00000000000..5a41be4fd95
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-tbf.h
@@ -0,0 +1,70 @@
+/*
+  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#include "list.h"
+#include "xlator.h"
+#include "locking.h"
+
+#ifndef __BIT_ROT_TBF_H__
+#define __BIT_ROT_TBF_H__
+
+typedef enum br_tbf_ops {
+        BR_TBF_OP_MIN     = -1,
+        BR_TBF_OP_HASH    =  0,  /* checksum calculation */
+        BR_TBF_OP_READ    =  1,  /* inode read(s) */
+        BR_TBF_OP_READDIR =  2,  /* dentry read(s) */
+        BR_TBF_OP_MAX     =  3,
+} br_tbf_ops_t;
+
+/**
+ * Operation rate specification
+ */
+typedef struct br_tbf_opspec {
+        br_tbf_ops_t op;
+
+        unsigned long rate;
+
+        unsigned long maxlimit;
+} br_tbf_opspec_t;
+
+/**
+ * Token bucket for each operation type
+ */
+typedef struct br_tbf_bucket {
+        gf_lock_t lock;
+
+        pthread_t tokener;          /* token generator thread */
+
+        unsigned long tokenrate;    /* token generation rate */
+
+        unsigned long tokens;       /* number of current tokens */
+
+        unsigned long maxtokens;    /* maximum tokens in the bucket */
+
+        struct list_head queued;    /* list of non-conformant requests */
+} br_tbf_bucket_t;
+
+typedef struct br_tbf {
+        br_tbf_bucket_t **bucket;
+} br_tbf_t;
+
+br_tbf_t *
+br_tbf_init (br_tbf_opspec_t *, unsigned int);
+
+int
+br_tbf_mod (br_tbf_t *, br_tbf_opspec_t *);
+
+void
+br_tbf_throttle (br_tbf_t *, br_tbf_ops_t, unsigned long);
+
+#define TBF_THROTTLE_BEGIN(tbf, op, tokens) (br_tbf_throttle (tbf, op, tokens))
+#define TBF_THROTTLE_END(tbf, op, tokens)   /* no-op for now */
+
+#endif /** __BIT_ROT_TBF_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
index d985cc4442c..880b16edfa8 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -27,6 +27,17 @@
 
 #include "tw.h"
 
+#define BR_HASH_CALC_READ_SIZE (128 * 1024)
+
+br_tbf_opspec_t opthrottle[] = {
+        {
+                .op       = BR_TBF_OP_HASH,
+                .rate     = BR_HASH_CALC_READ_SIZE,
+                .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE),
+        },
+        /** TODO: throttle getdents(), read() request(s) */
+};
+
 static int
 br_find_child_index (xlator_t *this, xlator_t *child)
 {
@@ -288,8 +299,10 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child,
                                off_t offset, size_t size, SHA256_CTX *sha256)
 {
         int32_t ret = -1;
+        br_tbf_t *tbf = NULL;
         struct iovec *iovec = NULL;
         struct iobref *iobref = NULL;
+        br_private_t *priv = NULL;
         int count = 0;
         int i = 0;
 
@@ -297,6 +310,12 @@ br_object_read_block_and_sign (xlator_t *this, fd_t *fd, br_child_t *child,
         GF_VALIDATE_OR_GOTO (this->name, fd, out);
         GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
         GF_VALIDATE_OR_GOTO (this->name, child, out);
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        priv = this->private;
+
+        GF_VALIDATE_OR_GOTO (this->name, priv->tbf, out);
+        tbf = priv->tbf;
 
         ret = syncop_readv (child->xl, fd, size, offset, 0,
                             &iovec, &count, &iobref, NULL,
@@ -313,9 +332,12 @@
                 goto out;
 
         for (i = 0; i < count; i++) {
-                SHA256_Update (sha256,
-                               (const unsigned char *) (iovec[i].iov_base),
-                               iovec[i].iov_len);
+                TBF_THROTTLE_BEGIN (tbf, BR_TBF_OP_HASH, iovec[i].iov_len);
+                {
+                        SHA256_Update (sha256, (const unsigned char *)
+                                       (iovec[i].iov_base), iovec[i].iov_len);
+                }
+                TBF_THROTTLE_END (tbf, BR_TBF_OP_HASH, iovec[i].iov_len);
         }
 
 out:
@@ -334,7 +356,7 @@ br_calculate_obj_checksum (unsigned char *md,
 {
         int32_t ret = -1;
         off_t offset = 0;
-        size_t block = 128 * 1024; /* 128K block size */
+        size_t block = BR_HASH_CALC_READ_SIZE;
 
         xlator_t *this = NULL;
         SHA256_CTX sha256;
@@ -1358,6 +1380,16 @@ br_init_signer (xlator_t *this, br_private_t *priv)
 }
 
 int32_t
+br_init_rate_limiter (br_private_t *priv)
+{
+        br_tbf_opspec_t *spec = opthrottle;
+        priv->tbf = br_tbf_init (spec, sizeof (opthrottle)
+                                       / sizeof (br_tbf_opspec_t));
+
+        return priv->tbf ? 0 : -1;
+}
+
+int32_t
 init (xlator_t *this)
 {
         int i = 0;
@@ -1411,12 +1443,16 @@ init (xlator_t *this)
                 INIT_LIST_HEAD (&priv->children[i].list);
         INIT_LIST_HEAD (&priv->bricks);
 
+        ret = br_init_rate_limiter (priv);
+        if (ret)
+                goto cleanup_mutex;
+
         this->private = priv;
 
         if (!priv->iamscrubber) {
                 ret = br_init_signer (this, priv);
                 if (ret)
-                        goto cleanup_mutex;
+                        goto cleanup_tbf;
         }
 
         ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this);
@@ -1433,6 +1469,7 @@ init (xlator_t *this)
                 return 0;
         }
 
+ cleanup_tbf:
  cleanup_mutex:
         (void) pthread_cond_destroy (&priv->cond);
         (void) pthread_mutex_destroy (&priv->lock);
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
index a634a1fa76f..5b641801916 100644
--- a/xlators/features/bit-rot/src/bitd/bit-rot.h
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -25,13 +25,18 @@
 #include "changelog.h"
 #include "timer-wheel.h"
 
+#include "bit-rot-tbf.h"
+
 #include "bit-rot-common.h"
 #include "bit-rot-stub-mem-types.h"
 
 #include <openssl/sha.h>
 
-/* TODO: make this configurable */
-#define BR_WORKERS 8
+/**
+ * TODO: make this configurable. As a best practice, set this to the
+ * number of processor cores.
+ */
+#define BR_WORKERS 4
 
 #define signature_size(hl) (sizeof (br_isignature_t) + hl + 1)
 
@@ -92,6 +97,8 @@ struct br_private {
                                                    the objects */
         int32_t expiry_time;                    /* objects "wait" time */
 
+        br_tbf_t *tbf;                          /* token bucket filter */
+
         gf_boolean_t iamscrubber;               /* function as a fs scrubber */
 };
 
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
index 492278639b4..bb4030493db 100644
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -22,6 +22,9 @@ enum br_mem_types {
         gf_br_mt_br_child_t,
         gf_br_mt_br_object_t,
         gf_br_mt_br_ob_n_wk_t,
+        gf_br_mt_br_tbf_t,
+        gf_br_mt_br_tbf_bucket_t,
+        gf_br_mt_br_tbf_throttle_t,
         gf_br_stub_mt_end
 };
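
The usage pattern this patch establishes is: declare a br_tbf_opspec_t table, build the filter once with br_tbf_init (), and wrap each throttled operation between TBF_THROTTLE_BEGIN and TBF_THROTTLE_END. The sketch below is a minimal standalone illustration of that pattern and is not part of the commit: it assumes the GlusterFS build environment (so "bit-rot-tbf.h" resolves), the rate and burst values are arbitrary, and the byte-summing loop is a placeholder for real work such as the SHA256_Update () call throttled in bit-rot.c above.

/* illustrative sketch only -- not part of the commit */
#include <stddef.h>

#include "bit-rot-tbf.h"

#define READ_SIZE (128 * 1024)

/* one bucket for hash calculation: READ_SIZE tokens are added per
 * generation tick, and the bucket never holds more than 8 * READ_SIZE
 * tokens (the burst allowance); both values are example choices */
static br_tbf_opspec_t spec[] = {
        {
                .op       = BR_TBF_OP_HASH,
                .rate     = READ_SIZE,
                .maxlimit = (8 * READ_SIZE),
        },
};

static br_tbf_t *
setup_throttle (void)
{
        /* one token-generator thread is spawned per configured bucket */
        return br_tbf_init (spec, sizeof (spec) / sizeof (spec[0]));
}

static unsigned long
checksum_block (br_tbf_t *tbf, const unsigned char *block, size_t len)
{
        size_t i = 0;
        unsigned long sum = 0;

        /* blocks until "len" tokens are available; passes straight
         * through if the bucket already holds enough */
        TBF_THROTTLE_BEGIN (tbf, BR_TBF_OP_HASH, len);
        {
                for (i = 0; i < len; i++)
                        sum += block[i];
        }
        TBF_THROTTLE_END (tbf, BR_TBF_OP_HASH, len);

        return sum;
}

With a rate of 128 KB per 600 ms tick (the interval hard-coded in bit-rot-tbf.c), a caller repeatedly requesting 128 KB of tokens is limited to roughly 213 KB/s of hashing, plus whatever burst the bucket has accumulated while idle (capped at maxlimit).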