diff options
author | Venky Shankar <vshankar@redhat.com> | 2015-04-27 21:34:34 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-05-07 22:51:41 -0700 |
commit | 9ba8963999bca431ec14a25961a163810cfe1e5b (patch) | |
tree | 783f5a29b7cfc63331a88a1ec5d222a7a4c2d57e /xlators/features/bit-rot/src/bitd/bit-rot.c | |
parent | 4ccd70b323d4cb929b7b7a88e592fc98fab06198 (diff) |
features/bitrot: Throttle filesystem scrubber
This patch introduces a multithreaded filesystem scrubber based
on the throttling option configured for a particular volume. The
implementation "logically" breaks scanning and scrubbing with
the number of scrubber threads auto-configured depending upon
the throttle configuration. Scanning (crawling) is left single
threaded (per brick) with entries scrubbed in bulk. On reaching
this "bulk" watermark, scanner waits until entries are scrubbed.
Bricks for a particular volume have a set of thread(s) assigned
for scrubbing, with entries for each brick scrubbed in a round
robin fashion to avoid scrub "stalls" when a brick (out of N
bricks) is under active scrubbing.
This mechanism helps us implement "pause/resume" with ease: all
one needs to do is to clean up scrubber threads and let the main
scanner thread "wait" until scrubbing is resumed (where the
scrubber thread(s) are spawned again), therefore continuing
where we left off (unless we restart the daemons, where crawl
initiates from root directory again, but I guess that's OK).
[
NOTE:
Throttling is optional for the signer daemon, without which
it runs full throttle. However, passing "-DBR_RATE_LIMIT_SIGNER"
predefined in CFLAGS enables CPU throttling (during checksum
calculation) thereby avoiding high CPU usage.
]
Subsequent patches would introduce CPU throttling during hash
calculation for scrubber.
Change-Id: I5701dd6cd4dff27ca3144ac5e3798a2216b39d4f
BUG: 1207020
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/10511
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/features/bit-rot/src/bitd/bit-rot.c')
-rw-r--r-- | xlators/features/bit-rot/src/bitd/bit-rot.c | 146 |
1 files changed, 117 insertions, 29 deletions
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index 880b16edfa8..eea81aec53a 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -29,15 +29,6 @@ #define BR_HASH_CALC_READ_SIZE (128 * 1024) -br_tbf_opspec_t opthrottle[] = { - { - .op = BR_TBF_OP_HASH, - .rate = BR_HASH_CALC_READ_SIZE, - .maxlimit = (2 * BR_WORKERS * BR_HASH_CALC_READ_SIZE), - }, - /** TODO: throttle getdents(), read() request(s) */ -}; - static int br_find_child_index (xlator_t *this, xlator_t *child) { @@ -1066,6 +1057,7 @@ br_enact_signer (xlator_t *this, br_child_t *child, br_stub_init_t *stub) child->threadrunning = 1; /* it's OK to continue, "old" objects would be signed when modified */ + list_del_init (&child->list); return 0; dealloc: @@ -1078,14 +1070,45 @@ static inline int32_t br_enact_scrubber (xlator_t *this, br_child_t *child) { int32_t ret = 0; + br_private_t *priv = NULL; + struct br_scanfs *fsscan = NULL; + struct br_scrubber *fsscrub = NULL; + + priv = this->private; + + fsscan = &child->fsscan; + fsscrub = &priv->fsscrub; + + LOCK_INIT (&fsscan->entrylock); + pthread_mutex_init (&fsscan->waitlock, NULL); + pthread_cond_init (&fsscan->waitcond, NULL); - ret = gf_thread_create (&child->thread, NULL, br_scrubber, child); + fsscan->entries = 0; + INIT_LIST_HEAD (&fsscan->queued); + INIT_LIST_HEAD (&fsscan->ready); + + ret = gf_thread_create (&child->thread, NULL, br_fsscanner, child); if (ret != 0) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, "failed to spawn scrubber"); + gf_log (this->name, GF_LOG_ALERT, "failed to spawn bitrot " + "scrubber daemon [Brick: %s]", child->brick_path); + goto error_return; } - return ret; + /** + * Everything has been setup.. add this subvolume to scrubbers + * list. 
+ */ + pthread_mutex_lock (&fsscrub->mutex); + { + list_move (&child->list, &fsscrub->scrublist); + pthread_cond_broadcast (&fsscrub->cond); + } + pthread_mutex_unlock (&fsscrub->mutex); + + return 0; + + error_return: + return -1; } /** @@ -1202,8 +1225,7 @@ br_handle_events (void *arg) "failed to connect to the " "child (subvolume: %s)", child->xl->name); - else - list_del_init (&child->list); + } } @@ -1379,16 +1401,72 @@ br_init_signer (xlator_t *this, br_private_t *priv) return -1; } -int32_t -br_init_rate_limiter (br_private_t *priv) +/** + * For signer, only rate limit CPU usage (during hash calculation) when + * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full + * throttle. + */ +static int32_t +br_rate_limit_signer (xlator_t *this, int child_count, int numbricks) { - br_tbf_opspec_t *spec = opthrottle; - priv->tbf = br_tbf_init (spec, sizeof (opthrottle) - / sizeof (br_tbf_opspec_t)); + br_private_t *priv = NULL; + br_tbf_opspec_t spec = {0,}; + + priv = this->private; + + spec.op = BR_TBF_OP_HASH; + spec.rate = 0; + spec.maxlimit = 0; + +#ifdef BR_RATE_LIMIT_SIGNER + + double contribution = 0; + contribution = ((double)1 - ((double)child_count / (double)numbricks)); + if (contribution == 0) + contribution = 1; + spec.rate = BR_HASH_CALC_READ_SIZE * contribution; + spec.maxlimit = BR_WORKERS * BR_HASH_CALC_READ_SIZE; + +#endif + + if (!spec.rate) + gf_log (this->name, + GF_LOG_INFO, "[Rate Limit Info] \"FULL THROTTLE\""); + else + gf_log (this->name, GF_LOG_INFO, + "[Rate Limit Info] \"tokens/sec (rate): %lu, " + "maxlimit: %lu\"", spec.rate, spec.maxlimit); + priv->tbf = br_tbf_init (&spec, 1); return priv->tbf ? 
0 : -1; } +static int32_t +br_signer_init (xlator_t *this, br_private_t *priv) +{ + int32_t ret = 0; + int numbricks = 0; + + GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, error_return); + GF_OPTION_INIT ("brick-count", numbricks, int32, error_return); + + ret = br_rate_limit_signer (this, priv->child_count, numbricks); + if (ret) + goto error_return; + + ret = br_init_signer (this, priv); + if (ret) + goto cleanup_tbf; + + return 0; + + cleanup_tbf: + /* cleanup TBF */ + error_return: + return -1; + +} + int32_t init (xlator_t *this) { @@ -1410,7 +1488,6 @@ init (xlator_t *this) } GF_OPTION_INIT ("scrubber", priv->iamscrubber, bool, out); - GF_OPTION_INIT ("expiry-time", priv->expiry_time, int32, out); priv->child_count = xlator_subvolume_count (this); priv->children = GF_CALLOC (priv->child_count, sizeof (*priv->children), @@ -1443,18 +1520,19 @@ init (xlator_t *this) INIT_LIST_HEAD (&priv->children[i].list); INIT_LIST_HEAD (&priv->bricks); - ret = br_init_rate_limiter (priv); - if (ret) - goto cleanup_mutex; - this->private = priv; if (!priv->iamscrubber) { - ret = br_init_signer (this, priv); - if (ret) - goto cleanup_tbf; + ret = br_signer_init (this, priv); + } else { + ret = br_scrubber_init (this, priv); + if (!ret) + ret = br_scrubber_handle_options (this, priv, NULL); } + if (ret) + goto cleanup_mutex; + ret = gf_thread_create (&priv->thread, NULL, br_handle_events, this); if (ret != 0) { gf_log (this->name, GF_LOG_ERROR, @@ -1469,7 +1547,6 @@ init (xlator_t *this) return 0; } - cleanup_tbf: cleanup_mutex: (void) pthread_cond_destroy (&priv->cond); (void) pthread_mutex_destroy (&priv->lock); @@ -1505,6 +1582,17 @@ fini (xlator_t *this) return; } +int +reconfigure (xlator_t *this, dict_t *options) +{ + br_private_t *priv = this->private; + + if (!priv->iamscrubber) + return 0; + + return br_scrubber_handle_options (this, priv, options); +} + struct xlator_fops fops; struct xlator_cbks cbks; |