| author | Richard Wareing <rwareing@fb.com> | 2015-10-11 02:02:28 -0700 |
|---|---|---|
| committer | Shreyas Siravara <sshreyas@fb.com> | 2016-12-09 09:06:01 -0800 |
| commit | e52d41938468c2fffd9372a0e47ae3b61c8aa965 (patch) | |
| tree | 616e591520ab8cf497a2f7a5527a3050e947320e | |
| parent | 46254326030296c121c8d3d01dc511f792ea8d1d (diff) | |
performance/io-threads: Eliminate spinlock contention via fops-per-thread-ratio
Summary:
- Background: Spinlock contention is frequently observed on busy GFS
clusters; it wastes CPU and degrades the performance of the cluster.
Current workarounds involve under-provisioning the thread pool, but this
is problematic because during busy periods there may not be enough
threads to service the queue.
- This patch introduces a technique to avoid the stampeding-herd problem
among the io-threads workers. Instead of signalling the condition variable
and scaling the thread pool on every enqueue, a worker is only woken (and
the pool only grown) when the queue depth per active thread exceeds a
tunable FOP-to-thread ratio, keeping already-running threads sufficiently
busy (a sketch of the gating condition follows this summary). The ratio is
controlled by the performance.io-thread-fops-per-thread-ratio option.
- More detailed reading on this approach can be found here:
https://h21007.www2.hp.com/portal/download/files/unprot/hpux/MakingConditionVariablesPerform.pdf
- Cherry-pick of D2530504 for 3.8
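
The gating condition at the heart of the patch can be read in isolation.
Below is a minimal, self-contained sketch of the check performed in
do_iot_schedule(); the struct and function names here are illustrative, and
only the fields and the comparison mirror the diff further down.

```c
/* Sketch of the fops-per-thread-ratio gate (illustrative names; the real
 * check lives in do_iot_schedule() in io-threads.c). */
#include <stdio.h>

struct iot_conf_sketch {
        int max_count;              /* configured thread ceiling           */
        int curr_count;             /* threads currently alive             */
        int sleep_count;            /* threads idle in pthread_cond_wait() */
        int queue_size;             /* FOPs waiting in the request queues  */
        int fops_per_thread_ratio;  /* 0 disables the gate entirely        */
};

/* Return 1 when a worker should be signalled (and the pool possibly scaled
 * up); 0 when the already-busy threads are left to drain the queue. */
static int
should_wake_worker (const struct iot_conf_sketch *conf)
{
        int active_count = conf->curr_count - conf->sleep_count;

        if (conf->fops_per_thread_ratio == 0 || active_count == 0)
                return 1;

        return (conf->queue_size / active_count > conf->fops_per_thread_ratio &&
                active_count < conf->max_count);
}

int
main (void)
{
        /* 16 threads alive, 4 of them busy, 30 queued FOPs, ratio of 20:
         * 30/4 <= 20, so no signal is sent and no new thread is spawned. */
        struct iot_conf_sketch conf = { 64, 16, 12, 30, 20 };
        printf ("wake worker: %d\n", should_wake_worker (&conf));
        return 0;
}
```

Since fops-per-thread-ratio is registered in glusterd_volopt_map, it can be
set at runtime like any other volume option (e.g. via gluster volume set);
per the check above, a value of 0 falls back to the previous behaviour of
signalling on every enqueue, and the default shipped by this patch is 20.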
Test Plan:
- Stress test on my dev server
- Shadow testing
Reviewed By: moox, sshreyas
Signed-off-by: Shreyas Siravara <sshreyas@fb.com>
Change-Id: I771ae783aa4ca5a6fd0449db64e07d1f4bff0d04
Reviewed-on: http://review.gluster.org/16080
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Tested-by: Shreyas Siravara <sshreyas@fb.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Kevin Vigor <kvigor@fb.com>
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 5 |
| -rw-r--r-- | xlators/performance/io-threads/src/io-threads.c | 39 |
| -rw-r--r-- | xlators/performance/io-threads/src/io-threads.h | 5 |
3 files changed, 44 insertions, 5 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 1e24adabe0c..7517672de8c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1461,6 +1461,11 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .option      = "thread-count",
           .op_version  = 1
         },
+        { .key         = "performance.io-thread-fops-per-thread-ratio",
+          .voltype     = "performance/io-threads",
+          .option      = "fops-per-thread-ratio",
+          .op_version  = 1
+        },
         { .key         = "performance.high-prio-threads",
           .voltype     = "performance/io-threads",
           .op_version  = 1
diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c
index c81a97d8a39..541079e2070 100644
--- a/xlators/performance/io-threads/src/io-threads.c
+++ b/xlators/performance/io-threads/src/io-threads.c
@@ -202,7 +202,7 @@ iot_worker (void *data)
                                                       &conf->mutex, &sleep);
                                 pthread_mutex_unlock(&conf->mutex);
                                 continue;
-                        }
+                        }
                 }

                 pthread_mutex_unlock (&conf->mutex);
@@ -228,14 +228,25 @@ int
 do_iot_schedule (iot_conf_t *conf, call_stub_t *stub, int pri)
 {
         int   ret = 0;
+        int   active_count = 0;

         pthread_mutex_lock (&conf->mutex);
         {
                 __iot_enqueue (conf, stub, pri);

-                pthread_cond_signal (&conf->cond);
-
-                ret = __iot_workers_scale (conf);
+                /* If we have an ample supply of threads alive already
+                 * it's massively more efficient to keep the ones you have
+                 * busy vs making new ones and signaling everyone
+                 */
+                active_count = conf->curr_count - conf->sleep_count;
+                if (conf->fops_per_thread_ratio == 0 || active_count == 0 ||
+                    (conf->queue_size/active_count >
+                     conf->fops_per_thread_ratio &&
+                     active_count < conf->max_count)) {
+                        pthread_cond_signal (&conf->cond);
+
+                        ret = __iot_workers_scale (conf);
+                }
         }
         pthread_mutex_unlock (&conf->mutex);

@@ -900,6 +911,9 @@ reconfigure (xlator_t *this, dict_t *options)
         GF_OPTION_RECONF ("thread-count", conf->max_count, options, int32,
                           out);

+        GF_OPTION_RECONF ("fops-per-thread-ratio", conf->fops_per_thread_ratio,
+                          options, int32, out);
+
         GF_OPTION_RECONF ("high-prio-threads",
                           conf->ac_iot_limit[IOT_PRI_HI], options, int32, out);
@@ -972,6 +986,9 @@ init (xlator_t *this)
         GF_OPTION_INIT ("thread-count", conf->max_count, int32, out);

+        GF_OPTION_INIT ("fops-per-thread-ratio", conf->fops_per_thread_ratio,
+                        int32, out);
+
         GF_OPTION_INIT ("high-prio-threads", conf->ac_iot_limit[IOT_PRI_HI],
                         int32, out);
@@ -1096,6 +1113,20 @@ struct volume_options options[] = {
                          "perform concurrent IO operations"
         },
+        { .key  = {"fops-per-thread-ratio"},
+          .type = GF_OPTION_TYPE_INT,
+          .min  = IOT_MIN_FOP_PER_THREAD,
+          .max  = IOT_MAX_FOP_PER_THREAD,
+          .default_value = "20",
+          .description = "The optimal ratio of threads to FOPs in the queue "
+                         "we wish to achieve before creating a new thread. "
+                         "The idea here is it's far cheaper to keep our "
+                         "currently running threads busy than spin up "
+                         "new threads or cause a stampeding herd of threads "
+                         "to service a singular FOP when you have a thread "
+                         "which will momentarily become available to do the "
+                         "work."
+        },
         { .key  = {"high-prio-threads"},
           .type = GF_OPTION_TYPE_INT,
           .min  = IOT_MIN_THREADS,
diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h
index d8eea2cf77a..e5c97f690a2 100644
--- a/xlators/performance/io-threads/src/io-threads.h
+++ b/xlators/performance/io-threads/src/io-threads.h
@@ -34,7 +34,9 @@ struct iot_conf;

 #define IOT_MIN_THREADS         1
 #define IOT_DEFAULT_THREADS     16
-#define IOT_MAX_THREADS         64
+#define IOT_MAX_THREADS         256
+#define IOT_MIN_FOP_PER_THREAD  0
+#define IOT_MAX_FOP_PER_THREAD  2000

 #define IOT_THREAD_STACK_SIZE   ((size_t)(1024*1024))
@@ -62,6 +64,7 @@ struct iot_conf {
         pthread_cond_t       cond;

         int32_t              max_count;   /* configured maximum */
+        int32_t              fops_per_thread_ratio;
         int32_t              curr_count;  /* actual number of threads running */
         int32_t              sleep_count;
