summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSusant Palai <spalai@redhat.com>2017-03-22 17:14:25 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2017-08-11 18:06:40 +0000
commitc7101426c64db6723752c55f116dcbfbaf84a668 (patch)
treeee29bead8a1164893366f1eb19df2d0928c6477f
parent082868b81a0f4c9ed27f71b48708b2ddfd379150 (diff)
cluster/dht: Make rebalance throttle option tuned by number
Current rebalance throttle options: lazy/normal/aggressive may not always be sufficient for the purpose of throttling. In our recent test, we observed for certain setups, normal and aggressive modes behaved similarly consuming full disk bandwidth. So in cases like this admin should be able to tune it down(or vice versa) depending on the need. Along with old throttle configurations, thread counts are tuned based on number. e.g. gluster v set vol-name cluster-rebal.throttle 5. Admin can tune up/down between 0 and the number of cores available. Note: For heterogenous servers, validation will fail on the old server if "number" is given for throttle configuration. The message looks something like this: "volume set: failed: Staging failed on vm2. Error: cluster.rebal-throttle should be {lazy|normal|aggressive}" Test: Manual test by logging active thread number after reconfiguring throttle option. testcase: tests/basic/distribute/throttle-rebal.t > Change-Id: I46e3cde546900307831028b344ecf601fd9b02c3 > BUG: 1438370 > Signed-off-by: Susant Palai <spalai@redhat.com> > Reviewed-on: https://review.gluster.org/16980 > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > Smoke: Gluster Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > Reviewed-by: Atin Mukherjee <amukherj@redhat.com> > Reviewed-by: Raghavendra G <rgowdapp@redhat.com> > Signed-off-by: Susant Palai <spalai@redhat.com> Change-Id: I46e3cde546900307831028b344ecf601fd9b02c3 BUG: 1473136 Signed-off-by: Susant Palai <spalai@redhat.com> Reviewed-on: https://review.gluster.org/17835 CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
-rw-r--r--tests/basic/distribute/throttle-rebal.t14
-rw-r--r--xlators/cluster/dht/src/dht-common.h3
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c25
-rw-r--r--xlators/cluster/dht/src/dht-shared.c108
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c32
5 files changed, 152 insertions, 30 deletions
diff --git a/tests/basic/distribute/throttle-rebal.t b/tests/basic/distribute/throttle-rebal.t
index 89495aee71b..f4823cf4f21 100644
--- a/tests/basic/distribute/throttle-rebal.t
+++ b/tests/basic/distribute/throttle-rebal.t
@@ -16,6 +16,11 @@ function set_throttle {
$CLI volume set $V0 cluster.rebal-throttle $level 2>&1 |grep -oE 'success|failed'
}
+#Determine number of cores
+cores=$(cat /proc/cpuinfo | grep processor | wc -l)
+if [ "$cores" == "" ]; then
+ echo "Could not get number of cores available"
+fi
THROTTLE_LEVEL="lazy"
EXPECT "success" set_throttle $THROTTLE_LEVEL
@@ -36,6 +41,15 @@ EXPECT "failed" set_throttle $THROTTLE_LEVEL
#check if throttle-level is still aggressive
EXPECT "aggressive" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+EXPECT "success" set_throttle $cores
+
+#Setting thorttle number to be more than the number of cores should fail
+THORTTLE_LEVEL=$((cores+1))
+TEST echo $THORTTLE_LEVEL
+EXPECT "failed" set_throttle $THROTTLE_LEVEL
+EXPECT "$cores" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+
TEST $CLI volume stop $V0;
TEST $CLI volume delete $V0;
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 9aae11f2d0f..6f0dd774181 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -33,6 +33,7 @@
#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
#define TIERING_MIGRATION_KEY "tiering.migration"
#define DHT_LAYOUT_HASH_INVALID 1
+#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
#define DHT_DIR_STAT_BLOCKS 8
#define DHT_DIR_STAT_SIZE 4096
@@ -581,7 +582,7 @@ struct dht_conf {
/* Support size-weighted rebalancing (heterogeneous bricks). */
gf_boolean_t do_weighting;
gf_boolean_t randomize_by_gfid;
- char *dthrottle;
+ int dthrottle;
dht_methods_t methods;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 27410897d34..9d6858cb535 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2764,7 +2764,7 @@ gf_defrag_task (void *opaque)
defrag->current_thread_count--;
gf_log ("DHT", GF_LOG_INFO,
"Thread sleeping. "
- "defrag->current_thread_count: %d",
+ "current thread count: %d",
defrag->current_thread_count);
pthread_cond_wait (
@@ -2772,11 +2772,11 @@ gf_defrag_task (void *opaque)
&defrag->dfq_mutex);
defrag->current_thread_count++;
-
gf_log ("DHT", GF_LOG_INFO,
"Thread wokeup. "
- "defrag->current_thread_count: %d",
+ "current thread count: %d",
defrag->current_thread_count);
+
}
if (defrag->q_entry_count) {
@@ -2829,6 +2829,14 @@ gf_defrag_task (void *opaque)
finished */
if (!defrag->crawl_done) {
+
+ defrag->current_thread_count--;
+ gf_log ("DHT", GF_LOG_INFO, "Thread "
+ " sleeping while waiting for "
+ "migration entries. current "
+ "thread count :%d",
+ defrag->current_thread_count);
+
pthread_cond_wait (
&defrag->parallel_migration_cond,
&defrag->dfq_mutex);
@@ -2836,10 +2844,19 @@ gf_defrag_task (void *opaque)
if (defrag->crawl_done &&
!defrag->q_entry_count) {
+ defrag->current_thread_count++;
+ gf_msg_debug ("DHT", 0, "Exiting thread");
+
pthread_cond_broadcast (
&defrag->parallel_migration_cond);
goto unlock;
} else {
+ defrag->current_thread_count++;
+ gf_msg_debug ("DHT", 0, "Thread woke up"
+ " as found migrating entries. "
+ "current thread count:%d",
+ defrag->current_thread_count);
+
pthread_mutex_unlock
(&defrag->dfq_mutex);
continue;
@@ -4282,7 +4299,7 @@ gf_defrag_start_crawl (void *data)
INIT_LIST_HEAD (&(defrag->queue[0].list));
- thread_spawn_count = MAX ((sysconf(_SC_NPROCESSORS_ONLN) - 4), 4);
+ thread_spawn_count = MAX (MAX_REBAL_THREADS, 4);
gf_msg_debug (this->name, 0, "thread_spawn_count: %d",
thread_spawn_count);
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index f13762e34fb..9c3eb16869c 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -400,7 +400,7 @@ dht_reconfigure (xlator_t *this, dict_t *options)
char *temp_str = NULL;
gf_boolean_t search_unhashed;
int ret = -1;
- int throttle_count = 0;
+ int rebal_thread_count = 0;
GF_VALIDATE_OR_GOTO ("dht", this, out);
GF_VALIDATE_OR_GOTO ("dht", options, out);
@@ -456,22 +456,54 @@ dht_reconfigure (xlator_t *this, dict_t *options)
conf->randomize_by_gfid,
options, bool, out);
- GF_OPTION_RECONF ("rebal-throttle", conf->dthrottle, options,
- str, out);
-
GF_OPTION_RECONF ("lock-migration", conf->lock_migration_enabled,
options, bool, out);
if (conf->defrag) {
+ pthread_mutex_lock (&conf->defrag->dfq_mutex);
+ {
+ if (dict_get_str (options, "rebal-throttle", &temp_str) == 0) {
+ if (!strcasecmp (temp_str, "lazy")) {
+ conf->defrag->recon_thread_count = 1;
+ } else if (!strcasecmp (temp_str, "normal")) {
+ conf->defrag->recon_thread_count = 3;
+ } else if (!strcasecmp (temp_str, "aggressive")) {
+ conf->defrag->recon_thread_count = MAX ((MAX_REBAL_THREADS - 4), 4);
+ } else if ((gf_string2int (temp_str, &rebal_thread_count) == 0)) {
+ if ((rebal_thread_count > 0) && (rebal_thread_count <= MAX_REBAL_THREADS)) {
+ gf_msg_debug (this->name, 0, "rebal throttle count reconfigured to %d", rebal_thread_count);
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be "
+ "within range of 0 and maximum number of"
+ " cores available");
+ ret = -1;
+ pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be {lazy|normal|aggressive}"
+ " or a number upto number of cores available,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->dthrottle);
+ ret = -1;
+ pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+ goto out;
+ }
+ }
+ }
+ pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+ }
+
+ if (conf->defrag) {
conf->defrag->lock_migration_enabled =
conf->lock_migration_enabled;
-
- GF_DECIDE_DEFRAG_THROTTLE_COUNT (throttle_count, conf);
- gf_msg ("DHT", GF_LOG_INFO, 0,
- DHT_MSG_REBAL_THROTTLE_INFO,
- "conf->dthrottle: %s, "
- "conf->defrag->recon_thread_count: %d",
- conf->dthrottle, conf->defrag->recon_thread_count);
}
if (conf->defrag) {
@@ -608,8 +640,8 @@ dht_init (xlator_t *this)
gf_defrag_info_t *defrag = NULL;
int cmd = 0;
char *node_uuid = NULL;
- int throttle_count = 0;
uint32_t commit_hash = 0;
+ int rebal_thread_count = 0;
GF_VALIDATE_OR_GOTO ("dht", this, err);
@@ -809,15 +841,49 @@ dht_init (xlator_t *this)
conf->randomize_by_gfid, bool, err);
if (defrag) {
- GF_OPTION_INIT ("rebal-throttle",
- conf->dthrottle, str, err);
-
- GF_DECIDE_DEFRAG_THROTTLE_COUNT(throttle_count, conf);
-
- gf_msg_debug ("DHT", 0, "conf->dthrottle: %s, "
- "conf->defrag->recon_thread_count: %d",
- conf->dthrottle,
- conf->defrag->recon_thread_count);
+ GF_OPTION_INIT ("rebal-throttle", temp_str, str, err);
+ if (temp_str) {
+
+ pthread_mutex_lock (&conf->defrag->dfq_mutex);
+ {
+ if (!strcasecmp (temp_str, "lazy")) {
+ conf->defrag->recon_thread_count = 1;
+ } else if (!strcasecmp (temp_str, "normal")) {
+ conf->defrag->recon_thread_count = 2;
+ } else if (!strcasecmp (temp_str, "aggressive")) {
+ conf->defrag->recon_thread_count = MAX (MAX_REBAL_THREADS - 4, 4);
+ } else if ((gf_string2int (temp_str, &rebal_thread_count) == 0)) {
+ if ((rebal_thread_count > 0) && (rebal_thread_count <= MAX_REBAL_THREADS)) {
+ gf_msg (this->name, GF_LOG_INFO, 0, 0,
+ "rebal thread count configured to %d",
+ rebal_thread_count);
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be "
+ "within range of 0 and maximum number of"
+ " cores available");
+ ret = -1;
+ pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+ goto err;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be {lazy|normal|aggressive}"
+ " or a number upto number of cores available,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->dthrottle);
+ ret = -1;
+ pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+ goto err;
+ }
+ }
+ pthread_mutex_unlock (&conf->defrag->dfq_mutex);
+ }
}
GF_OPTION_INIT ("xattr-name", conf->xattr_name, str, err);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index dac9d4eabfd..6bf07233a70 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -547,21 +547,45 @@ static int
validate_defrag_throttle_option (glusterd_volinfo_t *volinfo, dict_t *dict,
char *key, char *value, char **op_errstr)
{
- char errstr[2048] = "";
- int ret = 0;
- xlator_t *this = NULL;
+ char errstr[2048] = "";
+ int ret = 0;
+ xlator_t *this = NULL;
+ int thread_count = 0;
+ long int cores_available = 0;
this = THIS;
GF_ASSERT (this);
+ cores_available = sysconf(_SC_NPROCESSORS_ONLN);
+
+ /* Throttle option should be one of lazy|normal|aggressive or a number
+ * configured by user max up to the number of cores in the machine */
+
if (!strcasecmp (value, "lazy") ||
!strcasecmp (value, "normal") ||
!strcasecmp (value, "aggressive")) {
ret = 0;
+ } else if ((gf_string2int (value, &thread_count) == 0)) {
+ if ((thread_count > 0) && (thread_count <= cores_available)) {
+ ret = 0;
+ } else {
+ ret = -1;
+ snprintf (errstr, sizeof (errstr), "%s should be within"
+ " range of 0 and maximum number of cores "
+ "available (cores available - %ld)", key,
+ cores_available);
+
+ gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY, "%s", errstr);
+
+ *op_errstr = gf_strdup (errstr);
+ }
} else {
ret = -1;
snprintf (errstr, sizeof (errstr), "%s should be "
- "{lazy|normal|aggressive}", key);
+ "{lazy|normal|aggressive} or a number upto number of"
+ " cores available (cores availble - %ld)", key,
+ cores_available);
gf_msg (this->name, GF_LOG_ERROR, EINVAL,
GD_MSG_INVALID_ENTRY, "%s", errstr);
*op_errstr = gf_strdup (errstr);