summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tests/bugs/glusterd/bug-1699339.t69
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c269
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c55
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h10
6 files changed, 359 insertions, 48 deletions
diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
new file mode 100644
index 00000000000..3e950f48432
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1699339.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+NUM_VOLS=15
+
+
+# Print the brick base path "<B0>/volNN", where NN is $1 zero-padded to two
+# digits.
+# NOTE(review): no caller is visible in this script -- confirm it is needed.
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+# Print the number of lines in the XML "volume status" output for volume $1
+# (queried through $CLI_1) that contain "<status>1".
+# NOTE(review): presumably one such line per online brick -- confirm against
+# the CLI's XML output. No caller is visible in this script.
+function count_up_bricks {
+ vol=$1;
+ $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+# Create and start a replica-3 volume named "<V0>-volNN" (NN is $1 zero-padded
+# to two digits) with one brick under each of $B1, $B2 and $B3 on hosts $H1,
+# $H2 and $H3. Both steps are TEST statements, so every call contributes two
+# tests to the total.
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name}
+ TEST $CLI_1 volume start $vol_name
+}
+
+TEST launch_cluster 3
+# Brick multiplexing has to be enabled first: the validator for
+# glusterd.vol_count_per_thread rejects the option while it is disabled.
+TEST $CLI_1 volume set all cluster.brick-multiplex on
+
+# The option accepts the value in the range from 5 to 200
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4
+
+# Use the smallest accepted value so that the NUM_VOLS volumes created below
+# exceed a single thread's share.
+TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*2 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 2 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=28
+for i in $(seq 1 $NUM_VOLS); do
+    create_volume $i
+done
+
+# Stop glusterd on node 1, then change an option on two volumes through
+# node 2 while node 1 is down.
+TEST kill_glusterd 1
+
+vol1=$(printf "%s-vol%02d" $V0 1)
+TEST $CLI_2 volume set $vol1 performance.readdir-ahead on
+vol2=$(printf "%s-vol%02d" $V0 2)
+TEST $CLI_2 volume set $vol2 performance.readdir-ahead on
+
+# Bring back 1st glusterd
+TEST $glusterd_1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Both option changes made while node 1 was down are expected to show up in
+# the volinfo reported by volinfo_field_1.
+EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead
+EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead
+
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index a2cb4024f11..b51892a6e0e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -87,6 +87,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {
* TBD: Discuss the default value for this. Maybe this should be a
* dynamic value depending on the memory specifications per node */
{GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE},
+ {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE},
{GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},
{GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},
{NULL},
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index cf17fcbff16..71f1c78622f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -2099,6 +2099,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername,
goto out;
}
+ if (!snap_count)
+ goto out;
+
for (i = 1; i <= snap_count; i++) {
/* Compare one snapshot from peer_data at a time */
ret = glusterd_compare_snap(peer_data, i, peername, peerid);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 50534852d8d..e50db060aa1 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -152,6 +152,47 @@ out:
return ret;
}
+/*
+ * Report how many volumes a single worker thread should handle when glusterd
+ * populates the peer-data dictionary.
+ *
+ * @thread_limit: out parameter, always written before returning. It receives
+ *                1 when brick multiplexing is disabled or when the limit
+ *                cannot be determined; otherwise the value stored under
+ *                GLUSTERD_VOL_CNT_PER_THRD in priv->opts, or
+ *                GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE when that key is
+ *                absent.
+ *
+ * Returns 0 on success and non-zero on failure.
+ *
+ * The caller in glusterd_add_volumes_to_export_dict() ignores the return
+ * value and divides the volume count by *thread_limit, so a failure must
+ * never leave 0 there. A limit of 1 makes that caller take its serial path.
+ */
+int
+get_gd_vol_thread_limit(int *thread_limit)
+{
+    char *value = NULL;
+    int ret = -1;
+    int parsed_limit = 0;
+    /* Safe fallback for every early exit below. */
+    int vol_per_thread_limit = 1;
+    xlator_t *this = NULL;
+    glusterd_conf_t *priv = NULL;
+
+    this = THIS;
+    GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+    priv = this->private;
+    GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+    if (!is_brick_mx_enabled()) {
+        /* Without brick multiplexing the limit stays at 1. */
+        ret = 0;
+        goto out;
+    }
+
+    ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD,
+                        SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value);
+    if (ret) {
+        /* Option not set: fall back to the compiled-in default. */
+        value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE;
+    }
+
+    /* Parse into a scratch variable so a failed conversion cannot clobber
+     * the fallback value. */
+    ret = gf_string2int(value, &parsed_limit);
+    if (ret)
+        goto out;
+
+    if (parsed_limit <= 0) {
+        /* A non-positive limit is unusable as a divisor. */
+        ret = -1;
+        goto out;
+    }
+
+    vol_per_thread_limit = parsed_limit;
+
+out:
+    *thread_limit = vol_per_thread_limit;
+
+    gf_msg_debug("glusterd", 0,
+                 "Per Thread volume limit set to %d glusterd to populate dict "
+                 "data parallel",
+                 *thread_limit);
+
+    return ret;
+}
+
extern struct volopt_map_entry glusterd_volopt_map[];
extern glusterd_all_vol_opts valid_all_vol_opts[];
@@ -2991,50 +3032,55 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
/* tiering related variables */
- snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count);
- if (ret)
- goto out;
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count);
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count);
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count);
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count);
+ ret = dict_set_uint32(dict, key,
+ volinfo->tier_info.cold_disperse_count);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count);
+ ret = dict_set_uint32(dict, key,
+ volinfo->tier_info.cold_redundancy_count);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count);
+ ret = dict_set_uint32(dict, key,
+ volinfo->tier_info.cold_dist_leaf_count);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count);
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count);
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type);
+ if (ret)
+ goto out;
- snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count);
- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count);
+ ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count);
+ if (ret)
+ goto out;
+ }
snprintf(key, sizeof(key), "%s%d", prefix, count);
ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo);
@@ -3284,33 +3330,40 @@ out:
return ret;
}
-int32_t
-glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+void *
+glusterd_add_bulk_volumes_create_thread(void *data)
{
int32_t ret = -1;
- dict_t *dict = NULL;
glusterd_conf_t *priv = NULL;
glusterd_volinfo_t *volinfo = NULL;
int32_t count = 0;
- glusterd_dict_ctx_t ctx = {0};
xlator_t *this = NULL;
+ glusterd_add_dict_args_t *arg = NULL;
+ dict_t *dict = NULL;
+ int start = 0;
+ int end = 0;
- this = THIS;
- GF_ASSERT(this);
+ GF_ASSERT(data);
+
+ arg = data;
+ dict = arg->voldict;
+ start = arg->start;
+ end = arg->end;
+ this = arg->this;
+ THIS = arg->this;
priv = this->private;
GF_ASSERT(priv);
- dict = dict_new();
- if (!dict)
- goto out;
-
cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
{
count++;
+ if ((count < start) || (count > end))
+ continue;
+
ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
if (ret)
goto out;
- if (!glusterd_is_volume_quota_enabled(volinfo))
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
continue;
ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
"volume");
@@ -3318,7 +3371,122 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)
goto out;
}
- ret = dict_set_int32n(dict, "count", SLEN("count"), count);
+out:
+ GF_ATOMIC_DEC(priv->thread_count);
+ free(arg);
+ return NULL;
+}
+
+int32_t
+glusterd_add_volumes_to_export_dict(dict_t **peer_data)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ dict_t *dict_arr[128] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t count = 0;
+ glusterd_dict_ctx_t ctx = {0};
+ xlator_t *this = NULL;
+ int totthread = 0;
+ int volcnt = 0;
+ int start = 1;
+ int endindex = 0;
+ int vol_per_thread_limit = 0;
+ glusterd_add_dict_args_t *arg = NULL;
+ pthread_t th_id = {
+ 0,
+ };
+ int th_ret = 0;
+ int i = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ /* Count the total number of volumes */
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++;
+
+ get_gd_vol_thread_limit(&vol_per_thread_limit);
+
+ if ((vol_per_thread_limit == 1) || (vol_per_thread_limit > 100)) {
+ totthread = 0;
+ } else {
+ totthread = volcnt / vol_per_thread_limit;
+ endindex = volcnt % vol_per_thread_limit;
+ if (endindex)
+ totthread++;
+ }
+
+ if (totthread == 0) {
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ count++;
+ ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");
+ if (ret)
+ goto out;
+
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
+ continue;
+
+ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,
+ "volume");
+ if (ret)
+ goto out;
+ }
+ } else {
+ for (i = 0; i < totthread; i++) {
+ arg = calloc(1, sizeof(*arg));
+ dict_arr[i] = dict_new();
+ arg->this = this;
+ arg->voldict = dict_arr[i];
+ arg->start = start;
+ if (!endindex) {
+ arg->end = ((i + 1) * vol_per_thread_limit);
+ } else {
+ arg->end = (start + endindex);
+ }
+ th_ret = gf_thread_create_detached(
+ &th_id, glusterd_add_bulk_volumes_create_thread, arg,
+ "bulkvoldict");
+ if (th_ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "glusterd_add_bulk_volume %s"
+ " thread creation failed",
+ "bulkvoldict");
+ free(arg);
+ goto out;
+ }
+
+ start = start + vol_per_thread_limit;
+ GF_ATOMIC_INC(priv->thread_count);
+ gf_log(this->name, GF_LOG_INFO,
+ "Create thread %d to populate dict data for volume"
+ " start index is %d end index is %d",
+ (i + 1), arg->start, arg->end);
+ }
+ while (GF_ATOMIC_GET(priv->thread_count)) {
+ sleep(1);
+ }
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Finished dictionary popluation in all threads");
+ for (i = 0; i < totthread; i++) {
+ dict_copy_with_ref(dict_arr[i], dict);
+ dict_unref(dict_arr[i]);
+ }
+ gf_log(this->name, GF_LOG_INFO,
+ "Finished merger of all dictionraies into single one");
+ }
+
+ ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt);
if (ret)
goto out;
@@ -3420,6 +3588,9 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
goto out;
}
+ if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))
+ goto skip_quota;
+
snprintf(key, sizeof(key), "volume%d.quota-version", count);
ret = dict_get_uint32(peer_data, key, &quota_version);
if (ret) {
@@ -3471,6 +3642,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,
goto out;
}
}
+
+skip_quota:
*status = GLUSTERD_VOL_COMP_SCS;
out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 5120838a70a..4cdbbbec250 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -590,6 +590,51 @@ out:
}
+/*
+ * Validation callback for the per-thread volume count option.
+ *
+ * The request is rejected when brick multiplexing is disabled, when value is
+ * not an unsigned integer, or when it lies outside the inclusive range
+ * 5..200. On rejection *op_errstr is set through gf_asprintf() and the same
+ * message is logged.
+ *
+ * volinfo and dict are not used here. Returns 0 when the value is accepted
+ * and -1 otherwise.
+ */
static int
+validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict,
+                                 char *key, char *value, char **op_errstr)
+{
+    xlator_t *this = NULL;
+    uint val = 0;
+    int ret = -1;
+
+    this = THIS;
+    GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+    if (!is_brick_mx_enabled()) {
+        gf_asprintf(op_errstr,
+                    "Brick-multiplexing is not enabled. "
+                    "Please enable brick multiplexing before trying "
+                    "to set this option.");
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s",
+               *op_errstr);
+        goto out;
+    }
+
+    ret = gf_string2uint(value, &val);
+    if (ret) {
+        gf_asprintf(op_errstr,
+                    "%s is not a valid count. "
+                    "%s expects an unsigned integer.",
+                    value, key);
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
+               *op_errstr);
+        /* Stop here: falling through to the range check would replace
+         * (and leak) the message that was just allocated. */
+        ret = -1;
+        goto out;
+    }
+
+    if ((val < 5) || (val > 200)) {
+        gf_asprintf(
+            op_errstr,
+            "Please set this option to a value between 5 and 200 "
+            "to optimize dict generated while no. of volumes are more");
+        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
+               *op_errstr);
+        ret = -1;
+        goto out;
+    }
+out:
+    gf_msg_debug("glusterd", 0, "Returning %d", ret);
+
+    return ret;
+}
+
+static int
validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
char *value, char **op_errstr)
{
@@ -2785,6 +2830,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {
"brick multiplexing. Brick multiplexing ensures that "
"compatible brick instances can share one single "
"brick process."},
+ {.key = GLUSTERD_VOL_CNT_PER_THRD,
+ .voltype = "mgmt/glusterd",
+ .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE,
+ .op_version = 700000,
+ .validate_fn = validate_volume_per_thread_limit,
+ .type = GLOBAL_NO_DOC,
+ .description =
+ "This option can be used to limit the number of volumes "
+ "handled by per thread to populate peer data.The option accepts "
+ " the value in the range of 5 to 200"},
{.key = GLUSTERD_BRICKMUX_LIMIT_KEY,
.voltype = "mgmt/glusterd",
.value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE,
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index eb89e3be93e..5b9fed6c80d 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -57,8 +57,10 @@
#define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
#define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"
#define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex"
+#define GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread"
#define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process"
#define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250"
+#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100"
#define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging"
#define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level"
@@ -222,8 +224,16 @@ typedef struct {
which might lead the modification of volinfo
list.
*/
+ gf_atomic_t thread_count;
} glusterd_conf_t;
+/* Arguments handed to one glusterd_add_bulk_volumes_create_thread() worker.
+ * The worker releases the structure with free() when it finishes. */
+typedef struct glusterd_add_dict_args {
+ xlator_t *this; /* xlator the worker installs as THIS */
+ dict_t *voldict; /* dictionary this worker adds its volumes to */
+ int start; /* first volume position handled (1-based, inclusive) */
+ int end; /* last volume position handled (inclusive) */
+} glusterd_add_dict_args_t;
+
typedef enum gf_brick_status {
GF_BRICK_STOPPED,
GF_BRICK_STARTED,