diff options
-rw-r--r-- | tests/bugs/glusterd/bug-1699339.t | 69 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 269 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 55 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 10 |
6 files changed, 359 insertions, 48 deletions
diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t new file mode 100644 index 00000000000..3e950f48432 --- /dev/null +++ b/tests/bugs/glusterd/bug-1699339.t @@ -0,0 +1,69 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../cluster.rc + +cleanup; + +NUM_VOLS=15 + + +get_brick_base () { + printf "%s/vol%02d" $B0 $1 +} + +function count_up_bricks { + vol=$1; + $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l +} + +create_volume () { + + local vol_name=$(printf "%s-vol%02d" $V0 $1) + + TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name} + TEST $CLI_1 volume start $vol_name +} + +TEST launch_cluster 3 +TEST $CLI_1 volume set all cluster.brick-multiplex on + +# The option accepts the value in the range from 5 to 200 +TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210 +TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4 + +TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5 + +TEST $CLI_1 peer probe $H2; +EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count + +TEST $CLI_1 peer probe $H3; +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +# Our infrastructure can't handle an arithmetic expression here. The formula +# is (NUM_VOLS-1)*2 because it sees each TEST/EXPECT once but needs the other +# NUM_VOLS-1 and there are 2 such statements in each iteration. 
+TESTS_EXPECTED_IN_LOOP=28 +for i in $(seq 1 $NUM_VOLS); do + starttime="$(date +%s)"; + create_volume $i +done + +TEST kill_glusterd 1 + +vol1=$(printf "%s-vol%02d" $V0 1) +TEST $CLI_2 volume set $vol1 performance.readdir-ahead on +vol2=$(printf "%s-vol%02d" $V0 2) +TEST $CLI_2 volume set $vol2 performance.readdir-ahead on + +# Bring back 1st glusterd +TEST $glusterd_1 +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead + +vol_name=$(printf "%s-vol%02d" $V0 2) +EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead + +cleanup diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index a2cb4024f11..b51892a6e0e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -87,6 +87,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = { * TBD: Discuss the default value for this. Maybe this should be a * dynamic value depending on the memory specifications per node */ {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE}, + {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE}, {GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"}, {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"}, {NULL}, diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c index cf17fcbff16..71f1c78622f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -2099,6 +2099,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername, goto out; } + if (!snap_count) + goto out; + for (i = 1; i <= snap_count; i++) { /* Compare one snapshot from peer_data at a time */ ret = glusterd_compare_snap(peer_data, i, peername, peerid); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 50534852d8d..e50db060aa1 
100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -152,6 +152,47 @@ out: return ret; } +int +get_gd_vol_thread_limit(int *thread_limit) +{ + char *value = NULL; + int ret = -1; + int vol_per_thread_limit = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + if (!is_brick_mx_enabled()) { + vol_per_thread_limit = 1; + ret = 0; + goto out; + } + + ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD, + SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value); + if (ret) { + value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE; + } + ret = gf_string2int(value, &vol_per_thread_limit); + if (ret) + goto out; + +out: + *thread_limit = vol_per_thread_limit; + + gf_msg_debug("glusterd", 0, + "Per Thread volume limit set to %d glusterd to populate dict " + "data parallel", + *thread_limit); + + return ret; +} + extern struct volopt_map_entry glusterd_volopt_map[]; extern glusterd_all_vol_opts valid_all_vol_opts[]; @@ -2991,50 +3032,55 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict, /* tiering related variables */ - snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); - ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); - if (ret) - goto out; + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); + ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count); - ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count); + ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); - ret = 
dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); + ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); - ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); + ret = dict_set_uint32(dict, key, + volinfo->tier_info.cold_disperse_count); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); - ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); + ret = dict_set_uint32(dict, key, + volinfo->tier_info.cold_redundancy_count); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); - ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); + ret = dict_set_uint32(dict, key, + volinfo->tier_info.cold_dist_leaf_count); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count); - ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count); + ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); - ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); + ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); + if (ret) + goto out; - snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); 
- ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); - if (ret) - goto out; + snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); + ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); + if (ret) + goto out; + } snprintf(key, sizeof(key), "%s%d", prefix, count); ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo); @@ -3284,33 +3330,40 @@ out: return ret; } -int32_t -glusterd_add_volumes_to_export_dict(dict_t **peer_data) +void * +glusterd_add_bulk_volumes_create_thread(void *data) { int32_t ret = -1; - dict_t *dict = NULL; glusterd_conf_t *priv = NULL; glusterd_volinfo_t *volinfo = NULL; int32_t count = 0; - glusterd_dict_ctx_t ctx = {0}; xlator_t *this = NULL; + glusterd_add_dict_args_t *arg = NULL; + dict_t *dict = NULL; + int start = 0; + int end = 0; - this = THIS; - GF_ASSERT(this); + GF_ASSERT(data); + + arg = data; + dict = arg->voldict; + start = arg->start; + end = arg->end; + this = arg->this; + THIS = arg->this; priv = this->private; GF_ASSERT(priv); - dict = dict_new(); - if (!dict) - goto out; - cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) { count++; + if ((count < start) || (count > end)) + continue; + ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); if (ret) goto out; - if (!glusterd_is_volume_quota_enabled(volinfo)) + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) continue; ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, "volume"); @@ -3318,7 +3371,122 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data) goto out; } - ret = dict_set_int32n(dict, "count", SLEN("count"), count); +out: + GF_ATOMIC_DEC(priv->thread_count); + free(arg); + return NULL; +} + +int32_t +glusterd_add_volumes_to_export_dict(dict_t **peer_data) +{ + int32_t ret = -1; + dict_t *dict = NULL; + dict_t *dict_arr[128] = { + 0, + }; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t count = 0; + glusterd_dict_ctx_t ctx = {0}; + 
xlator_t *this = NULL; + int totthread = 0; + int volcnt = 0; + int start = 1; + int endindex = 0; + int vol_per_thread_limit = 0; + glusterd_add_dict_args_t *arg = NULL; + pthread_t th_id = { + 0, + }; + int th_ret = 0; + int i = 0; + + this = THIS; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + dict = dict_new(); + if (!dict) + goto out; + + /* Count the total number of volumes */ + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++; + + get_gd_vol_thread_limit(&vol_per_thread_limit); + + if ((vol_per_thread_limit == 1) || (vol_per_thread_limit > 100)) { + totthread = 0; + } else { + totthread = volcnt / vol_per_thread_limit; + endindex = volcnt % vol_per_thread_limit; + if (endindex) + totthread++; + } + + if (totthread == 0) { + cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) + { + count++; + ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); + if (ret) + goto out; + + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + continue; + + ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, + "volume"); + if (ret) + goto out; + } + } else { + for (i = 0; i < totthread; i++) { + arg = calloc(1, sizeof(*arg)); + dict_arr[i] = dict_new(); + arg->this = this; + arg->voldict = dict_arr[i]; + arg->start = start; + if (!endindex) { + arg->end = ((i + 1) * vol_per_thread_limit); + } else { + arg->end = (start + endindex); + } + th_ret = gf_thread_create_detached( + &th_id, glusterd_add_bulk_volumes_create_thread, arg, + "bulkvoldict"); + if (th_ret) { + gf_log(this->name, GF_LOG_ERROR, + "glusterd_add_bulk_volume %s" + " thread creation failed", + "bulkvoldict"); + free(arg); + goto out; + } + + start = start + vol_per_thread_limit; + GF_ATOMIC_INC(priv->thread_count); + gf_log(this->name, GF_LOG_INFO, + "Create thread %d to populate dict data for volume" + " start index is %d end index is %d", + (i + 1), arg->start, arg->end); + } + while (GF_ATOMIC_GET(priv->thread_count)) { + sleep(1); + } + 
+ gf_log(this->name, GF_LOG_INFO, + "Finished dictionary popluation in all threads"); + for (i = 0; i < totthread; i++) { + dict_copy_with_ref(dict_arr[i], dict); + dict_unref(dict_arr[i]); + } + gf_log(this->name, GF_LOG_INFO, + "Finished merger of all dictionraies into single one"); + } + + ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt); if (ret) goto out; @@ -3420,6 +3588,9 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, goto out; } + if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) + goto skip_quota; + snprintf(key, sizeof(key), "volume%d.quota-version", count); ret = dict_get_uint32(peer_data, key, &quota_version); if (ret) { @@ -3471,6 +3642,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count, goto out; } } + +skip_quota: *status = GLUSTERD_VOL_COMP_SCS; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 5120838a70a..4cdbbbec250 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -590,6 +590,51 @@ out: } static int +validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict, + char *key, char *value, char **op_errstr) +{ + xlator_t *this = NULL; + uint val = 0; + int ret = -1; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + + if (!is_brick_mx_enabled()) { + gf_asprintf(op_errstr, + "Brick-multiplexing is not enabled. " + "Please enable brick multiplexing before trying " + "to set this option."); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s", + *op_errstr); + goto out; + } + + ret = gf_string2uint(value, &val); + if (ret) { + gf_asprintf(op_errstr, + "%s is not a valid count. 
" + "%s expects an unsigned integer.", + value, key); + gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", + *op_errstr); + } + + if ((val < 5) || (val > 200)) { + gf_asprintf( + op_errstr, + "Please set this option to a greater than 5 or less than 200 " + "to optimize dict generated while no. of volumes are more"); + ret = -1; + goto out; + } +out: + gf_msg_debug("glusterd", 0, "Returning %d", ret); + + return ret; +} + +static int validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, char **op_errstr) { @@ -2785,6 +2830,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { "brick multiplexing. Brick multiplexing ensures that " "compatible brick instances can share one single " "brick process."}, + {.key = GLUSTERD_VOL_CNT_PER_THRD, + .voltype = "mgmt/glusterd", + .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE, + .op_version = 700000, + .validate_fn = validate_volume_per_thread_limit, + .type = GLOBAL_NO_DOC, + .description = + "This option can be used to limit the number of volumes " + "handled by per thread to populate peer data.The option accepts " + " the value in the range of 5 to 200"}, {.key = GLUSTERD_BRICKMUX_LIMIT_KEY, .voltype = "mgmt/glusterd", .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE, diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index eb89e3be93e..5b9fed6c80d 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -57,8 +57,10 @@ #define GLUSTER_SHARED_STORAGE "gluster_shared_storage" #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage" #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" +#define GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread" #define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process" #define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250" +#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100" #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging" 
#define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level" @@ -222,8 +224,16 @@ typedef struct { which might lead the modification of volinfo list. */ + gf_atomic_t thread_count; } glusterd_conf_t; +typedef struct glusterd_add_dict_args { + xlator_t *this; + dict_t *voldict; + int start; + int end; +} glusterd_add_dict_args_t; + typedef enum gf_brick_status { GF_BRICK_STOPPED, GF_BRICK_STARTED, |