diff options
| -rw-r--r-- | tests/bugs/glusterd/bug-1699339.t | 69 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c | 3 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 269 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 55 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 10 | 
6 files changed, 359 insertions, 48 deletions
diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t new file mode 100644 index 00000000000..3e950f48432 --- /dev/null +++ b/tests/bugs/glusterd/bug-1699339.t @@ -0,0 +1,69 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../cluster.rc + +cleanup; + +NUM_VOLS=15 + + +get_brick_base () { +	printf "%s/vol%02d" $B0 $1 +} + +function count_up_bricks { +        vol=$1; +        $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l +} + +create_volume () { + +	local vol_name=$(printf "%s-vol%02d" $V0 $1) + +        TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name} +	TEST $CLI_1 volume start $vol_name +} + +TEST launch_cluster 3 +TEST $CLI_1 volume set all cluster.brick-multiplex on + +# The option accepts the value in the range from 5 to 200 +TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210 +TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4 + +TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5 + +TEST $CLI_1 peer probe $H2; +EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count + +TEST $CLI_1 peer probe $H3; +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +# Our infrastructure can't handle an arithmetic expression here.  The formula +# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other +# NUM_VOLS-1 and there are 5 such statements in each iteration. 
+TESTS_EXPECTED_IN_LOOP=28 +for i in $(seq 1 $NUM_VOLS); do +        starttime="$(date +%s)"; +	create_volume $i +done + +TEST kill_glusterd 1 + +vol1=$(printf "%s-vol%02d" $V0 1) +TEST $CLI_2 volume set $vol1 performance.readdir-ahead on +vol2=$(printf "%s-vol%02d" $V0 2) +TEST $CLI_2 volume set $vol2 performance.readdir-ahead on + +# Bring back 1st glusterd +TEST $glusterd_1 +EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count + +EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead + +vol_name=$(printf "%s-vol%02d" $V0 2) +EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol2 performance.readdir-ahead + +cleanup diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index a2cb4024f11..b51892a6e0e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -87,6 +87,7 @@ glusterd_all_vol_opts valid_all_vol_opts[] = {       * TBD: Discuss the default value for this. Maybe this should be a       * dynamic value depending on the memory specifications per node */      {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE}, +    {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE},      {GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},      {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},      {NULL}, diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c index cf17fcbff16..71f1c78622f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -2099,6 +2099,9 @@ glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername,          goto out;      } +    if (!snap_count) +        goto out; +      for (i = 1; i <= snap_count; i++) {          /* Compare one snapshot from peer_data at a time */          ret = glusterd_compare_snap(peer_data, i, peername, peerid); diff --git 
a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 50534852d8d..e50db060aa1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -152,6 +152,47 @@ out:      return ret;  } +int +get_gd_vol_thread_limit(int *thread_limit) +{ +    char *value = NULL; +    int ret = -1; +    int vol_per_thread_limit = 0; +    xlator_t *this = NULL; +    glusterd_conf_t *priv = NULL; + +    this = THIS; +    GF_VALIDATE_OR_GOTO("glusterd", this, out); + +    priv = this->private; +    GF_VALIDATE_OR_GOTO(this->name, priv, out); + +    if (!is_brick_mx_enabled()) { +        vol_per_thread_limit = 1; +        ret = 0; +        goto out; +    } + +    ret = dict_get_strn(priv->opts, GLUSTERD_VOL_CNT_PER_THRD, +                        SLEN(GLUSTERD_VOL_CNT_PER_THRD), &value); +    if (ret) { +        value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE; +    } +    ret = gf_string2int(value, &vol_per_thread_limit); +    if (ret) +        goto out; + +out: +    *thread_limit = vol_per_thread_limit; + +    gf_msg_debug("glusterd", 0, +                 "Per Thread volume limit set to %d glusterd to populate dict " +                 "data parallel", +                 *thread_limit); + +    return ret; +} +  extern struct volopt_map_entry glusterd_volopt_map[];  extern glusterd_all_vol_opts valid_all_vol_opts[]; @@ -2991,50 +3032,55 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,      /* tiering related variables */ -    snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); -    if (ret) -        goto out; +    if (volinfo->type == GF_CLUSTER_TYPE_TIER) { +        snprintf(key, sizeof(key), "%s%d.cold_brick_count", prefix, count); +        ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_brick_count); +        if (ret) +            goto out; -    snprintf(key, sizeof(key), 
"%s%d.cold_type", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.cold_type", prefix, count); +        ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_type); +        if (ret) +            goto out; -    snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.cold_replica_count", prefix, count); +        ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_replica_count); +        if (ret) +            goto out; -    snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_disperse_count); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.cold_disperse_count", prefix, count); +        ret = dict_set_uint32(dict, key, +                              volinfo->tier_info.cold_disperse_count); +        if (ret) +            goto out; -    snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_redundancy_count); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.cold_redundancy_count", prefix, count); +        ret = dict_set_uint32(dict, key, +                              volinfo->tier_info.cold_redundancy_count); +        if (ret) +            goto out; -    snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.cold_dist_leaf_count); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.cold_dist_count", prefix, count); +        ret = dict_set_uint32(dict, key, +                              volinfo->tier_info.cold_dist_leaf_count); +        if (ret) +            goto out; -    snprintf(key, 
sizeof(key), "%s%d.hot_brick_count", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.hot_brick_count", prefix, count); +        ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_brick_count); +        if (ret) +            goto out; -    snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.hot_type", prefix, count); +        ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_type); +        if (ret) +            goto out; -    snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); -    ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); -    if (ret) -        goto out; +        snprintf(key, sizeof(key), "%s%d.hot_replica_count", prefix, count); +        ret = dict_set_uint32(dict, key, volinfo->tier_info.hot_replica_count); +        if (ret) +            goto out; +    }      snprintf(key, sizeof(key), "%s%d", prefix, count);      ret = gd_add_vol_snap_details_to_dict(dict, key, volinfo); @@ -3284,33 +3330,40 @@ out:      return ret;  } -int32_t -glusterd_add_volumes_to_export_dict(dict_t **peer_data) +void * +glusterd_add_bulk_volumes_create_thread(void *data)  {      int32_t ret = -1; -    dict_t *dict = NULL;      glusterd_conf_t *priv = NULL;      glusterd_volinfo_t *volinfo = NULL;      int32_t count = 0; -    glusterd_dict_ctx_t ctx = {0};      xlator_t *this = NULL; +    glusterd_add_dict_args_t *arg = NULL; +    dict_t *dict = NULL; +    int start = 0; +    int end = 0; -    this = THIS; -    GF_ASSERT(this); +    GF_ASSERT(data); + +    arg = data; +    dict = arg->voldict; +    start = arg->start; +    end = arg->end; +    this = arg->this; +    THIS = arg->this;      priv = this->private;      GF_ASSERT(priv); -    dict = dict_new(); -    if 
(!dict) -        goto out; -      cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)      {          count++; +        if ((count < start) || (count > end)) +            continue; +          ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume");          if (ret)              goto out; -        if (!glusterd_is_volume_quota_enabled(volinfo)) +        if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA))              continue;          ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count,                                                    "volume"); @@ -3318,7 +3371,122 @@ glusterd_add_volumes_to_export_dict(dict_t **peer_data)              goto out;      } -    ret = dict_set_int32n(dict, "count", SLEN("count"), count); +out: +    GF_ATOMIC_DEC(priv->thread_count); +    free(arg); +    return NULL; +} + +int32_t +glusterd_add_volumes_to_export_dict(dict_t **peer_data) +{ +    int32_t ret = -1; +    dict_t *dict = NULL; +    dict_t *dict_arr[128] = { +        0, +    }; +    glusterd_conf_t *priv = NULL; +    glusterd_volinfo_t *volinfo = NULL; +    int32_t count = 0; +    glusterd_dict_ctx_t ctx = {0}; +    xlator_t *this = NULL; +    int totthread = 0; +    int volcnt = 0; +    int start = 1; +    int endindex = 0; +    int vol_per_thread_limit = 0; +    glusterd_add_dict_args_t *arg = NULL; +    pthread_t th_id = { +        0, +    }; +    int th_ret = 0; +    int i = 0; + +    this = THIS; +    GF_ASSERT(this); +    priv = this->private; +    GF_ASSERT(priv); + +    dict = dict_new(); +    if (!dict) +        goto out; + +    /* Count the total number of volumes */ +    cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) volcnt++; + +    get_gd_vol_thread_limit(&vol_per_thread_limit); + +    if ((vol_per_thread_limit == 1) || (vol_per_thread_limit > 100)) { +        totthread = 0; +    } else { +        totthread = volcnt / vol_per_thread_limit; +        endindex = volcnt % vol_per_thread_limit; +        if (endindex) +    
        totthread++; +    } + +    if (totthread == 0) { +        cds_list_for_each_entry(volinfo, &priv->volumes, vol_list) +        { +            count++; +            ret = glusterd_add_volume_to_dict(volinfo, dict, count, "volume"); +            if (ret) +                goto out; + +            if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) +                continue; + +            ret = glusterd_vol_add_quota_conf_to_dict(volinfo, dict, count, +                                                      "volume"); +            if (ret) +                goto out; +        } +    } else { +        for (i = 0; i < totthread; i++) { +            arg = calloc(1, sizeof(*arg)); +            dict_arr[i] = dict_new(); +            arg->this = this; +            arg->voldict = dict_arr[i]; +            arg->start = start; +            if (!endindex) { +                arg->end = ((i + 1) * vol_per_thread_limit); +            } else { +                arg->end = (start + endindex); +            } +            th_ret = gf_thread_create_detached( +                &th_id, glusterd_add_bulk_volumes_create_thread, arg, +                "bulkvoldict"); +            if (th_ret) { +                gf_log(this->name, GF_LOG_ERROR, +                       "glusterd_add_bulk_volume %s" +                       " thread creation failed", +                       "bulkvoldict"); +                free(arg); +                goto out; +            } + +            start = start + vol_per_thread_limit; +            GF_ATOMIC_INC(priv->thread_count); +            gf_log(this->name, GF_LOG_INFO, +                   "Create thread %d to populate dict data for volume" +                   " start index is %d end index is %d", +                   (i + 1), arg->start, arg->end); +        } +        while (GF_ATOMIC_GET(priv->thread_count)) { +            sleep(1); +        } + +        gf_log(this->name, GF_LOG_INFO, +               "Finished dictionary population in all threads"); +      
  for (i = 0; i < totthread; i++) { +            dict_copy_with_ref(dict_arr[i], dict); +            dict_unref(dict_arr[i]); +        } +        gf_log(this->name, GF_LOG_INFO, +               "Finished merger of all dictionaries into single one"); +    } + +    ret = dict_set_int32n(dict, "count", SLEN("count"), volcnt);      if (ret)          goto out; @@ -3420,6 +3588,9 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,          goto out;      } +    if (!dict_get_sizen(volinfo->dict, VKEY_FEATURES_QUOTA)) +        goto skip_quota; +      snprintf(key, sizeof(key), "volume%d.quota-version", count);      ret = dict_get_uint32(peer_data, key, &quota_version);      if (ret) { @@ -3471,6 +3642,8 @@ glusterd_compare_friend_volume(dict_t *peer_data, int32_t count,              goto out;          }      } + +skip_quota:      *status = GLUSTERD_VOL_COMP_SCS;  out: diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 5120838a70a..4cdbbbec250 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -590,6 +590,51 @@ out:  }  static int +validate_volume_per_thread_limit(glusterd_volinfo_t *volinfo, dict_t *dict, +                                 char *key, char *value, char **op_errstr) +{ +    xlator_t *this = NULL; +    uint val = 0; +    int ret = -1; + +    this = THIS; +    GF_VALIDATE_OR_GOTO("glusterd", this, out); + +    if (!is_brick_mx_enabled()) { +        gf_asprintf(op_errstr, +                    "Brick-multiplexing is not enabled. " +                    "Please enable brick multiplexing before trying " +                    "to set this option."); +        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_WRONG_OPTS_SETTING, "%s", +               *op_errstr); +        goto out; +    } + +    ret = gf_string2uint(value, &val); +    if (ret) { +        gf_asprintf(op_errstr, +                    "%s is not a valid count. 
" +                    "%s expects an unsigned integer.", +                    value, key); +        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s", +               *op_errstr); +    } + +    if ((val < 5) || (val > 200)) { +        gf_asprintf( +            op_errstr, +            "Please set this option to a greater than 5 or less than 200 " +            "to optimize dict generated while no. of volumes are more"); +        ret = -1; +        goto out; +    } +out: +    gf_msg_debug("glusterd", 0, "Returning %d", ret); + +    return ret; +} + +static int  validate_boolean(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,                   char *value, char **op_errstr)  { @@ -2785,6 +2830,16 @@ struct volopt_map_entry glusterd_volopt_map[] = {                      "brick multiplexing. Brick multiplexing ensures that "                      "compatible brick instances can share one single "                      "brick process."}, +    {.key = GLUSTERD_VOL_CNT_PER_THRD, +     .voltype = "mgmt/glusterd", +     .value = GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE, +     .op_version = 700000, +     .validate_fn = validate_volume_per_thread_limit, +     .type = GLOBAL_NO_DOC, +     .description = +         "This option can be used to limit the number of volumes " +         "handled by per thread to populate peer data.The option accepts " +         " the value in the range of 5 to 200"},      {.key = GLUSTERD_BRICKMUX_LIMIT_KEY,       .voltype = "mgmt/glusterd",       .value = GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE, diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index eb89e3be93e..5b9fed6c80d 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -57,8 +57,10 @@  #define GLUSTER_SHARED_STORAGE "gluster_shared_storage"  #define GLUSTERD_SHARED_STORAGE_KEY "cluster.enable-shared-storage"  #define GLUSTERD_BRICK_MULTIPLEX_KEY "cluster.brick-multiplex" +#define 
GLUSTERD_VOL_CNT_PER_THRD "glusterd.vol_count_per_thread"  #define GLUSTERD_BRICKMUX_LIMIT_KEY "cluster.max-bricks-per-process"  #define GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE "250" +#define GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE "100"  #define GLUSTERD_LOCALTIME_LOGGING_KEY "cluster.localtime-logging"  #define GLUSTERD_DAEMON_LOG_LEVEL_KEY "cluster.daemon-log-level" @@ -222,8 +224,16 @@ typedef struct {                                      which might lead the modification of volinfo                                      list.                                   */ +    gf_atomic_t thread_count;  } glusterd_conf_t; +typedef struct glusterd_add_dict_args { +    xlator_t *this; +    dict_t *voldict; +    int start; +    int end; +} glusterd_add_dict_args_t; +  typedef enum gf_brick_status {      GF_BRICK_STOPPED,      GF_BRICK_STARTED,  | 
