diff options
author | Xavier Hernandez <xhernandez@datalab.es> | 2014-05-15 10:35:14 +0200 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-07-11 10:34:24 -0700 |
commit | 1392da3e237d8ea080573909015916e3544a6d2c (patch) | |
tree | 89f7f37e65b5d526c18e043cc7dbb51c9e19a50e /xlators/mgmt/glusterd/src/glusterd-utils.c | |
parent | ad112305a1c7452b13c92238b40ded80361838f3 (diff) |
cli/glusterd: Added support for dispersed volumes
Two new options have been added to the 'create' command of the cli
interface:
disperse [<count>] redundancy <count>
Both are optional. A dispersed volume is created by specifying at
least one of them. If 'disperse' is missing, or it is present but
'<count>' is omitted, the number of bricks enumerated in the command
line is taken as the disperse count.
If 'redundancy' is missing, the lowest optimal value is assumed. A
configuration is considered optimal (for most workloads) when the
difference (disperse count - redundancy count) is a power of 2. If the resulting
redundancy is 1, the volume is created normally, but if it's greater
than 1, a warning is shown to the user and he/she must answer yes/no
to continue volume creation. If there isn't any optimal value for
the given number of bricks, a warning is also shown and, if the user
accepts, a redundancy of 1 is used.
If 'redundancy' is specified and the resulting volume is not optimal,
another warning is shown to the user.
A distributed-disperse volume can be created using a number of bricks
that is a multiple of the disperse count.
Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4
BUG: 1118629
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/7782
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 80 |
1 files changed, 71 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index dc923b1eeb4..aff2356eb4f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -548,6 +548,8 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, new_volinfo->type = volinfo->type; new_volinfo->replica_count = volinfo->replica_count; new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->disperse_count = volinfo->disperse_count; + new_volinfo->redundancy_count = volinfo->redundancy_count; new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; new_volinfo->sub_count = volinfo->sub_count; new_volinfo->transport_type = volinfo->transport_type; @@ -2525,6 +2527,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count); + ret = dict_set_int32 (dict, key, volinfo->disperse_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count); + ret = dict_set_int32 (dict, key, volinfo->redundancy_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count); ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count); if (ret) @@ -4206,6 +4220,24 @@ glusterd_import_volinfo (dict_t *peer_data, int count, gf_log (THIS->name, GF_LOG_INFO, "peer is possibly old version"); + /* not having a 'disperse_count' key is not a error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->disperse_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + + /* not having a 'redundancy_count' key is not a error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, 
sizeof (key), "%s%d.redundancy_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->redundancy_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + /* not having a 'dist_count' key is not a error (as peer may be of old version) */ memset (key, 0, sizeof (key)); @@ -6932,6 +6964,9 @@ glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo) int rcount = volinfo->replica_count; int scount = volinfo->stripe_count; + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) + return volinfo->disperse_count; + return (rcount ? rcount : 1) * (scount ? scount : 1); } @@ -11694,6 +11729,13 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo) } } + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + if (volinfo->op_version < GD_OP_VERSION_3_6_0) + volinfo->op_version = GD_OP_VERSION_3_6_0; + if (volinfo->client_op_version < GD_OP_VERSION_3_6_0) + volinfo->client_op_version = GD_OP_VERSION_3_6_0; + } + return; } @@ -12774,7 +12816,7 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict, goto out; } - up_count = volinfo->replica_count - down_count; + up_count = volinfo->dist_leaf_count - down_count; if (quorum_type && !strcmp (quorum_type, "fixed")) { if (up_count >= quorum_count) { @@ -12782,7 +12824,8 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict, goto out; } } else { - if (volinfo->replica_count % 2 == 0) { + if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) && + (volinfo->dist_leaf_count % 2 == 0)) { if ((up_count > quorum_count) || ((up_count == quorum_count) && first_brick_on)) { quorum_met = _gf_true; @@ -12835,8 +12878,9 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index, goto out; } - if (!glusterd_is_volume_replicate (volinfo) || - volinfo->replica_count < 3) { + if ((!glusterd_is_volume_replicate (volinfo) || + volinfo->replica_count < 3) && + (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) { for (i = 0; i < volinfo->brick_count 
; i++) { /* for a pure distribute volume, and replica volume with replica count 2, quorum is not met if even @@ -12858,7 +12902,8 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index, ret = 0; quorum_met = _gf_true; } else { - distribute_subvols = volinfo->brick_count / volinfo->replica_count; + distribute_subvols = volinfo->brick_count / + volinfo->dist_leaf_count; for (j = 0; j < distribute_subvols; j++) { // by default assume quorum is not met /* TODO: Handle distributed striped replicate volumes @@ -12867,11 +12912,11 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index, */ ret = 1; quorum_met = _gf_false; - for (i = 0; i < volinfo->replica_count; i++) { + for (i = 0; i < volinfo->dist_leaf_count; i++) { snprintf (key, sizeof (key), "%s%"PRId64".brick%"PRId64".status", key_prefix, index, - (j * volinfo->replica_count) + i); + (j * volinfo->dist_leaf_count) + i); ret = dict_get_int32 (dict, key, &brick_online); if (ret || !brick_online) { if (i == 0) @@ -13043,6 +13088,9 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume, else quorum_count = volinfo->replica_count/2 + 1; + } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { + quorum_count = volinfo->disperse_count - + volinfo->redundancy_count; } else { quorum_count = volinfo->brick_count; } @@ -13061,8 +13109,22 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume, if the quorum-type option is not set to auto, the behavior is set to the default behavior) */ - if (!ret) - quorum_count = tmp; + if (!ret) { + /* for dispersed volumes, only allow quorums + equal or larger than minimum functional + value. + */ + if ((GF_CLUSTER_TYPE_DISPERSE != + volinfo->type) || + (tmp >= quorum_count)) { + quorum_count = tmp; + } else { + gf_log(this->name, GF_LOG_INFO, + "Ignoring small quorum-count " + "(%d) on dispersed volume", tmp); + quorum_type = NULL; + } + } else quorum_type = NULL; } |