summary | refs | log | tree | commit | diff | stats
path: root/xlators/mgmt/glusterd/src/glusterd-utils.c
diff options
context:
space:
mode:
author: Xavier Hernandez <xhernandez@datalab.es> 2014-05-15 10:35:14 +0200
committer: Vijay Bellur <vbellur@redhat.com> 2014-07-11 10:34:24 -0700
commit: 1392da3e237d8ea080573909015916e3544a6d2c (patch)
tree: 89f7f37e65b5d526c18e043cc7dbb51c9e19a50e /xlators/mgmt/glusterd/src/glusterd-utils.c
parent: ad112305a1c7452b13c92238b40ded80361838f3 (diff)
cli/glusterd: Added support for dispersed volumes
Two new options have been added to the 'create' command of the cli interface:

    disperse [<count>] redundancy <count>

Both are optional. A dispersed volume is created by specifying at least one of them. If 'disperse' is missing, or it is present but '<count>' is not, the number of bricks enumerated in the command line is taken as the disperse count.

If 'redundancy' is missing, the lowest optimal value is assumed. A configuration is considered optimal (for most workloads) when the disperse count - redundancy count is a power of 2. If the resulting redundancy is 1, the volume is created normally, but if it's greater than 1, a warning is shown to the user and he/she must answer yes/no to continue volume creation. If there isn't any optimal value for the given number of bricks, a warning is also shown and, if the user accepts, a redundancy of 1 is used.

If 'redundancy' is specified and the resulting volume is not optimal, another warning is shown to the user.

A distributed-disperse volume can be created using a number of bricks that is a multiple of the disperse count.

Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4
BUG: 1118629
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/7782
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-utils.c | 80
1 files changed, 71 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index dc923b1eeb4..aff2356eb4f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -548,6 +548,8 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
new_volinfo->type = volinfo->type;
new_volinfo->replica_count = volinfo->replica_count;
new_volinfo->stripe_count = volinfo->stripe_count;
+ new_volinfo->disperse_count = volinfo->disperse_count;
+ new_volinfo->redundancy_count = volinfo->redundancy_count;
new_volinfo->dist_leaf_count = volinfo->dist_leaf_count;
new_volinfo->sub_count = volinfo->sub_count;
new_volinfo->transport_type = volinfo->transport_type;
@@ -2525,6 +2527,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
goto out;
memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
+ ret = dict_set_int32 (dict, key, volinfo->disperse_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
+ ret = dict_set_int32 (dict, key, volinfo->redundancy_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count);
ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count);
if (ret)
@@ -4206,6 +4220,24 @@ glusterd_import_volinfo (dict_t *peer_data, int count,
gf_log (THIS->name, GF_LOG_INFO,
"peer is possibly old version");
+ /* not having a 'disperse_count' key is not a error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
+ ret = dict_get_int32 (peer_data, key, &new_volinfo->disperse_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
+ /* not having a 'redundancy_count' key is not a error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
+ ret = dict_get_int32 (peer_data, key, &new_volinfo->redundancy_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
/* not having a 'dist_count' key is not a error
(as peer may be of old version) */
memset (key, 0, sizeof (key));
@@ -6932,6 +6964,9 @@ glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo)
int rcount = volinfo->replica_count;
int scount = volinfo->stripe_count;
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
+ return volinfo->disperse_count;
+
return (rcount ? rcount : 1) * (scount ? scount : 1);
}
@@ -11694,6 +11729,13 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
}
}
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ if (volinfo->op_version < GD_OP_VERSION_3_6_0)
+ volinfo->op_version = GD_OP_VERSION_3_6_0;
+ if (volinfo->client_op_version < GD_OP_VERSION_3_6_0)
+ volinfo->client_op_version = GD_OP_VERSION_3_6_0;
+ }
+
return;
}
@@ -12774,7 +12816,7 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
- up_count = volinfo->replica_count - down_count;
+ up_count = volinfo->dist_leaf_count - down_count;
if (quorum_type && !strcmp (quorum_type, "fixed")) {
if (up_count >= quorum_count) {
@@ -12782,7 +12824,8 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
} else {
- if (volinfo->replica_count % 2 == 0) {
+ if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) &&
+ (volinfo->dist_leaf_count % 2 == 0)) {
if ((up_count > quorum_count) ||
((up_count == quorum_count) && first_brick_on)) {
quorum_met = _gf_true;
@@ -12835,8 +12878,9 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
goto out;
}
- if (!glusterd_is_volume_replicate (volinfo) ||
- volinfo->replica_count < 3) {
+ if ((!glusterd_is_volume_replicate (volinfo) ||
+ volinfo->replica_count < 3) &&
+ (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) {
for (i = 0; i < volinfo->brick_count ; i++) {
/* for a pure distribute volume, and replica volume
with replica count 2, quorum is not met if even
@@ -12858,7 +12902,8 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
ret = 0;
quorum_met = _gf_true;
} else {
- distribute_subvols = volinfo->brick_count / volinfo->replica_count;
+ distribute_subvols = volinfo->brick_count /
+ volinfo->dist_leaf_count;
for (j = 0; j < distribute_subvols; j++) {
// by default assume quorum is not met
/* TODO: Handle distributed striped replicate volumes
@@ -12867,11 +12912,11 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
*/
ret = 1;
quorum_met = _gf_false;
- for (i = 0; i < volinfo->replica_count; i++) {
+ for (i = 0; i < volinfo->dist_leaf_count; i++) {
snprintf (key, sizeof (key),
"%s%"PRId64".brick%"PRId64".status", key_prefix,
index,
- (j * volinfo->replica_count) + i);
+ (j * volinfo->dist_leaf_count) + i);
ret = dict_get_int32 (dict, key, &brick_online);
if (ret || !brick_online) {
if (i == 0)
@@ -13043,6 +13088,9 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
else
quorum_count =
volinfo->replica_count/2 + 1;
+ } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
+ quorum_count = volinfo->disperse_count -
+ volinfo->redundancy_count;
} else {
quorum_count = volinfo->brick_count;
}
@@ -13061,8 +13109,22 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
if the quorum-type option is not set to auto,
the behavior is set to the default behavior)
*/
- if (!ret)
- quorum_count = tmp;
+ if (!ret) {
+ /* for dispersed volumes, only allow quorums
+ equal or larger than minimum functional
+ value.
+ */
+ if ((GF_CLUSTER_TYPE_DISPERSE !=
+ volinfo->type) ||
+ (tmp >= quorum_count)) {
+ quorum_count = tmp;
+ } else {
+ gf_log(this->name, GF_LOG_INFO,
+ "Ignoring small quorum-count "
+ "(%d) on dispersed volume", tmp);
+ quorum_type = NULL;
+ }
+ }
else
quorum_type = NULL;
}