diff options
author | Xavier Hernandez <xhernandez@datalab.es> | 2014-05-15 10:35:14 +0200 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2014-07-11 10:34:24 -0700 |
commit | 1392da3e237d8ea080573909015916e3544a6d2c (patch) | |
tree | 89f7f37e65b5d526c18e043cc7dbb51c9e19a50e /xlators | |
parent | ad112305a1c7452b13c92238b40ded80361838f3 (diff) |
cli/glusterd: Added support for dispersed volumes
Two new options have been added to the 'create' command of the cli
interface:
disperse [<count>] redundancy <count>
Both are optional. A dispersed volume is created by specifying, at
least, one of them. If 'disperse' is missing or it's present but
'<count>' does not, the number of bricks enumerated in the command
line is taken as the disperse count.
If 'redundancy' is missing, the lowest optimal value is assumed. A
configuration is considered optimal (for most workloads) when the
disperse count - redundancy count is a power of 2. If the resulting
redundancy is 1, the volume is created normally, but if it's greater
than 1, a warning is shown to the user and he/she must answer yes/no
to continue volume creation. If there isn't any optimal value for
the given number of bricks, a warning is also shown and, if the user
accepts, a redundancy of 1 is used.
If 'redundancy' is specified and the resulting volume is not optimal,
another warning is shown to the user.
A distributed-disperse volume can be created using a number of bricks
multiple of the disperse count.
Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4
BUG: 1118629
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/7782
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 15 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 23 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 80 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 24 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 21 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 2 |
8 files changed, 168 insertions, 9 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 452df759ad4..089c7d637c9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -169,6 +169,12 @@ gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count, } } break; + case GF_CLUSTER_TYPE_DISPERSE: + snprintf (err_str, err_len, "Volume %s cannot be converted " + "from dispersed to striped-" + "dispersed", volinfo->volname); + gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; } out: @@ -259,6 +265,12 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count, } } break; + case GF_CLUSTER_TYPE_DISPERSE: + snprintf (err_str, err_len, "Volume %s cannot be converted " + "from dispersed to replicated-" + "dispersed", volinfo->volname); + gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; } out: return ret; @@ -276,6 +288,7 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, switch (volinfo->type) { case GF_CLUSTER_TYPE_NONE: case GF_CLUSTER_TYPE_STRIPE: + case GF_CLUSTER_TYPE_DISPERSE: snprintf (err_str, err_len, "replica count (%d) option given for non replicate " "volume %s", replica_count, volinfo->volname); @@ -737,6 +750,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) strcpy (vol_type, "stripe"); } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) { strcpy (vol_type, "stripe-replicate"); + } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + strcpy (vol_type, "disperse"); } else { strcpy (vol_type, "distribute"); } diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index ed4bd60f88b..e10dc22b56b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -398,6 +398,16 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, 256, "volume%d.disperse_count", count); + ret = dict_set_int32 (volumes, key, volinfo->disperse_count); + if (ret) + goto out; + + snprintf (key, 256, "volume%d.redundancy_count", count); + ret = dict_set_int32 (volumes, key, volinfo->redundancy_count); + if (ret) + goto out; + snprintf (key, 256, "volume%d.transport", count); ret = dict_set_int32 (volumes, key, volinfo->transport_type); if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index c31d8a8ad71..086a6550a72 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -844,6 +844,18 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) if (ret) goto out; + snprintf (buf, sizeof (buf), "%d", volinfo->disperse_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->redundancy_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, + buf); + if (ret) + goto out; + snprintf (buf, sizeof (buf), "%d", volinfo->version); ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf); if (ret) @@ -2618,6 +2630,12 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) { volinfo->replica_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, + strlen (GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) { + volinfo->disperse_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, + strlen (GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) { + volinfo->redundancy_count = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT, strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) { volinfo->transport_type = atoi (value); @@ -2754,6 +2772,11 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo->replica_count > 0); break; + case GF_CLUSTER_TYPE_DISPERSE: + GF_ASSERT (volinfo->disperse_count > 0); + GF_ASSERT (volinfo->redundancy_count > 0); + break; + default: GF_ASSERT (0); break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 89cf24de789..fb7de7b1b10 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -44,6 +44,8 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" #define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" #define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" +#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count" +#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count" #define GLUSTERD_STORE_KEY_VOL_BRICK "brick" #define GLUSTERD_STORE_KEY_VOL_VERSION "version" #define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index dc923b1eeb4..aff2356eb4f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -548,6 +548,8 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, new_volinfo->type = volinfo->type; new_volinfo->replica_count = volinfo->replica_count; new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->disperse_count = volinfo->disperse_count; + new_volinfo->redundancy_count = volinfo->redundancy_count; new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; new_volinfo->sub_count = volinfo->sub_count; new_volinfo->transport_type = volinfo->transport_type; @@ -2525,6 +2527,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count); + ret = dict_set_int32 (dict, key, volinfo->disperse_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count); + ret = dict_set_int32 (dict, key, volinfo->redundancy_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count); ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count); if (ret) @@ -4206,6 +4220,24 @@ glusterd_import_volinfo (dict_t *peer_data, int count, gf_log (THIS->name, GF_LOG_INFO, "peer is possibly old version"); + /* not having a 'disperse_count' key is not a error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->disperse_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + + /* not having a 'redundancy_count' key is not a error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->redundancy_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + /* not having a 'dist_count' key is not a error (as peer may be of old version) */ memset (key, 0, sizeof (key)); @@ -6932,6 +6964,9 @@ glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo) int rcount = volinfo->replica_count; int scount = volinfo->stripe_count; + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) + return volinfo->disperse_count; + return (rcount ? rcount : 1) * (scount ? scount : 1); } @@ -11694,6 +11729,13 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo) } } + if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { + if (volinfo->op_version < GD_OP_VERSION_3_6_0) + volinfo->op_version = GD_OP_VERSION_3_6_0; + if (volinfo->client_op_version < GD_OP_VERSION_3_6_0) + volinfo->client_op_version = GD_OP_VERSION_3_6_0; + } + return; } @@ -12774,7 +12816,7 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict, goto out; } - up_count = volinfo->replica_count - down_count; + up_count = volinfo->dist_leaf_count - down_count; if (quorum_type && !strcmp (quorum_type, "fixed")) { if (up_count >= quorum_count) { @@ -12782,7 +12824,8 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict, goto out; } } else { - if (volinfo->replica_count % 2 == 0) { + if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) && + (volinfo->dist_leaf_count % 2 == 0)) { if ((up_count > quorum_count) || ((up_count == quorum_count) && first_brick_on)) { quorum_met = _gf_true; @@ -12835,8 +12878,9 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index, goto out; } - if (!glusterd_is_volume_replicate (volinfo) || - volinfo->replica_count < 3) { + if ((!glusterd_is_volume_replicate (volinfo) || + volinfo->replica_count < 3) && + (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) { for (i = 0; i < volinfo->brick_count ; i++) { /* for a pure distribute volume, and replica volume with replica count 2, quorum is not met if even @@ -12858,7 +12902,8 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index, ret = 0; quorum_met = _gf_true; } else { - distribute_subvols = volinfo->brick_count / volinfo->replica_count; + distribute_subvols = volinfo->brick_count / + volinfo->dist_leaf_count; for (j = 0; j < distribute_subvols; j++) { // by default assume quorum is not met /* TODO: Handle distributed striped replicate volumes @@ -12867,11 +12912,11 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index, */ ret = 1; quorum_met = _gf_false; - for (i = 0; i < volinfo->replica_count; i++) { + for (i = 0; i < volinfo->dist_leaf_count; i++) { snprintf (key, sizeof (key), "%s%"PRId64".brick%"PRId64".status", key_prefix, index, - (j * volinfo->replica_count) + i); + (j * volinfo->dist_leaf_count) + i); ret = dict_get_int32 (dict, key, &brick_online); if (ret || !brick_online) { if (i == 0) @@ -13043,6 +13088,9 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume, else quorum_count = volinfo->replica_count/2 + 1; + } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { + quorum_count = volinfo->disperse_count - + volinfo->redundancy_count; } else { quorum_count = volinfo->brick_count; } @@ -13061,8 +13109,22 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume, if the quorum-type option is not set to auto, the behavior is set to the default behavior) */ - if (!ret) - quorum_count = tmp; + if (!ret) { + /* for dispersed volumes, only allow quorums + equal or larger than minimum functional + value. + */ + if ((GF_CLUSTER_TYPE_DISPERSE != + volinfo->type) || + (tmp >= quorum_count)) { + quorum_count = tmp; + } else { + gf_log(this->name, GF_LOG_INFO, + "Ignoring small quorum-count " + "(%d) on dispersed volume", tmp); + quorum_type = NULL; + } + } else quorum_type = NULL; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 6ab899a16cf..9701c6b939c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -2684,10 +2684,14 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, "%s-replicate-%d"}; char *stripe_args[] = {"cluster/stripe", "%s-stripe-%d"}; + char *disperse_args[] = {"cluster/disperse", + "%s-disperse-%d"}; + char option[32] = ""; int rclusters = 0; int clusters = 0; int dist_count = 0; int ret = -1; + xlator_t * ec = NULL; if (!volinfo->dist_leaf_count) goto out; @@ -2737,6 +2741,26 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (clusters < 0) goto out; break; + case GF_CLUSTER_TYPE_DISPERSE: + clusters = volgen_graph_build_clusters (graph, volinfo, + disperse_args[0], + disperse_args[1], + volinfo->brick_count, + volinfo->disperse_count); + if (clusters < 0) + goto out; + + sprintf(option, "%d", volinfo->redundancy_count); + ec = first_of (graph); + while (clusters-- > 0) { + ret = xlator_set_option (ec, "redundancy", option); + if (ret) + goto out; + + ec = ec->next; + } + + break; default: gf_log ("", GF_LOG_ERROR, "volume inconsistency: " "unrecognized clustering type"); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 53beebe0555..f23a9eb96b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1689,6 +1689,27 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) "replica count for volume %s", volname); goto out; } + } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { + ret = dict_get_int32 (dict, "disperse-count", + &volinfo->disperse_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "disperse count for volume %s", volname); + goto out; + } + ret = dict_get_int32 (dict, "redundancy-count", + &volinfo->redundancy_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "redundancy count for volume %s", volname); + goto out; + } + if (priv->op_version < GD_OP_VERSION_3_6_0) { + gf_log (this->name, GF_LOG_ERROR, "Disperse volume " + "needs op-version 3.6.0 or higher"); + ret = -1; + goto out; + } } /* dist-leaf-count is the count of brick nodes for a given diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index a8ecb505a5b..ddbb2c81338 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -336,6 +336,8 @@ struct glusterd_volinfo_ { int sub_count; /* backward compatibility */ int stripe_count; int replica_count; + int disperse_count; + int redundancy_count; int subvol_count; /* Number of subvolumes in a distribute volume */ int dist_leaf_count; /* Number of bricks in one |