summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorXavier Hernandez <xhernandez@datalab.es>2014-05-15 10:35:14 +0200
committerVijay Bellur <vbellur@redhat.com>2014-07-11 10:34:24 -0700
commit1392da3e237d8ea080573909015916e3544a6d2c (patch)
tree89f7f37e65b5d526c18e043cc7dbb51c9e19a50e
parentad112305a1c7452b13c92238b40ded80361838f3 (diff)
cli/glusterd: Added support for dispersed volumes
Two new options have been added to the 'create' command of the cli interface: 'disperse [<count>]' and 'redundancy <count>'. Both are optional. A dispersed volume is created by specifying at least one of them. If 'disperse' is missing, or it is present but '<count>' is not, the number of bricks enumerated on the command line is taken as the disperse count. If 'redundancy' is missing, the lowest optimal value is assumed. A configuration is considered optimal (for most workloads) when (disperse count - redundancy count) is a power of 2. If the resulting redundancy is 1, the volume is created normally, but if it's greater than 1, a warning is shown to the user, who must answer yes/no to continue volume creation. If there isn't any optimal value for the given number of bricks, a warning is also shown and, if the user accepts, a redundancy of 1 is used. If 'redundancy' is specified and the resulting volume is not optimal, another warning is shown to the user. A distributed-disperse volume can be created using a number of bricks that is a multiple of the disperse count. Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4 BUG: 1118629 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/7782 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--cli/src/cli-cmd-parser.c203
-rw-r--r--cli/src/cli-cmd-volume.c58
-rw-r--r--cli/src/cli-rpc-ops.c21
-rw-r--r--cli/src/cli-xml-output.c28
-rw-r--r--cli/src/cli.h4
-rw-r--r--rpc/xdr/src/cli1-xdr.x3
-rw-r--r--tests/basic/ec/ec-12-4.t14
-rw-r--r--tests/basic/ec/ec-3-1.t14
-rw-r--r--tests/basic/ec/ec-4-1.t14
-rw-r--r--tests/basic/ec/ec-5-1.t14
-rw-r--r--tests/basic/ec/ec-5-2.t14
-rw-r--r--tests/basic/ec/ec-6-2.t14
-rw-r--r--tests/basic/ec/ec-7-3.t14
-rw-r--r--tests/basic/ec/ec-common143
-rw-r--r--tests/basic/ec/ec.t233
-rw-r--r--tests/basic/ec/self-heal.t123
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c23
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c80
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h2
24 files changed, 1054 insertions, 37 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 1a39be8d121..4a00b8485d3 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -177,7 +177,86 @@ out:
}
int32_t
-cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_create_disperse_check(struct cli_state * state, int * disperse,
+                              int * redundancy, int count)
+{
+        /* Validate and complete the disperse/redundancy pair of a 'volume
+         * create' command. count is the number of bricks given.
+         *  - *disperse <= 0: replaced by count (which must be at least 3).
+         *  - *redundancy == 0: replaced by the lowest value that leaves a
+         *    power-of-two number of data bricks, or by 1 if there is none.
+         * Choices that need user approval go through
+         * cli_cmd_get_confirmation(). Returns 0 on success and -1 on invalid
+         * values or when the confirmation comes back as GF_ANSWER_NO. */
+        int i = 0;
+        int tmp = 0;
+        gf_answer_t answer = GF_ANSWER_NO;
+        char question[128];
+
+        const char * question1 = "There isn't an optimal redundancy value "
+                                 "for this configuration. Do you want to "
+                                 "create the volume with redundancy 1 ?";
+
+        const char * question2 = "The optimal redundancy for this "
+                                 "configuration is %d. Do you want to create "
+                                 "the volume with this value ?";
+
+        const char * question3 = "This configuration is not optimal on most "
+                                 "workloads. Do you want to use it ?";
+
+        if (*disperse <= 0) {
+                if (count < 3) {
+                        cli_err ("number of bricks must be greater "
+                                 "than 2");
+
+                        return -1;
+                }
+                *disperse = count;
+        }
+
+        if (*redundancy == 0) {
+                /* Try redundancy 1, 2, ... up to (*disperse - 1) / 2; tmp is
+                 * the matching data brick count and (tmp & -tmp) == tmp
+                 * holds when it is a power of two. */
+                tmp = *disperse - 1;
+                for (i = tmp / 2;
+                     (i > 0) && ((tmp & -tmp) != tmp);
+                     i--, tmp--);
+
+                if (i == 0) {
+                        answer = cli_cmd_get_confirmation(state, question1);
+                        if (answer == GF_ANSWER_NO)
+                                return -1;
+
+                        *redundancy = 1;
+                } else {
+                        *redundancy = *disperse - tmp;
+                        if (*redundancy > 1) {
+                                /* Bounded write: question is a fixed buffer */
+                                snprintf(question, sizeof(question),
+                                         question2, *redundancy);
+                                answer = cli_cmd_get_confirmation(state,
+                                                                  question);
+                                if (answer == GF_ANSWER_NO)
+                                        return -1;
+                        }
+                }
+
+                /* 0 passes the power-of-two test below: skip question3 */
+                tmp = 0;
+        } else {
+                tmp = *disperse - *redundancy;
+        }
+
+        if (*redundancy > (*disperse - 1) / 2) {
+                cli_err ("redundancy must be less than %d for a "
+                         "disperse %d volume",
+                         (*disperse + 1) / 2, *disperse);
+
+                return -1;
+        }
+
+        if ((tmp & -tmp) != tmp) {
+                answer = cli_cmd_get_confirmation(state, question3);
+                if (answer == GF_ANSWER_NO)
+                        return -1;
+        }
+
+        return 0;
+}
+
+int32_t
+cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
+ int wordcount, dict_t **options)
{
dict_t *dict = NULL;
char *volname = NULL;
@@ -191,7 +270,8 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
int32_t index = 0;
char *bricks = NULL;
int32_t brick_count = 0;
- char *opwords[] = { "replica", "stripe", "transport", NULL };
+ char *opwords[] = { "replica", "stripe", "transport", "disperse",
+ "redundancy", NULL };
char *invalid_volnames[] = {"volume", "type", "subvolumes", "option",
"end-volume", "all", "volume_not_in_ring",
@@ -200,9 +280,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
"snap-max-soft-limit", "auto-delete",
NULL};
char *w = NULL;
+ char *ptr = NULL;
int op_count = 0;
int32_t replica_count = 1;
int32_t stripe_count = 1;
+ int32_t disperse_count = -1;
+ int32_t redundancy_count = 0;
gf_boolean_t is_force = _gf_false;
int wc = wordcount;
@@ -279,6 +362,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
case GF_CLUSTER_TYPE_STRIPE:
type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ cli_err ("replicated-dispersed volume is not "
+ "supported");
+ goto out;
}
if (wordcount < (index+2)) {
@@ -310,6 +397,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
case GF_CLUSTER_TYPE_REPLICATE:
type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ cli_err ("striped-dispersed volume is not "
+ "supported");
+ goto out;
}
if (wordcount < (index + 2)) {
ret = -1;
@@ -348,6 +439,90 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
index += 2;
+
+ } else if ((strcmp (w, "disperse")) == 0) {
+ switch (type) {
+ case GF_CLUSTER_TYPE_DISPERSE:
+ if (disperse_count >= 0) {
+ cli_err ("disperse option given "
+ "twice");
+ goto out;
+ }
+ break;
+ case GF_CLUSTER_TYPE_NONE:
+ type = GF_CLUSTER_TYPE_DISPERSE;
+ break;
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ cli_err ("striped-replicated-dispersed volume "
+ "is not supported");
+ goto out;
+ case GF_CLUSTER_TYPE_STRIPE:
+ cli_err ("striped-dispersed volume is not "
+ "supported");
+ goto out;
+ case GF_CLUSTER_TYPE_REPLICATE:
+ cli_err ("replicated-dispersed volume is not "
+ "supported");
+ goto out;
+ }
+
+ if (wordcount >= (index+2)) {
+ disperse_count = strtol (words[index + 1],
+ &ptr, 0);
+ if (*ptr != 0)
+ disperse_count = 0;
+ else {
+ if (disperse_count < 3) {
+ cli_err ("disperse count must "
+ "be greater than 2");
+ ret = -1;
+ goto out;
+ }
+ index++;
+ }
+ }
+
+ index++;
+
+ } else if ((strcmp (w, "redundancy")) == 0) {
+ switch (type) {
+ case GF_CLUSTER_TYPE_NONE:
+ type = GF_CLUSTER_TYPE_DISPERSE;
+ break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ if (redundancy_count > 0) {
+ cli_err ("redundancy option given "
+ "twice");
+ goto out;
+ }
+ break;
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ cli_err ("striped-replicated-dispersed volume "
+ "is not supported");
+ goto out;
+ case GF_CLUSTER_TYPE_STRIPE:
+ cli_err ("striped-dispersed volume is not "
+ "supported");
+ goto out;
+ case GF_CLUSTER_TYPE_REPLICATE:
+ cli_err ("replicated-dispersed volume is not "
+ "supported");
+ goto out;
+ }
+
+ if (wordcount < (index+2)) {
+ ret = -1;
+ goto out;
+ }
+ redundancy_count = strtol (words[index+1], NULL, 0);
+ if (redundancy_count < 1) {
+ cli_err ("redundancy must be greater than 0");
+ ret = -1;
+ goto out;
+ }
+
+ index += 2;
+
} else {
GF_ASSERT (!"opword mismatch");
ret = -1;
@@ -359,8 +534,6 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
if (!trans_type)
trans_type = gf_strdup ("tcp");
- sub_count = stripe_count * replica_count;
-
/* reset the count value now */
count = 1;
@@ -389,6 +562,23 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
+ if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ ret = cli_cmd_create_disperse_check(state, &disperse_count,
+ &redundancy_count,
+ brick_count);
+ if (!ret)
+ ret = dict_set_int32 (dict, "disperse-count",
+ disperse_count);
+ if (!ret)
+ ret = dict_set_int32 (dict, "redundancy-count",
+ redundancy_count);
+ if (ret)
+ goto out;
+
+ sub_count = disperse_count;
+ } else
+ sub_count = stripe_count * replica_count;
+
if (brick_count % sub_count) {
if (type == GF_CLUSTER_TYPE_STRIPE)
cli_err ("number of bricks is not a multiple of "
@@ -396,6 +586,9 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
else if (type == GF_CLUSTER_TYPE_REPLICATE)
cli_err ("number of bricks is not a multiple of "
"replica count");
+ else if (type == GF_CLUSTER_TYPE_DISPERSE)
+ cli_err ("number of bricks is not a multiple of "
+ "disperse count");
else
cli_err ("number of bricks given doesn't match "
"required count");
@@ -404,7 +597,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
- /* Everything if parsed fine. start setting info in dict */
+ /* Everything is parsed fine. start setting info in dict */
ret = dict_set_str (dict, "volname", volname);
if (ret)
goto out;
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index b1b6c8275bc..43e696d56c5 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -362,7 +362,7 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
- ret = cli_cmd_volume_create_parse (words, wordcount, &options);
+ ret = cli_cmd_volume_create_parse (state, words, wordcount, &options);
if (ret) {
cli_usage_out (word->pattern);
@@ -376,32 +376,55 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
- (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)) {
- if ((ret = dict_get_str (options, "bricks", &brick_list)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve bricks list");
+ (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) ||
+ (type == GF_CLUSTER_TYPE_DISPERSE)) {
+ if ((ret = dict_get_str (options, "bricks",
+ &brick_list)) != 0) {
+ gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could "
+ "not retrieve bricks "
+ "list");
goto out;
}
- if ((ret = dict_get_int32 (options, "count", &brick_count)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve brick count");
+ if ((ret = dict_get_int32 (options, "count",
+ &brick_count)) != 0) {
+ gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could "
+ "not retrieve brick "
+ "count");
goto out;
}
- if ((ret = dict_get_int32 (options, "replica-count", &sub_count)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve replica count");
- goto out;
+
+ if (type != GF_CLUSTER_TYPE_DISPERSE) {
+ if ((ret = dict_get_int32 (options, "replica-count",
+ &sub_count)) != 0) {
+ gf_log ("cli", GF_LOG_ERROR, "Bricks check : "
+ "Could not retrieve "
+ "replica count");
+ goto out;
+ }
+ gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found."
+ " Checking brick order.");
+ } else {
+ ret = dict_get_int32 (options, "disperse-count",
+ &sub_count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Bricks check : "
+ "Could not retrieve "
+ "disperse count");
+ goto out;
+ }
+ gf_log ("cli", GF_LOG_INFO, "Disperse cluster type found. "
+ "Checking brick order.");
}
- gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found."
- " Checking brick order.");
- ret = cli_cmd_check_brick_order (state, brick_list, brick_count, sub_count);
+ ret = cli_cmd_check_brick_order (state, brick_list,
+ brick_count, sub_count);
if (ret) {
- gf_log("cli", GF_LOG_INFO, "Not creating volume because of bad brick order");
+ gf_log("cli", GF_LOG_INFO, "Not creating volume "
+ "because of bad brick "
+ "order");
goto out;
}
}
-
ret = dict_get_str (options, "transport", &trans_type);
if (ret) {
gf_log("cli", GF_LOG_ERROR, "Unable to get transport type");
@@ -2328,6 +2351,7 @@ struct cli_cmd volume_cmds[] = {
"list information of all volumes"},
{ "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] "
+ "[disperse [<COUNT>]] [redundancy <COUNT>] "
"[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"
#ifdef HAVE_BD_XLATOR
"?<vg_name>"
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index c077622c0f1..43db8358bcf 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -59,9 +59,11 @@ char *cli_vol_type_str[] = {"Distribute",
"Stripe",
"Replicate",
"Striped-Replicate",
+ "Disperse",
"Distributed-Stripe",
"Distributed-Replicate",
"Distributed-Striped-Replicate",
+ "Distributed-Disperse",
};
char *cli_vol_status_str[] = {"Created",
@@ -518,6 +520,8 @@ gf_cli_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
int32_t dist_count = 0;
int32_t stripe_count = 0;
int32_t replica_count = 0;
+ int32_t disperse_count = 0;
+ int32_t redundancy_count = 0;
int32_t vol_type = 0;
int32_t transport = 0;
char *volume_id_str = NULL;
@@ -671,6 +675,16 @@ xml_output:
if (ret)
goto out;
+ snprintf (key, 256, "volume%d.disperse_count", i);
+ ret = dict_get_int32 (dict, key, &disperse_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.redundancy_count", i);
+ ret = dict_get_int32 (dict, key, &redundancy_count);
+ if (ret)
+ goto out;
+
snprintf (key, 256, "volume%d.transport", i);
ret = dict_get_int32 (dict, key, &transport);
if (ret)
@@ -685,7 +699,7 @@ xml_output:
// Distributed (stripe/replicate/stripe-replica) setups
if ((type > 0) && ( dist_count < brick_count))
- vol_type = type + 3;
+ vol_type = type + 4;
cli_out ("Volume Name: %s", volname);
cli_out ("Type: %s", cli_vol_type_str[vol_type]);
@@ -734,6 +748,11 @@ next:
brick_count);
} else if (type == GF_CLUSTER_TYPE_NONE) {
cli_out ("Number of Bricks: %d", brick_count);
+ } else if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ cli_out ("Number of Bricks: %d x (%d + %d) = %d",
+ (brick_count / dist_count),
+ disperse_count - redundancy_count,
+ redundancy_count, brick_count);
} else {
/* For both replicate and stripe, dist_count is
good enough */
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index b16c238f7fc..1bf4e874647 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -2528,6 +2528,8 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
int dist_count = 0;
int stripe_count = 0;
int replica_count = 0;
+ int disperse_count = 0;
+ int redundancy_count = 0;
int transport = 0;
char *brick = NULL;
char key[1024] = {0,};
@@ -2622,13 +2624,35 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.disperse_count", i);
+ ret = dict_get_int32 (dict, key, &disperse_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"disperseCount",
+ "%d", disperse_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.redundancy_count", i);
+ ret = dict_get_int32 (dict, key, &redundancy_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"redundancyCount",
+ "%d", redundancy_count);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.type", i);
ret = dict_get_int32 (dict, key, &type);
if (ret)
goto out;
- /* For Distributed-(stripe,replicate,stipe-replicate) types */
+ /* For Distributed-(stripe,replicate,stipe-replicate,disperse)
+ types
+ */
if ((type > 0) && (dist_count < brick_count))
- type += 3;
+ type += 4;
ret = xmlTextWriterWriteFormatElement (local->writer,
(xmlChar *)"type",
"%d", type);
diff --git a/cli/src/cli.h b/cli/src/cli.h
index 69a7e82bf63..a1a78eca2bc 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -221,8 +221,8 @@ cli_submit_request (struct rpc_clnt *rpc, void *req, call_frame_t *frame,
xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
int32_t
-cli_cmd_volume_create_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_create_parse (struct cli_state *state, const char **words,
+ int wordcount, dict_t **options);
int32_t
cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **opt);
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index 3c43e374d95..3a9841934cb 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -23,7 +23,8 @@
GF_CLUSTER_TYPE_NONE = 0,
GF_CLUSTER_TYPE_STRIPE,
GF_CLUSTER_TYPE_REPLICATE,
- GF_CLUSTER_TYPE_STRIPE_REPLICATE
+ GF_CLUSTER_TYPE_STRIPE_REPLICATE,
+ GF_CLUSTER_TYPE_DISPERSE
};
enum gf1_cli_replace_op {
diff --git a/tests/basic/ec/ec-12-4.t b/tests/basic/ec/ec-12-4.t
new file mode 100644
index 00000000000..9ab47018617
--- /dev/null
+++ b/tests/basic/ec/ec-12-4.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=12
+REDUNDANCY=4
+
+# This must be equal to 44 * $DISPERSE + 106
+TESTS_EXPECTED_IN_LOOP=634
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-3-1.t b/tests/basic/ec/ec-3-1.t
new file mode 100644
index 00000000000..5769c202289
--- /dev/null
+++ b/tests/basic/ec/ec-3-1.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=3
+REDUNDANCY=1
+
+# This must be equal to 44 * $DISPERSE + 106
+TESTS_EXPECTED_IN_LOOP=238
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-4-1.t b/tests/basic/ec/ec-4-1.t
new file mode 100644
index 00000000000..d34e1fb4e95
--- /dev/null
+++ b/tests/basic/ec/ec-4-1.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=4
+REDUNDANCY=1
+
+# This must be equal to 44 * $DISPERSE + 106
+TESTS_EXPECTED_IN_LOOP=282
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-5-1.t b/tests/basic/ec/ec-5-1.t
new file mode 100644
index 00000000000..61d1cb6ce48
--- /dev/null
+++ b/tests/basic/ec/ec-5-1.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=5
+REDUNDANCY=1
+
+# This must be equal to 44 * $DISPERSE + 106
+TESTS_EXPECTED_IN_LOOP=326
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-5-2.t b/tests/basic/ec/ec-5-2.t
new file mode 100644
index 00000000000..4dc1c186f02
--- /dev/null
+++ b/tests/basic/ec/ec-5-2.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=5
+REDUNDANCY=2
+
+# This must be equal to 44 * $DISPERSE + 106
+TESTS_EXPECTED_IN_LOOP=326
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-6-2.t b/tests/basic/ec/ec-6-2.t
new file mode 100644
index 00000000000..23ec84e60e9
--- /dev/null
+++ b/tests/basic/ec/ec-6-2.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=6
+REDUNDANCY=2
+
+# This must be equal to 44 * $DISPERSE + 106
+TESTS_EXPECTED_IN_LOOP=370
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-7-3.t b/tests/basic/ec/ec-7-3.t
new file mode 100644
index 00000000000..4ebba2a1de3
--- /dev/null
+++ b/tests/basic/ec/ec-7-3.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=7
+REDUNDANCY=3
+
+# This must be equal to 44 * $DISPERSE + 106
+TESTS_EXPECTED_IN_LOOP=414
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-common b/tests/basic/ec/ec-common
new file mode 100644
index 00000000000..95f53f250bc
--- /dev/null
+++ b/tests/basic/ec/ec-common
@@ -0,0 +1,143 @@
+
+SIZE_LIST="1048576 1000 12345 0"
+
+LAST_BRICK=$(($DISPERSE - 1))
+
+function fragment_size
+{
+ local fragments=$(($DISPERSE - $REDUNDANCY))
+ local block_size=$((128 * $fragments))
+ local size=$(($1 + $block_size - 1))
+
+ echo $((( $size - ( $size ) % $block_size ) / $fragments))
+}
+
+cleanup
+
+# Scratch directory for the reference files. $tmp is quoted in the guard: an
+# unquoted empty value turns the test into `[ ! -d ]`, which is false.
+tmp=$(mktemp -d)
+[ -d "$tmp" ] || exit 1
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{0..$LAST_BRICK}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST dd if=/dev/urandom of=$tmp/small bs=1024 count=1
+TEST dd if=/dev/urandom of=$tmp/big bs=1024 count=4096
+
+cs_small=$(sha1sum $tmp/small | awk '{ print $1 }')
+cs_big=$(sha1sum $tmp/big | awk '{ print $1 }')
+cp $tmp/small $tmp/small1
+for size in $SIZE_LIST; do
+ truncate -s $size $tmp/small1
+ eval cs_small_truncate[$size]=$(sha1sum $tmp/small1 | awk '{ print $1 }')
+done
+cp $tmp/big $tmp/big1
+for size in $SIZE_LIST; do
+ truncate -s $size $tmp/big1
+ eval cs_big_truncate[$size]=$(sha1sum $tmp/big1 | awk '{ print $1 }')
+done
+
+TEST df -h
+TEST stat $M0
+
+for idx in `seq 0 $LAST_BRICK`; do
+ brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
+done
+
+cd $M0
+EXPECT "2" echo $(ls -a1 | wc -l)
+TEST mkdir dir1
+TEST [ -d dir1 ]
+TEST touch file1
+TEST [ -f file1 ]
+
+for dir in . dir1; do
+ TEST cp $tmp/small $dir/small
+ TEST [ -f $dir/small ]
+ fsize=$(fragment_size 1024)
+ EXPECT "1024" stat -c "%s" $dir/small
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
+ done
+
+ EXPECT "$cs_small" echo $(sha1sum $dir/small | awk '{ print $1 }')
+
+ TEST cp $tmp/big $dir/big
+ TEST [ -f $dir/big ]
+ fsize=$(fragment_size 4194304)
+ EXPECT "4194304" stat -c "%s" $dir/big
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
+ done
+
+ EXPECT "$cs_big" echo $(sha1sum $dir/big | awk '{ print $1 }')
+
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST kill_brick $V0 $H0 $B0/$V0$idx
+
+ EXPECT "1024" stat -c "%s" $dir/small
+ EXPECT "4194304" stat -c "%s" $dir/big
+ EXPECT "$cs_small" echo $(sha1sum $dir/small | awk '{ print $1 }')
+ EXPECT "$cs_big" echo $(sha1sum $dir/big | awk '{ print $1 }')
+
+ cd
+ TEST umount $M0
+ TEST $CLI volume stop $V0 force
+ TEST $CLI volume start $V0
+ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+ cd $M0
+ done
+
+ for size in $SIZE_LIST; do
+ TEST truncate -s $size $dir/small
+ TEST [ -f $dir/small ]
+ fsize=$(fragment_size $size)
+ EXPECT "$size" stat -c "%s" $dir/small
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
+ done
+
+ EXPECT "${cs_small_truncate[$size]}" echo $(sha1sum $dir/small | awk '{ print $1 }')
+
+ TEST truncate -s $size $dir/big
+ TEST [ -f $dir/big ]
+ EXPECT "$size" stat -c "%s" $dir/big
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
+ done
+
+ EXPECT "${cs_big_truncate[$size]}" echo $(sha1sum $dir/big | awk '{ print $1 }')
+ done
+
+ TEST rm -f $dir/small
+ TEST [ ! -e $dir/small ]
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/$dir/small ]
+ done
+
+ TEST rm -f $dir/big
+ TEST [ ! -e $dir/big ]
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/$dir/big ]
+ done
+done
+
+TEST rmdir dir1
+TEST [ ! -e dir1 ]
+for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/dir1 ]
+done
+
+TEST rm -f file1
+TEST [ ! -e file1 ]
+for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/file1 ]
+done
+
+rm -rf $tmp
+
+cleanup
diff --git a/tests/basic/ec/ec.t b/tests/basic/ec/ec.t
new file mode 100644
index 00000000000..e81de0d97bd
--- /dev/null
+++ b/tests/basic/ec/ec.t
@@ -0,0 +1,233 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST_USER=test-ec-user
+TEST_UID=27341
+
+function my_getfattr {
+ getfattr --only-values -e text $* 2> /dev/null
+}
+
+function get_rep_count {
+ v=$(my_getfattr -n trusted.nsr.rep-count $1)
+ #echo $v > /dev/tty
+ echo $v
+}
+
+function create_file {
+ dd if=/dev/urandom of=$1 bs=4k count=$2 conv=sync 2> /dev/null
+}
+
+function setup_perm_file {
+ mkdir $1/perm_dir || return 1
+ chown ${TEST_USER} $1/perm_dir || return 1
+ su ${TEST_USER} -c "touch $1/perm_dir/perm_file" || return 1
+ return 0
+}
+
+# Functions to check repair for specific operation types.
+
+function check_create_write {
+ for b in $*; do
+ cmp $tmpdir/create-write $b/create-write || return 1
+ done
+ return 0
+}
+
+function check_truncate {
+ truncate --size=8192 $tmpdir/truncate
+ for b in $*; do
+ cmp $tmpdir/truncate $b/truncate || return 1
+ done
+ return 0
+}
+
+function check_hard_link {
+ for b in $*; do
+ inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1)
+ inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1)
+ [ "$inum1" = "$inum2" ] || return 1
+ done
+ echo "Y"
+ return 0
+}
+
+function check_soft_link {
+ for b in $*; do
+ [ "$(readlink $b/soft-link)" = "soft-link-tgt" ] || return 1
+ done
+ echo "Y"
+ return 0
+}
+
+function check_unlink {
+ for b in $*; do
+ [ ! -e $b/unlink ] || return 1
+ done
+ echo "Y"
+ return 0
+}
+
+function check_mkdir {
+ for b in $*; do
+ [ -d $b/mkdir ] || return 1
+ done
+ echo "Y"
+ return 0
+}
+
+function check_rmdir {
+ for b in $*; do
+ [ ! -e $b/rmdir ] || return 1
+ done
+ echo "Y"
+ return 0
+}
+
+function check_setxattr {
+ for b in $*; do
+ v=$(my_getfattr -n user.foo $b/setxattr)
+ [ "$v" = "ash_nazg_durbatuluk" ] || return 1
+ done
+ echo "Y"
+ return 0
+}
+
+function check_removexattr {
+ for b in $*; do
+ my_getfattr -n user.bar $b/removexattr 2> /dev/null
+ [ $? = 0 ] && return 1
+ done
+ echo "Y"
+ return 0
+}
+
+function check_perm_file {
+ b1=$1
+ shift 1
+ ftext=$(stat -c "%u %g %a" $b1/perm_dir/perm_file)
+ #echo "first u/g/a = $ftext" > /dev/tty
+ for b in $*; do
+ btext=$(stat -c "%u %g %a" $b/perm_dir/perm_file)
+ #echo " next u/a/a = $btext" > /dev/tty
+ if [ x"$btext" != x"$ftext" ]; then
+ return 1
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+cleanup
+
+TEST useradd -o -M -u ${TEST_UID} ${TEST_USER}
+trap "userdel --force ${TEST_USER}" EXIT
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+TEST $CLI volume create $V0 disperse 10 redundancy 2 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+EXPECT '10' brick_count $V0
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# Create local files for comparisons etc. One EXIT trap does both cleanups,
+# because setting a new EXIT trap replaces the userdel trap installed above.
+tmpdir=$(mktemp -d)
+trap "rm -rf $tmpdir; userdel --force ${TEST_USER} 2> /dev/null" EXIT
+TEST create_file $tmpdir/create-write 10
+TEST create_file $tmpdir/truncate 10
+
+# Prepare files and directories we'll need later.
+TEST cp $tmpdir/truncate $M0/
+TEST touch $M0/hard-link-1
+TEST touch $M0/unlink
+TEST mkdir $M0/rmdir
+TEST touch $M0/setxattr
+TEST touch $M0/removexattr
+TEST setfattr -n user.bar -v "ash_nazg_gimbatul" $M0/removexattr
+
+# Kill a couple of bricks and allow some time for things to settle.
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}8
+sleep 10
+
+# Test create+write
+TEST cp $tmpdir/create-write $M0/
+# Test truncate
+TEST truncate --size=8192 $M0/truncate
+# Test hard link
+TEST ln $M0/hard-link-1 $M0/hard-link-2
+# Test soft link
+TEST ln -s soft-link-tgt $M0/soft-link
+# Test unlink
+TEST rm $M0/unlink
+# Test rmdir
+TEST rmdir $M0/rmdir
+# Test mkdir
+TEST mkdir $M0/mkdir
+# Test setxattr
+TEST setfattr -n user.foo -v "ash_nazg_durbatuluk" $M0/setxattr
+# Test removexattr
+TEST setfattr -x user.bar $M0/removexattr
+# Test uid/gid behavior
+TEST setup_perm_file $M0
+
+# Unmount/remount so that create/write and truncate don't see cached data.
+TEST umount $M0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# Test create/write and truncate *before* the bricks are brought back.
+TEST check_create_write $M0
+TEST check_truncate $M0
+
+# Restart the bricks and allow repair to occur.
+TEST $CLI volume start $V0 force
+sleep 10
+
+# Unmount/remount again, same reason as before.
+TEST umount $M0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# Make sure everything is as it should be. Most tests check for consistency
+# between the bricks and the front end. This is not valid for disperse, so we
+# check the mountpoint state instead.
+
+TEST check_create_write $M0
+TEST check_truncate $M0
+
+TEST stat $M0/hard-link-1
+TEST stat $M0/hard-link-2
+TEST stat $M0/soft-link
+TEST ! stat $M0/unlink
+TEST ! stat $M0/rmdir
+TEST stat $M0/mkdir
+TEST stat $M0/setxattr
+TEST stat $M0/removexattr
+TEST stat $M0/perm_dir
+TEST stat $M0/perm_dir/perm_file
+
+EXPECT_WITHIN 5 "Y" check_hard_link $B0/${V0}{0..9}
+EXPECT_WITHIN 5 "Y" check_soft_link $B0/${V0}{0..9}
+EXPECT_WITHIN 5 "Y" check_unlink $B0/${V0}{0..9}
+EXPECT_WITHIN 5 "Y" check_rmdir $B0/${V0}{0..9}
+EXPECT_WITHIN 5 "Y" check_mkdir $B0/${V0}{0..9}
+EXPECT_WITHIN 5 "Y" check_setxattr $B0/${V0}{0..9}
+EXPECT_WITHIN 5 "Y" check_removexattr $B0/${V0}{0..9}
+EXPECT_WITHIN 5 "Y" check_perm_file $B0/${V0}{0..9}
+
+rm -rf $tmpdir
+userdel --force ${TEST_USER}
+
+cleanup
+
diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t
new file mode 100644
index 00000000000..99cfd9420aa
--- /dev/null
+++ b/tests/basic/ec/self-heal.t
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks the self-healing feature of dispersed volumes
+
+cleanup
+
+tmp=`mktemp -d`
+if [ ! -d $tmp ]; then
+ exit 1
+fi
+
+TESTS_EXPECTED_IN_LOOP=85
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST dd if=/dev/urandom of=$tmp/test bs=1024 count=1024
+
+cs=$(sha1sum $tmp/test | awk '{ print $1 }')
+
+TEST df -h
+TEST stat $M0
+
+for idx in {0..5}; do
+ brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
+done
+
+cd $M0
+TEST cp $tmp/test test
+TEST chmod 644 test
+EXPECT "-rw-r--r--" stat -c "%A" test
+
+for idx1 in {0..5}; do
+ TEST chmod 666 ${brick[$idx1]}/test
+ sleep 1
+ EXPECT "-rw-r--r--" stat -c "%A" test
+ EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test
+done
+
+for idx1 in {0..4}; do
+ for idx2 in `seq $(($idx1 + 1)) 5`; do
+ if [ $idx1 -ne $idx2 ]; then
+ TEST chmod 666 ${brick[$idx1]}/test
+ TEST chmod 600 ${brick[$idx2]}/test
+ sleep 1
+ EXPECT "-rw-r--r--" stat -c "%A" test
+ EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test
+ EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx2]}/test
+ fi
+ done
+done
+
+TEST truncate -s 0 ${brick[0]}/test
+TEST truncate -s 2097152 ${brick[1]}/test
+TEST setfattr -n user.test -v "test1" ${brick[0]}/test
+TEST setfattr -n user.test -v "test2" ${brick[1]}/test
+TEST chmod 600 ${brick[0]}/test
+TEST chmod 666 ${brick[1]}/test
+sleep 1
+
+EXPECT "1048576" stat -c "%s" test
+TEST ! getfattr -n user.test test
+
+EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[0]}/test
+EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[1]}/test
+TEST ! getfattr -n user.test ${brick[0]}/test
+TEST ! getfattr -n user.test ${brick[1]}/test
+EXPECT "-rw-r--r--" stat -c "%A" ${brick[0]}/test
+EXPECT "-rw-r--r--" stat -c "%A" ${brick[1]}/test
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST cp $tmp/test test2
+EXPECT "1048576" stat -c "%s" test2
+TEST chmod 777 test2
+EXPECT "-rwxrwxrwx" stat -c "%A" test2
+
+TEST mkdir dir1
+TEST ls -al dir1
+
+TEST ln -s test2 test3
+TEST [ -h test3 ]
+
+TEST ln test2 test4
+TEST [ -f test4 ]
+EXPECT "2" stat -c "%h" test2
+EXPECT "2" stat -c "%h" test4
+
+cd
+TEST umount $M0
+TEST $CLI volume stop $V0 force
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+
+EXPECT "1048576" stat -c "%s" test2
+EXPECT "-rwxrwxrwx" stat -c "%A" test2
+EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[0]}/test2
+EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[1]}/test2
+EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[0]}/test2
+EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[1]}/test2
+
+TEST ls -al dir1
+EXPECT_WITHIN 5 "1" eval "if [ -d ${brick[0]}/dir1 ]; then echo 1; fi"
+EXPECT_WITHIN 5 "1" eval "if [ -d ${brick[1]}/dir1 ]; then echo 1; fi"
+
+TEST [ -h test3 ]
+EXPECT_WITHIN 5 "1" eval "if [ -h ${brick[0]}/test3 ]; then echo 1; fi"
+EXPECT_WITHIN 5 "1" eval "if [ -h ${brick[1]}/test3 ]; then echo 1; fi"
+
+EXPECT "2" stat -c "%h" test4
+EXPECT_WITHIN 5 "3" stat -c "%h" ${brick[0]}/test4
+EXPECT_WITHIN 5 "3" stat -c "%h" ${brick[1]}/test4
+
+rm -rf $tmp
+
+cleanup
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 452df759ad4..089c7d637c9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -169,6 +169,12 @@ gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
}
}
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ snprintf (err_str, err_len, "Volume %s cannot be converted "
+ "from dispersed to striped-"
+ "dispersed", volinfo->volname);
+ gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
out:
@@ -259,6 +265,12 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
}
}
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ snprintf (err_str, err_len, "Volume %s cannot be converted "
+ "from dispersed to replicated-"
+ "dispersed", volinfo->volname);
+ gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str);
+ goto out;
}
out:
return ret;
@@ -276,6 +288,7 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
switch (volinfo->type) {
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_DISPERSE:
snprintf (err_str, err_len,
"replica count (%d) option given for non replicate "
"volume %s", replica_count, volinfo->volname);
@@ -737,6 +750,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
strcpy (vol_type, "stripe");
} else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
strcpy (vol_type, "stripe-replicate");
+ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ strcpy (vol_type, "disperse");
} else {
strcpy (vol_type, "distribute");
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index ed4bd60f88b..e10dc22b56b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -398,6 +398,16 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
if (ret)
goto out;
+ snprintf (key, 256, "volume%d.disperse_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->disperse_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "volume%d.redundancy_count", count);
+ ret = dict_set_int32 (volumes, key, volinfo->redundancy_count);
+ if (ret)
+ goto out;
+
snprintf (key, 256, "volume%d.transport", count);
ret = dict_set_int32 (volumes, key, volinfo->transport_type);
if (ret)
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index c31d8a8ad71..086a6550a72 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -844,6 +844,18 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
if (ret)
goto out;
+ snprintf (buf, sizeof (buf), "%d", volinfo->disperse_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
+ buf);
+ if (ret)
+ goto out;
+
+ snprintf (buf, sizeof (buf), "%d", volinfo->redundancy_count);
+ ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
+ buf);
+ if (ret)
+ goto out;
+
snprintf (buf, sizeof (buf), "%d", volinfo->version);
ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf);
if (ret)
@@ -2618,6 +2630,12 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {
volinfo->replica_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {
+ volinfo->disperse_count = atoi (value);
+ } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
+ strlen (GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) {
+ volinfo->redundancy_count = atoi (value);
} else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {
volinfo->transport_type = atoi (value);
@@ -2754,6 +2772,11 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo->replica_count > 0);
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ GF_ASSERT (volinfo->disperse_count > 0);
+ GF_ASSERT (volinfo->redundancy_count > 0);
+ break;
+
default:
GF_ASSERT (0);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index 89cf24de789..fb7de7b1b10 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -44,6 +44,8 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count"
#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count"
#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count"
+#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"
+#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"
#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index dc923b1eeb4..aff2356eb4f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -548,6 +548,8 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
new_volinfo->type = volinfo->type;
new_volinfo->replica_count = volinfo->replica_count;
new_volinfo->stripe_count = volinfo->stripe_count;
+ new_volinfo->disperse_count = volinfo->disperse_count;
+ new_volinfo->redundancy_count = volinfo->redundancy_count;
new_volinfo->dist_leaf_count = volinfo->dist_leaf_count;
new_volinfo->sub_count = volinfo->sub_count;
new_volinfo->transport_type = volinfo->transport_type;
@@ -2525,6 +2527,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,
goto out;
memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
+ ret = dict_set_int32 (dict, key, volinfo->disperse_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
+ ret = dict_set_int32 (dict, key, volinfo->redundancy_count);
+ if (ret)
+ goto out;
+
+ memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count);
ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count);
if (ret)
@@ -4206,6 +4220,24 @@ glusterd_import_volinfo (dict_t *peer_data, int count,
gf_log (THIS->name, GF_LOG_INFO,
"peer is possibly old version");
+ /* not having a 'disperse_count' key is not an error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count);
+ ret = dict_get_int32 (peer_data, key, &new_volinfo->disperse_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
+ /* not having a 'redundancy_count' key is not an error
+ (as peer may be of old version) */
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count);
+ ret = dict_get_int32 (peer_data, key, &new_volinfo->redundancy_count);
+ if (ret)
+ gf_log (THIS->name, GF_LOG_INFO,
+ "peer is possibly old version");
+
/* not having a 'dist_count' key is not a error
(as peer may be of old version) */
memset (key, 0, sizeof (key));
@@ -6932,6 +6964,9 @@ glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo)
int rcount = volinfo->replica_count;
int scount = volinfo->stripe_count;
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE)
+ return volinfo->disperse_count;
+
return (rcount ? rcount : 1) * (scount ? scount : 1);
}
@@ -11694,6 +11729,13 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)
}
}
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ if (volinfo->op_version < GD_OP_VERSION_3_6_0)
+ volinfo->op_version = GD_OP_VERSION_3_6_0;
+ if (volinfo->client_op_version < GD_OP_VERSION_3_6_0)
+ volinfo->client_op_version = GD_OP_VERSION_3_6_0;
+ }
+
return;
}
@@ -12774,7 +12816,7 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
- up_count = volinfo->replica_count - down_count;
+ up_count = volinfo->dist_leaf_count - down_count;
if (quorum_type && !strcmp (quorum_type, "fixed")) {
if (up_count >= quorum_count) {
@@ -12782,7 +12824,8 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,
goto out;
}
} else {
- if (volinfo->replica_count % 2 == 0) {
+ if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) &&
+ (volinfo->dist_leaf_count % 2 == 0)) {
if ((up_count > quorum_count) ||
((up_count == quorum_count) && first_brick_on)) {
quorum_met = _gf_true;
@@ -12835,8 +12878,9 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
goto out;
}
- if (!glusterd_is_volume_replicate (volinfo) ||
- volinfo->replica_count < 3) {
+ if ((!glusterd_is_volume_replicate (volinfo) ||
+ volinfo->replica_count < 3) &&
+ (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) {
for (i = 0; i < volinfo->brick_count ; i++) {
/* for a pure distribute volume, and replica volume
with replica count 2, quorum is not met if even
@@ -12858,7 +12902,8 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
ret = 0;
quorum_met = _gf_true;
} else {
- distribute_subvols = volinfo->brick_count / volinfo->replica_count;
+ distribute_subvols = volinfo->brick_count /
+ volinfo->dist_leaf_count;
for (j = 0; j < distribute_subvols; j++) {
// by default assume quorum is not met
/* TODO: Handle distributed striped replicate volumes
@@ -12867,11 +12912,11 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,
*/
ret = 1;
quorum_met = _gf_false;
- for (i = 0; i < volinfo->replica_count; i++) {
+ for (i = 0; i < volinfo->dist_leaf_count; i++) {
snprintf (key, sizeof (key),
"%s%"PRId64".brick%"PRId64".status", key_prefix,
index,
- (j * volinfo->replica_count) + i);
+ (j * volinfo->dist_leaf_count) + i);
ret = dict_get_int32 (dict, key, &brick_online);
if (ret || !brick_online) {
if (i == 0)
@@ -13043,6 +13088,9 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
else
quorum_count =
volinfo->replica_count/2 + 1;
+ } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
+ quorum_count = volinfo->disperse_count -
+ volinfo->redundancy_count;
} else {
quorum_count = volinfo->brick_count;
}
@@ -13061,8 +13109,22 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,
if the quorum-type option is not set to auto,
the behavior is set to the default behavior)
*/
- if (!ret)
- quorum_count = tmp;
+ if (!ret) {
+ /* for dispersed volumes, only allow quorums
+ equal to or larger than the minimum
+ functional value.
+ */
+ if ((GF_CLUSTER_TYPE_DISPERSE !=
+ volinfo->type) ||
+ (tmp >= quorum_count)) {
+ quorum_count = tmp;
+ } else {
+ gf_log(this->name, GF_LOG_INFO,
+ "Ignoring small quorum-count "
+ "(%d) on dispersed volume", tmp);
+ quorum_type = NULL;
+ }
+ }
else
quorum_type = NULL;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 6ab899a16cf..9701c6b939c 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -2684,10 +2684,14 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
"%s-replicate-%d"};
char *stripe_args[] = {"cluster/stripe",
"%s-stripe-%d"};
+ char *disperse_args[] = {"cluster/disperse",
+ "%s-disperse-%d"};
+ char option[32] = "";
int rclusters = 0;
int clusters = 0;
int dist_count = 0;
int ret = -1;
+ xlator_t * ec = NULL;
if (!volinfo->dist_leaf_count)
goto out;
@@ -2737,6 +2741,26 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
if (clusters < 0)
goto out;
break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ clusters = volgen_graph_build_clusters (graph, volinfo,
+ disperse_args[0],
+ disperse_args[1],
+ volinfo->brick_count,
+ volinfo->disperse_count);
+ if (clusters < 0)
+ goto out;
+
+ sprintf(option, "%d", volinfo->redundancy_count);
+ ec = first_of (graph);
+ while (clusters-- > 0) {
+ ret = xlator_set_option (ec, "redundancy", option);
+ if (ret)
+ goto out;
+
+ ec = ec->next;
+ }
+
+ break;
default:
gf_log ("", GF_LOG_ERROR, "volume inconsistency: "
"unrecognized clustering type");
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
index 53beebe0555..f23a9eb96b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c
@@ -1689,6 +1689,27 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)
"replica count for volume %s", volname);
goto out;
}
+ } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
+ ret = dict_get_int32 (dict, "disperse-count",
+ &volinfo->disperse_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "disperse count for volume %s", volname);
+ goto out;
+ }
+ ret = dict_get_int32 (dict, "redundancy-count",
+ &volinfo->redundancy_count);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "Failed to get "
+ "redundancy count for volume %s", volname);
+ goto out;
+ }
+ if (priv->op_version < GD_OP_VERSION_3_6_0) {
+ gf_log (this->name, GF_LOG_ERROR, "Disperse volume "
+ "needs op-version 3.6.0 or higher");
+ ret = -1;
+ goto out;
+ }
}
/* dist-leaf-count is the count of brick nodes for a given
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index a8ecb505a5b..ddbb2c81338 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -336,6 +336,8 @@ struct glusterd_volinfo_ {
int sub_count; /* backward compatibility */
int stripe_count;
int replica_count;
+ int disperse_count;
+ int redundancy_count;
int subvol_count; /* Number of subvolumes in a
distribute volume */
int dist_leaf_count; /* Number of bricks in one