From 1392da3e237d8ea080573909015916e3544a6d2c Mon Sep 17 00:00:00 2001 From: Xavier Hernandez Date: Thu, 15 May 2014 10:35:14 +0200 Subject: cli/glusterd: Added support for dispersed volumes Two new options have been added to the 'create' command of the cli interface: disperse [<count>] redundancy <count> Both are optional. A dispersed volume is created by specifying at least one of them. If 'disperse' is missing, or it is present but its '<count>' is not, the number of bricks enumerated in the command line is taken as the disperse count. If 'redundancy' is missing, the lowest optimal value is assumed. A configuration is considered optimal (for most workloads) when the disperse count - redundancy count is a power of 2. If the resulting redundancy is 1, the volume is created normally, but if it's greater than 1, a warning is shown to the user and he/she must answer yes/no to continue volume creation. If there isn't any optimal value for the given number of bricks, a warning is also shown and, if the user accepts, a redundancy of 1 is used. If 'redundancy' is specified and the resulting volume is not optimal, another warning is shown to the user. A distributed-disperse volume can be created using a number of bricks that is a multiple of the disperse count. 
Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4 BUG: 1118629 Signed-off-by: Xavier Hernandez Reviewed-on: http://review.gluster.org/7782 Tested-by: Gluster Build System Reviewed-by: Jeff Darcy Reviewed-by: Vijay Bellur --- cli/src/cli-cmd-parser.c | 203 +++++++++++++++++++++++++++++++++++++++++++++-- cli/src/cli-cmd-volume.c | 58 ++++++++++---- cli/src/cli-rpc-ops.c | 21 ++++- cli/src/cli-xml-output.c | 28 ++++++- cli/src/cli.h | 4 +- 5 files changed, 287 insertions(+), 27 deletions(-) (limited to 'cli') diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 1a39be8d121..4a00b8485d3 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -177,7 +177,86 @@ out: } int32_t -cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options) +cli_cmd_create_disperse_check(struct cli_state * state, int * disperse, + int * redundancy, int count) +{ + int i = 0; + int tmp = 0; + gf_answer_t answer = GF_ANSWER_NO; + char question[128]; + + const char * question1 = "There isn't an optimal redundancy value " + "for this configuration. Do you want to " + "create the volume with redundancy 1 ?"; + + const char * question2 = "The optimal redundancy for this " + "configuration is %d. Do you want to create " + "the volume with this value ?"; + + const char * question3 = "This configuration is not optimal on most " + "workloads. 
Do you want to use it ?"; + + if (*disperse <= 0) { + if (count < 3) { + cli_err ("number of bricks must be greater " + "than 2"); + + return -1; + } + *disperse = count; + } + + if (*redundancy == 0) { + tmp = *disperse - 1; + for (i = tmp / 2; + (i > 0) && ((tmp & -tmp) != tmp); + i--, tmp--); + + if (i == 0) { + answer = cli_cmd_get_confirmation(state, question1); + if (answer == GF_ANSWER_NO) + return -1; + + *redundancy = 1; + } + else + { + *redundancy = *disperse - tmp; + if (*redundancy > 1) { + sprintf(question, question2, *redundancy); + answer = cli_cmd_get_confirmation(state, + question); + if (answer == GF_ANSWER_NO) + return -1; + } + } + + tmp = 0; + } + else { + tmp = *disperse - *redundancy; + } + + if (*redundancy > (*disperse - 1) / 2) { + cli_err ("redundancy must be less than %d for a " + "disperse %d volume", + (*disperse + 1) / 2, *disperse); + + return -1; + } + + if ((tmp & -tmp) != tmp) { + answer = cli_cmd_get_confirmation(state, question3); + if (answer == GF_ANSWER_NO) + return -1; + } + + return 0; +} + +int32_t +cli_cmd_volume_create_parse (struct cli_state *state, const char **words, + int wordcount, dict_t **options) { dict_t *dict = NULL; char *volname = NULL; @@ -191,7 +270,8 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options int32_t index = 0; char *bricks = NULL; int32_t brick_count = 0; - char *opwords[] = { "replica", "stripe", "transport", NULL }; + char *opwords[] = { "replica", "stripe", "transport", "disperse", + "redundancy", NULL }; char *invalid_volnames[] = {"volume", "type", "subvolumes", "option", "end-volume", "all", "volume_not_in_ring", @@ -200,9 +280,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options "snap-max-soft-limit", "auto-delete", NULL}; char *w = NULL; + char *ptr = NULL; int op_count = 0; int32_t replica_count = 1; int32_t stripe_count = 1; + int32_t disperse_count = -1; + int32_t redundancy_count = 0; gf_boolean_t is_force = _gf_false; 
int wc = wordcount; @@ -279,6 +362,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options case GF_CLUSTER_TYPE_STRIPE: type = GF_CLUSTER_TYPE_STRIPE_REPLICATE; break; + case GF_CLUSTER_TYPE_DISPERSE: + cli_err ("replicated-dispersed volume is not " + "supported"); + goto out; } if (wordcount < (index+2)) { @@ -310,6 +397,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options case GF_CLUSTER_TYPE_REPLICATE: type = GF_CLUSTER_TYPE_STRIPE_REPLICATE; break; + case GF_CLUSTER_TYPE_DISPERSE: + cli_err ("striped-dispersed volume is not " + "supported"); + goto out; } if (wordcount < (index + 2)) { ret = -1; @@ -348,6 +439,90 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options goto out; } index += 2; + + } else if ((strcmp (w, "disperse")) == 0) { + switch (type) { + case GF_CLUSTER_TYPE_DISPERSE: + if (disperse_count >= 0) { + cli_err ("disperse option given " + "twice"); + goto out; + } + break; + case GF_CLUSTER_TYPE_NONE: + type = GF_CLUSTER_TYPE_DISPERSE; + break; + case GF_CLUSTER_TYPE_STRIPE_REPLICATE: + cli_err ("striped-replicated-dispersed volume " + "is not supported"); + goto out; + case GF_CLUSTER_TYPE_STRIPE: + cli_err ("striped-dispersed volume is not " + "supported"); + goto out; + case GF_CLUSTER_TYPE_REPLICATE: + cli_err ("replicated-dispersed volume is not " + "supported"); + goto out; + } + + if (wordcount >= (index+2)) { + disperse_count = strtol (words[index + 1], + &ptr, 0); + if (*ptr != 0) + disperse_count = 0; + else { + if (disperse_count < 3) { + cli_err ("disperse count must " + "be greater than 2"); + ret = -1; + goto out; + } + index++; + } + } + + index++; + + } else if ((strcmp (w, "redundancy")) == 0) { + switch (type) { + case GF_CLUSTER_TYPE_NONE: + type = GF_CLUSTER_TYPE_DISPERSE; + break; + case GF_CLUSTER_TYPE_DISPERSE: + if (redundancy_count > 0) { + cli_err ("redundancy option given " + "twice"); + goto out; + } + break; + case 
GF_CLUSTER_TYPE_STRIPE_REPLICATE: + cli_err ("striped-replicated-dispersed volume " + "is not supported"); + goto out; + case GF_CLUSTER_TYPE_STRIPE: + cli_err ("striped-dispersed volume is not " + "supported"); + goto out; + case GF_CLUSTER_TYPE_REPLICATE: + cli_err ("replicated-dispersed volume is not " + "supported"); + goto out; + } + + if (wordcount < (index+2)) { + ret = -1; + goto out; + } + redundancy_count = strtol (words[index+1], NULL, 0); + if (redundancy_count < 1) { + cli_err ("redundancy must be greater than 0"); + ret = -1; + goto out; + } + + index += 2; + } else { GF_ASSERT (!"opword mismatch"); ret = -1; @@ -359,8 +534,6 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options if (!trans_type) trans_type = gf_strdup ("tcp"); - sub_count = stripe_count * replica_count; - /* reset the count value now */ count = 1; @@ -389,6 +562,23 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options goto out; } + if (type == GF_CLUSTER_TYPE_DISPERSE) { + ret = cli_cmd_create_disperse_check(state, &disperse_count, + &redundancy_count, + brick_count); + if (!ret) + ret = dict_set_int32 (dict, "disperse-count", + disperse_count); + if (!ret) + ret = dict_set_int32 (dict, "redundancy-count", + redundancy_count); + if (ret) + goto out; + + sub_count = disperse_count; + } else + sub_count = stripe_count * replica_count; + if (brick_count % sub_count) { if (type == GF_CLUSTER_TYPE_STRIPE) cli_err ("number of bricks is not a multiple of " @@ -396,6 +586,9 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options else if (type == GF_CLUSTER_TYPE_REPLICATE) cli_err ("number of bricks is not a multiple of " "replica count"); + else if (type == GF_CLUSTER_TYPE_DISPERSE) + cli_err ("number of bricks is not a multiple of " + "disperse count"); else cli_err ("number of bricks given doesn't match " "required count"); @@ -404,7 +597,7 @@ cli_cmd_volume_create_parse (const char **words, int 
wordcount, dict_t **options goto out; } - /* Everything if parsed fine. start setting info in dict */ + /* Everything is parsed fine. start setting info in dict */ ret = dict_set_str (dict, "volname", volname); if (ret) goto out; diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index b1b6c8275bc..43e696d56c5 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -362,7 +362,7 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word, if (!frame) goto out; - ret = cli_cmd_volume_create_parse (words, wordcount, &options); + ret = cli_cmd_volume_create_parse (state, words, wordcount, &options); if (ret) { cli_usage_out (word->pattern); @@ -376,32 +376,55 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word, goto out; } if ((type == GF_CLUSTER_TYPE_REPLICATE) || - (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)) { - if ((ret = dict_get_str (options, "bricks", &brick_list)) != 0) { - gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : " - "Could not retrieve bricks list"); + (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) || + (type == GF_CLUSTER_TYPE_DISPERSE)) { + if ((ret = dict_get_str (options, "bricks", + &brick_list)) != 0) { + gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could " + "not retrieve bricks " + "list"); goto out; } - if ((ret = dict_get_int32 (options, "count", &brick_count)) != 0) { - gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : " - "Could not retrieve brick count"); + if ((ret = dict_get_int32 (options, "count", + &brick_count)) != 0) { + gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could " + "not retrieve brick " + "count"); goto out; } - if ((ret = dict_get_int32 (options, "replica-count", &sub_count)) != 0) { - gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : " - "Could not retrieve replica count"); - goto out; + + if (type != GF_CLUSTER_TYPE_DISPERSE) { + if ((ret = dict_get_int32 (options, "replica-count", + &sub_count)) != 0) { + gf_log ("cli", GF_LOG_ERROR, 
"Bricks check : " + "Could not retrieve " + "replica count"); + goto out; + } + gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found." + " Checking brick order."); + } else { + ret = dict_get_int32 (options, "disperse-count", + &sub_count); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, "Bricks check : " + "Could not retrieve " + "disperse count"); + goto out; + } + gf_log ("cli", GF_LOG_INFO, "Disperse cluster type found. " + "Checking brick order."); } - gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found." - " Checking brick order."); - ret = cli_cmd_check_brick_order (state, brick_list, brick_count, sub_count); + ret = cli_cmd_check_brick_order (state, brick_list, + brick_count, sub_count); if (ret) { - gf_log("cli", GF_LOG_INFO, "Not creating volume because of bad brick order"); + gf_log("cli", GF_LOG_INFO, "Not creating volume " + "because of bad brick " + "order"); goto out; } } - ret = dict_get_str (options, "transport", &trans_type); if (ret) { gf_log("cli", GF_LOG_ERROR, "Unable to get transport type"); @@ -2328,6 +2351,7 @@ struct cli_cmd volume_cmds[] = { "list information of all volumes"}, { "volume create [stripe ] [replica ] " + "[disperse []] [redundancy ] " "[transport ] " #ifdef HAVE_BD_XLATOR "?" 
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index c077622c0f1..43db8358bcf 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -59,9 +59,11 @@ char *cli_vol_type_str[] = {"Distribute", "Stripe", "Replicate", "Striped-Replicate", + "Disperse", "Distributed-Stripe", "Distributed-Replicate", "Distributed-Striped-Replicate", + "Distributed-Disperse", }; char *cli_vol_status_str[] = {"Created", @@ -518,6 +520,8 @@ gf_cli_get_volume_cbk (struct rpc_req *req, struct iovec *iov, int32_t dist_count = 0; int32_t stripe_count = 0; int32_t replica_count = 0; + int32_t disperse_count = 0; + int32_t redundancy_count = 0; int32_t vol_type = 0; int32_t transport = 0; char *volume_id_str = NULL; @@ -671,6 +675,16 @@ xml_output: if (ret) goto out; + snprintf (key, 256, "volume%d.disperse_count", i); + ret = dict_get_int32 (dict, key, &disperse_count); + if (ret) + goto out; + + snprintf (key, 256, "volume%d.redundancy_count", i); + ret = dict_get_int32 (dict, key, &redundancy_count); + if (ret) + goto out; + snprintf (key, 256, "volume%d.transport", i); ret = dict_get_int32 (dict, key, &transport); if (ret) @@ -685,7 +699,7 @@ xml_output: // Distributed (stripe/replicate/stripe-replica) setups if ((type > 0) && ( dist_count < brick_count)) - vol_type = type + 3; + vol_type = type + 4; cli_out ("Volume Name: %s", volname); cli_out ("Type: %s", cli_vol_type_str[vol_type]); @@ -734,6 +748,11 @@ next: brick_count); } else if (type == GF_CLUSTER_TYPE_NONE) { cli_out ("Number of Bricks: %d", brick_count); + } else if (type == GF_CLUSTER_TYPE_DISPERSE) { + cli_out ("Number of Bricks: %d x (%d + %d) = %d", + (brick_count / dist_count), + disperse_count - redundancy_count, + redundancy_count, brick_count); } else { /* For both replicate and stripe, dist_count is good enough */ diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index b16c238f7fc..1bf4e874647 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -2528,6 +2528,8 @@ 
cli_xml_output_vol_info (cli_local_t *local, dict_t *dict) int dist_count = 0; int stripe_count = 0; int replica_count = 0; + int disperse_count = 0; + int redundancy_count = 0; int transport = 0; char *brick = NULL; char key[1024] = {0,}; @@ -2621,14 +2623,36 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict) "%d", replica_count); XML_RET_CHECK_AND_GOTO (ret, out); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.disperse_count", i); + ret = dict_get_int32 (dict, key, &disperse_count); + if (ret) + goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"disperseCount", + "%d", disperse_count); + XML_RET_CHECK_AND_GOTO (ret, out); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.redundancy_count", i); + ret = dict_get_int32 (dict, key, &redundancy_count); + if (ret) + goto out; + ret = xmlTextWriterWriteFormatElement (local->writer, + (xmlChar *)"redundancyCount", + "%d", redundancy_count); + XML_RET_CHECK_AND_GOTO (ret, out); + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.type", i); ret = dict_get_int32 (dict, key, &type); if (ret) goto out; - /* For Distributed-(stripe,replicate,stipe-replicate) types */ + /* For Distributed-(stripe,replicate,stipe-replicate,disperse) + types + */ if ((type > 0) && (dist_count < brick_count)) - type += 3; + type += 4; ret = xmlTextWriterWriteFormatElement (local->writer, (xmlChar *)"type", "%d", type); diff --git a/cli/src/cli.h b/cli/src/cli.h index 69a7e82bf63..a1a78eca2bc 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -221,8 +221,8 @@ cli_submit_request (struct rpc_clnt *rpc, void *req, call_frame_t *frame, xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc); int32_t -cli_cmd_volume_create_parse (const char **words, int wordcount, - dict_t **options); +cli_cmd_volume_create_parse (struct cli_state *state, const char **words, + int wordcount, dict_t **options); int32_t cli_cmd_volume_reset_parse (const char 
**words, int wordcount, dict_t **opt); -- cgit