diff options
| author | Xavier Hernandez <xhernandez@datalab.es> | 2014-05-15 10:35:14 +0200 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2014-07-11 10:34:24 -0700 | 
| commit | 1392da3e237d8ea080573909015916e3544a6d2c (patch) | |
| tree | 89f7f37e65b5d526c18e043cc7dbb51c9e19a50e | |
| parent | ad112305a1c7452b13c92238b40ded80361838f3 (diff) | |
cli/glusterd: Added support for dispersed volumes
Two new options have been added to the 'create' command of the cli
interface:
    disperse [<count>] redundancy <count>
Both are optional. A dispersed volume is created by specifying at
least one of them. If 'disperse' is missing, or it is present but
'<count>' is not, the number of bricks enumerated on the command
line is taken as the disperse count.
If 'redundancy' is missing, the lowest optimal value is assumed. A
configuration is considered optimal (for most workloads) when the
difference (disperse count - redundancy count) is a power of 2. If the
resulting redundancy is 1, the volume is created normally, but if it's
greater than 1, a warning is shown and the user must answer yes/no
to continue volume creation. If there isn't any optimal value for
the given number of bricks, a warning is also shown and, if the user
accepts, a redundancy of 1 is used.
If 'redundancy' is specified and the resulting volume is not optimal,
another warning is shown to the user.
A distributed-disperse volume can be created using a number of bricks
that is a multiple of the disperse count.
Change-Id: Iab93efbe78e905cdb91f54f3741599f7ea6645e4
BUG: 1118629
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/7782
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
24 files changed, 1054 insertions, 37 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 1a39be8d121..4a00b8485d3 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -177,7 +177,86 @@ out:  }  int32_t -cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options) +cli_cmd_create_disperse_check(struct cli_state * state, int * disperse, +                              int * redundancy, int count) +{ +        int i = 0; +        int tmp = 0; +        gf_answer_t answer = GF_ANSWER_NO; +        char question[128]; + +        const char * question1 = "There isn't an optimal redundancy value " +                                 "for this configuration. Do you want to " +                                 "create the volume with redundancy 1 ?"; + +        const char * question2 = "The optimal redundancy for this " +                                 "configuration is %d. Do you want to create " +                                 "the volume with this value ?"; + +        const char * question3 = "This configuration is not optimal on most " +                                 "workloads. 
Do you want to use it ?"; + +        if (*disperse <= 0) { +                if (count < 3) { +                        cli_err ("number of bricks must be greater " +                                 "than 2"); + +                        return -1; +                } +                *disperse = count; +        } + +        if (*redundancy == 0) { +                tmp = *disperse - 1; +                for (i = tmp / 2; +                     (i > 0) && ((tmp & -tmp) != tmp); +                     i--, tmp--); + +                if (i == 0) { +                        answer = cli_cmd_get_confirmation(state, question1); +                        if (answer == GF_ANSWER_NO) +                                return -1; + +                        *redundancy = 1; +                } +                else +                { +                        *redundancy = *disperse - tmp; +                        if (*redundancy > 1) { +                                sprintf(question, question2, *redundancy); +                                answer = cli_cmd_get_confirmation(state, +                                                                  question); +                                if (answer == GF_ANSWER_NO) +                                        return -1; +                        } +                } + +                tmp = 0; +        } +        else { +                tmp = *disperse - *redundancy; +        } + +        if (*redundancy > (*disperse - 1) / 2) { +                cli_err ("redundancy must be less than %d for a " +                         "disperse %d volume", +                         (*disperse + 1) / 2, *disperse); + +                return -1; +        } + +        if ((tmp & -tmp) != tmp) { +                answer = cli_cmd_get_confirmation(state, question3); +                if (answer == GF_ANSWER_NO) +                        return -1; +        } + +        return 0; +} + +int32_t +cli_cmd_volume_create_parse (struct cli_state *state, const char 
**words, +                             int wordcount, dict_t **options)  {          dict_t  *dict = NULL;          char    *volname = NULL; @@ -191,7 +270,8 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options          int32_t index = 0;          char    *bricks = NULL;          int32_t brick_count = 0; -        char    *opwords[] = { "replica", "stripe", "transport", NULL }; +        char    *opwords[] = { "replica", "stripe", "transport", "disperse", +                               "redundancy", NULL };          char    *invalid_volnames[] = {"volume", "type", "subvolumes", "option",                                         "end-volume", "all", "volume_not_in_ring", @@ -200,9 +280,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options                                         "snap-max-soft-limit", "auto-delete",                                         NULL};          char    *w = NULL; +        char    *ptr = NULL;          int      op_count = 0;          int32_t  replica_count = 1;          int32_t  stripe_count = 1; +        int32_t  disperse_count = -1; +        int32_t  redundancy_count = 0;          gf_boolean_t is_force = _gf_false;          int wc = wordcount; @@ -279,6 +362,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options                          case GF_CLUSTER_TYPE_STRIPE:                                  type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;                                  break; +                        case GF_CLUSTER_TYPE_DISPERSE: +                                cli_err ("replicated-dispersed volume is not " +                                         "supported"); +                                goto out;                          }                          if (wordcount < (index+2)) { @@ -310,6 +397,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options                          case GF_CLUSTER_TYPE_REPLICATE:       
                           type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;                                  break; +                        case GF_CLUSTER_TYPE_DISPERSE: +                                cli_err ("striped-dispersed volume is not " +                                         "supported"); +                                goto out;                          }                          if (wordcount < (index + 2)) {                                  ret = -1; @@ -348,6 +439,90 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options                                  goto out;                          }                          index += 2; + +                } else if ((strcmp (w, "disperse")) == 0) { +                        switch (type) { +                        case GF_CLUSTER_TYPE_DISPERSE: +                                if (disperse_count >= 0) { +                                        cli_err ("disperse option given " +                                                 "twice"); +                                        goto out; +                                } +                                break; +                        case GF_CLUSTER_TYPE_NONE: +                                type = GF_CLUSTER_TYPE_DISPERSE; +                                break; +                        case GF_CLUSTER_TYPE_STRIPE_REPLICATE: +                                cli_err ("striped-replicated-dispersed volume " +                                         "is not supported"); +                                goto out; +                        case GF_CLUSTER_TYPE_STRIPE: +                                cli_err ("striped-dispersed volume is not " +                                         "supported"); +                                goto out; +                        case GF_CLUSTER_TYPE_REPLICATE: +                                cli_err ("replicated-dispersed volume is not " +                                         "supported"); +           
                     goto out; +                        } + +                        if (wordcount >= (index+2)) { +                                disperse_count = strtol (words[index + 1], +                                                         &ptr, 0); +                                if (*ptr != 0) +                                        disperse_count = 0; +                                else { +                                        if (disperse_count < 3) { +                                                cli_err ("disperse count must " +                                                         "be greater than 2"); +                                                ret = -1; +                                                goto out; +                                        } +                                        index++; +                                } +                        } + +                        index++; + +                } else if ((strcmp (w, "redundancy")) == 0) { +                        switch (type) { +                        case GF_CLUSTER_TYPE_NONE: +                                type = GF_CLUSTER_TYPE_DISPERSE; +                                break; +                        case GF_CLUSTER_TYPE_DISPERSE: +                                if (redundancy_count > 0) { +                                        cli_err ("redundancy option given " +                                                 "twice"); +                                        goto out; +                                } +                                break; +                        case GF_CLUSTER_TYPE_STRIPE_REPLICATE: +                                cli_err ("striped-replicated-dispersed volume " +                                         "is not supported"); +                                goto out; +                        case GF_CLUSTER_TYPE_STRIPE: +                                cli_err ("striped-dispersed volume is not " +                            
             "supported"); +                                goto out; +                        case GF_CLUSTER_TYPE_REPLICATE: +                                cli_err ("replicated-dispersed volume is not " +                                         "supported"); +                                goto out; +                        } + +                        if (wordcount < (index+2)) { +                                ret = -1; +                                goto out; +                        } +                        redundancy_count = strtol (words[index+1], NULL, 0); +                        if (redundancy_count < 1) { +                                cli_err ("redundancy must be greater than 0"); +                                ret = -1; +                                goto out; +                        } + +                        index += 2; +                  }              else {                          GF_ASSERT (!"opword mismatch");                          ret = -1; @@ -359,8 +534,6 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options          if (!trans_type)                  trans_type = gf_strdup ("tcp"); -        sub_count = stripe_count * replica_count; -          /* reset the count value now */          count = 1; @@ -389,6 +562,23 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options                  goto out;          } +        if (type == GF_CLUSTER_TYPE_DISPERSE) { +                ret = cli_cmd_create_disperse_check(state, &disperse_count, +                                                    &redundancy_count, +                                                    brick_count); +                if (!ret) +                        ret = dict_set_int32 (dict, "disperse-count", +                                              disperse_count); +                if (!ret) +                        ret = dict_set_int32 (dict, "redundancy-count", +                                            
  redundancy_count); +                if (ret) +                        goto out; + +                sub_count = disperse_count; +        } else +                sub_count = stripe_count * replica_count; +          if (brick_count % sub_count) {                  if (type == GF_CLUSTER_TYPE_STRIPE)                          cli_err ("number of bricks is not a multiple of " @@ -396,6 +586,9 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options                  else if (type == GF_CLUSTER_TYPE_REPLICATE)                          cli_err ("number of bricks is not a multiple of "                                   "replica count"); +                else if (type == GF_CLUSTER_TYPE_DISPERSE) +                        cli_err ("number of bricks is not a multiple of " +                                 "disperse count");                  else                          cli_err ("number of bricks given doesn't match "                                   "required count"); @@ -404,7 +597,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options                  goto out;          } -        /* Everything if parsed fine. start setting info in dict */ +        /* Everything is parsed fine. 
start setting info in dict */          ret = dict_set_str (dict, "volname", volname);          if (ret)                  goto out; diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index b1b6c8275bc..43e696d56c5 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -362,7 +362,7 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,          if (!frame)                  goto out; -        ret = cli_cmd_volume_create_parse (words, wordcount, &options); +        ret = cli_cmd_volume_create_parse (state, words, wordcount, &options);          if (ret) {                  cli_usage_out (word->pattern); @@ -376,32 +376,55 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,                  goto out;          }          if ((type == GF_CLUSTER_TYPE_REPLICATE) || -            (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)) { -                if ((ret = dict_get_str (options, "bricks", &brick_list)) != 0) { -                        gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : " -                                                     "Could not retrieve bricks list"); +            (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) || +            (type == GF_CLUSTER_TYPE_DISPERSE)) { +                if ((ret = dict_get_str (options, "bricks", +                                         &brick_list)) != 0) { +                        gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could " +                                                     "not retrieve bricks " +                                                     "list");                          goto out;                  } -                if ((ret = dict_get_int32 (options, "count", &brick_count)) != 0) { -                        gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : " -                                                     "Could not retrieve brick count"); +                if ((ret = dict_get_int32 (options, "count", +          
                                 &brick_count)) != 0) { +                        gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could " +                                                     "not retrieve brick " +                                                     "count");                          goto out;                  } -                if ((ret = dict_get_int32 (options, "replica-count", &sub_count)) != 0) { -                        gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : " -                                                    "Could not retrieve replica count"); -                        goto out; + +                if (type != GF_CLUSTER_TYPE_DISPERSE) { +                    if ((ret = dict_get_int32 (options, "replica-count", +                                               &sub_count)) != 0) { +                            gf_log ("cli", GF_LOG_ERROR, "Bricks check : " +                                                         "Could not retrieve " +                                                         "replica count"); +                            goto out; +                    } +                    gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found." +                                                " Checking brick order."); +                } else { +                    ret = dict_get_int32 (options, "disperse-count", +                                          &sub_count); +                    if (ret) { +                            gf_log ("cli", GF_LOG_ERROR, "Bricks check : " +                                                         "Could not retrieve " +                                                         "disperse count"); +                            goto out; +                    } +                    gf_log ("cli", GF_LOG_INFO, "Disperse cluster type found. 
" +                                                "Checking brick order.");                  } -                gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found." -                                            " Checking brick order."); -                ret = cli_cmd_check_brick_order (state, brick_list, brick_count, sub_count); +                ret = cli_cmd_check_brick_order (state, brick_list, +                                                 brick_count, sub_count);                  if (ret) { -                        gf_log("cli", GF_LOG_INFO, "Not creating volume because of bad brick order"); +                        gf_log("cli", GF_LOG_INFO, "Not creating volume " +                                                   "because of bad brick " +                                                   "order");                          goto out;                  }          } -          ret = dict_get_str (options, "transport", &trans_type);          if (ret) {                  gf_log("cli", GF_LOG_ERROR, "Unable to get transport type"); @@ -2328,6 +2351,7 @@ struct cli_cmd volume_cmds[] = {            "list information of all volumes"},          { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] " +          "[disperse [<COUNT>]] [redundancy <COUNT>] "            "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"  #ifdef HAVE_BD_XLATOR            "?<vg_name>" diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index c077622c0f1..43db8358bcf 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -59,9 +59,11 @@ char *cli_vol_type_str[] = {"Distribute",                              "Stripe",                              "Replicate",                              "Striped-Replicate", +                            "Disperse",                              "Distributed-Stripe",                              "Distributed-Replicate",                              "Distributed-Striped-Replicate", +                            
"Distributed-Disperse",                             };  char *cli_vol_status_str[] = {"Created", @@ -518,6 +520,8 @@ gf_cli_get_volume_cbk (struct rpc_req *req, struct iovec *iov,          int32_t                    dist_count           = 0;          int32_t                    stripe_count         = 0;          int32_t                    replica_count        = 0; +        int32_t                    disperse_count       = 0; +        int32_t                    redundancy_count     = 0;          int32_t                    vol_type             = 0;          int32_t                    transport            = 0;          char                      *volume_id_str        = NULL; @@ -671,6 +675,16 @@ xml_output:                  if (ret)                          goto out; +                snprintf (key, 256, "volume%d.disperse_count", i); +                ret = dict_get_int32 (dict, key, &disperse_count); +                if (ret) +                        goto out; + +                snprintf (key, 256, "volume%d.redundancy_count", i); +                ret = dict_get_int32 (dict, key, &redundancy_count); +                if (ret) +                        goto out; +                  snprintf (key, 256, "volume%d.transport", i);                  ret = dict_get_int32 (dict, key, &transport);                  if (ret) @@ -685,7 +699,7 @@ xml_output:                  // Distributed (stripe/replicate/stripe-replica) setups                  if ((type > 0) && ( dist_count < brick_count)) -                        vol_type = type + 3; +                        vol_type = type + 4;                  cli_out ("Volume Name: %s", volname);                  cli_out ("Type: %s", cli_vol_type_str[vol_type]); @@ -734,6 +748,11 @@ next:                                   brick_count);                  } else if (type == GF_CLUSTER_TYPE_NONE) {                          cli_out ("Number of Bricks: %d", brick_count); +                } else if (type == GF_CLUSTER_TYPE_DISPERSE) { +                  
      cli_out ("Number of Bricks: %d x (%d + %d) = %d", +                                 (brick_count / dist_count), +                                 disperse_count - redundancy_count, +                                 redundancy_count, brick_count);                  } else {                          /* For both replicate and stripe, dist_count is                             good enough */ diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index b16c238f7fc..1bf4e874647 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -2528,6 +2528,8 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)          int                     dist_count = 0;          int                     stripe_count = 0;          int                     replica_count = 0; +        int                     disperse_count = 0; +        int                     redundancy_count = 0;          int                     transport = 0;          char                    *brick = NULL;          char                    key[1024] = {0,}; @@ -2622,13 +2624,35 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)                  XML_RET_CHECK_AND_GOTO (ret, out);                  memset (key, 0, sizeof (key)); +                snprintf (key, sizeof (key), "volume%d.disperse_count", i); +                ret = dict_get_int32 (dict, key, &disperse_count); +                if (ret) +                        goto out; +                ret = xmlTextWriterWriteFormatElement (local->writer, +                                                       (xmlChar *)"disperseCount", +                                                       "%d", disperse_count); +                XML_RET_CHECK_AND_GOTO (ret, out); + +                memset (key, 0, sizeof (key)); +                snprintf (key, sizeof (key), "volume%d.redundancy_count", i); +                ret = dict_get_int32 (dict, key, &redundancy_count); +                if (ret) +                        goto out; +                
ret = xmlTextWriterWriteFormatElement (local->writer, +                                                       (xmlChar *)"redundancyCount", +                                                       "%d", redundancy_count); +                XML_RET_CHECK_AND_GOTO (ret, out); + +                memset (key, 0, sizeof (key));                  snprintf (key, sizeof (key), "volume%d.type", i);                  ret = dict_get_int32 (dict, key, &type);                  if (ret)                          goto out; -                /* For Distributed-(stripe,replicate,stipe-replicate) types */ +                /* For Distributed-(stripe,replicate,stipe-replicate,disperse) +                   types +                 */                  if ((type > 0) && (dist_count < brick_count)) -                        type += 3; +                        type += 4;                  ret = xmlTextWriterWriteFormatElement (local->writer,                                                         (xmlChar *)"type",                                                         "%d", type); diff --git a/cli/src/cli.h b/cli/src/cli.h index 69a7e82bf63..a1a78eca2bc 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -221,8 +221,8 @@ cli_submit_request (struct rpc_clnt *rpc, void *req, call_frame_t *frame,                      xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);  int32_t -cli_cmd_volume_create_parse (const char **words, int wordcount, -                             dict_t **options); +cli_cmd_volume_create_parse (struct cli_state *state, const char **words, +                             int wordcount, dict_t **options);  int32_t  cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **opt); diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 3c43e374d95..3a9841934cb 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -23,7 +23,8 @@          GF_CLUSTER_TYPE_NONE = 0,          GF_CLUSTER_TYPE_STRIPE,          GF_CLUSTER_TYPE_REPLICATE, -        
GF_CLUSTER_TYPE_STRIPE_REPLICATE +        GF_CLUSTER_TYPE_STRIPE_REPLICATE, +        GF_CLUSTER_TYPE_DISPERSE  };   enum gf1_cli_replace_op { diff --git a/tests/basic/ec/ec-12-4.t b/tests/basic/ec/ec-12-4.t new file mode 100644 index 00000000000..9ab47018617 --- /dev/null +++ b/tests/basic/ec/ec-12-4.t @@ -0,0 +1,14 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks basic dispersed volume functionality and cli interface + +DISPERSE=12 +REDUNDANCY=4 + +# This must be equal to 44 * $DISPERSE + 106 +TESTS_EXPECTED_IN_LOOP=634 + +. $(dirname $0)/ec-common diff --git a/tests/basic/ec/ec-3-1.t b/tests/basic/ec/ec-3-1.t new file mode 100644 index 00000000000..5769c202289 --- /dev/null +++ b/tests/basic/ec/ec-3-1.t @@ -0,0 +1,14 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks basic dispersed volume functionality and cli interface + +DISPERSE=3 +REDUNDANCY=1 + +# This must be equal to 44 * $DISPERSE + 106 +TESTS_EXPECTED_IN_LOOP=238 + +. $(dirname $0)/ec-common diff --git a/tests/basic/ec/ec-4-1.t b/tests/basic/ec/ec-4-1.t new file mode 100644 index 00000000000..d34e1fb4e95 --- /dev/null +++ b/tests/basic/ec/ec-4-1.t @@ -0,0 +1,14 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks basic dispersed volume functionality and cli interface + +DISPERSE=4 +REDUNDANCY=1 + +# This must be equal to 44 * $DISPERSE + 106 +TESTS_EXPECTED_IN_LOOP=282 + +. $(dirname $0)/ec-common diff --git a/tests/basic/ec/ec-5-1.t b/tests/basic/ec/ec-5-1.t new file mode 100644 index 00000000000..61d1cb6ce48 --- /dev/null +++ b/tests/basic/ec/ec-5-1.t @@ -0,0 +1,14 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks basic dispersed volume functionality and cli interface + +DISPERSE=5 +REDUNDANCY=1 + +# This must be equal to 44 * $DISPERSE + 106 +TESTS_EXPECTED_IN_LOOP=326 + +. 
$(dirname $0)/ec-common diff --git a/tests/basic/ec/ec-5-2.t b/tests/basic/ec/ec-5-2.t new file mode 100644 index 00000000000..4dc1c186f02 --- /dev/null +++ b/tests/basic/ec/ec-5-2.t @@ -0,0 +1,14 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks basic dispersed volume functionality and cli interface + +DISPERSE=5 +REDUNDANCY=2 + +# This must be equal to 44 * $DISPERSE + 106 +TESTS_EXPECTED_IN_LOOP=326 + +. $(dirname $0)/ec-common diff --git a/tests/basic/ec/ec-6-2.t b/tests/basic/ec/ec-6-2.t new file mode 100644 index 00000000000..23ec84e60e9 --- /dev/null +++ b/tests/basic/ec/ec-6-2.t @@ -0,0 +1,14 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks basic dispersed volume functionality and cli interface + +DISPERSE=6 +REDUNDANCY=2 + +# This must be equal to 44 * $DISPERSE + 106 +TESTS_EXPECTED_IN_LOOP=370 + +. $(dirname $0)/ec-common diff --git a/tests/basic/ec/ec-7-3.t b/tests/basic/ec/ec-7-3.t new file mode 100644 index 00000000000..4ebba2a1de3 --- /dev/null +++ b/tests/basic/ec/ec-7-3.t @@ -0,0 +1,14 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks basic dispersed volume functionality and cli interface + +DISPERSE=7 +REDUNDANCY=3 + +# This must be equal to 44 * $DISPERSE + 106 +TESTS_EXPECTED_IN_LOOP=414 + +. $(dirname $0)/ec-common diff --git a/tests/basic/ec/ec-common b/tests/basic/ec/ec-common new file mode 100644 index 00000000000..95f53f250bc --- /dev/null +++ b/tests/basic/ec/ec-common @@ -0,0 +1,143 @@ + +SIZE_LIST="1048576 1000 12345 0" + +LAST_BRICK=$(($DISPERSE - 1)) + +function fragment_size +{ +    local fragments=$(($DISPERSE - $REDUNDANCY)) +    local block_size=$((128 * $fragments)) +    local size=$(($1 + $block_size - 1)) + +    echo $((( $size - ( $size ) % $block_size ) / $fragments)) +} + +cleanup + +tmp=`mktemp -d` +if [ ! 
-d $tmp ]; then +    exit 1 +fi + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{0..$LAST_BRICK} +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +TEST dd if=/dev/urandom of=$tmp/small bs=1024 count=1 +TEST dd if=/dev/urandom of=$tmp/big bs=1024 count=4096 + +cs_small=$(sha1sum $tmp/small | awk '{ print $1 }') +cs_big=$(sha1sum $tmp/big | awk '{ print $1 }') +cp $tmp/small $tmp/small1 +for size in $SIZE_LIST; do +    truncate -s $size $tmp/small1 +    eval cs_small_truncate[$size]=$(sha1sum $tmp/small1 | awk '{ print $1 }') +done +cp $tmp/big $tmp/big1 +for size in $SIZE_LIST; do +    truncate -s $size $tmp/big1 +    eval cs_big_truncate[$size]=$(sha1sum $tmp/big1 | awk '{ print $1 }') +done + +TEST df -h +TEST stat $M0 + +for idx in `seq 0 $LAST_BRICK`; do +    brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx) +done + +cd $M0 +EXPECT "2" echo $(ls -a1 | wc -l) +TEST mkdir dir1 +TEST [ -d dir1 ] +TEST touch file1 +TEST [ -f file1 ] + +for dir in . 
dir1; do +    TEST cp $tmp/small $dir/small +    TEST [ -f $dir/small ] +    fsize=$(fragment_size 1024) +    EXPECT "1024" stat -c "%s" $dir/small +    for idx in `seq 0 $LAST_BRICK`; do +        EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small +    done + +    EXPECT "$cs_small" echo $(sha1sum $dir/small | awk '{ print $1 }') + +    TEST cp $tmp/big $dir/big +    TEST [ -f $dir/big ] +    fsize=$(fragment_size 4194304) +    EXPECT "4194304" stat -c "%s" $dir/big +    for idx in `seq 0 $LAST_BRICK`; do +        EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big +    done + +    EXPECT "$cs_big" echo $(sha1sum $dir/big | awk '{ print $1 }') + +    for idx in `seq 0 $LAST_BRICK`; do +        TEST kill_brick $V0 $H0 $B0/$V0$idx + +        EXPECT "1024" stat -c "%s" $dir/small +        EXPECT "4194304" stat -c "%s" $dir/big +        EXPECT "$cs_small" echo $(sha1sum $dir/small | awk '{ print $1 }') +        EXPECT "$cs_big" echo $(sha1sum $dir/big | awk '{ print $1 }') + +        cd +        TEST umount $M0 +        TEST $CLI volume stop $V0 force +        TEST $CLI volume start $V0 +        TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +        cd $M0 +    done + +    for size in $SIZE_LIST; do +        TEST truncate -s $size $dir/small +        TEST [ -f $dir/small ] +        fsize=$(fragment_size $size) +        EXPECT "$size" stat -c "%s" $dir/small +        for idx in `seq 0 $LAST_BRICK`; do +            EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small +        done + +        EXPECT "${cs_small_truncate[$size]}" echo $(sha1sum $dir/small | awk '{ print $1 }') + +        TEST truncate -s $size $dir/big +        TEST [ -f $dir/big ] +        EXPECT "$size" stat -c "%s" $dir/big +        for idx in `seq 0 $LAST_BRICK`; do +            EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big +        done + +        EXPECT "${cs_big_truncate[$size]}" echo $(sha1sum $dir/big | awk '{ print $1 }') +    done 
+ +    TEST rm -f $dir/small +    TEST [ ! -e $dir/small ] +    for idx in `seq 0 $LAST_BRICK`; do +        TEST [ ! -e ${brick[$idx]}/$dir/small ] +    done + +    TEST rm -f $dir/big +    TEST [ ! -e $dir/big ] +    for idx in `seq 0 $LAST_BRICK`; do +        TEST [ ! -e ${brick[$idx]}/$dir/big ] +    done +done + +TEST rmdir dir1 +TEST [ ! -e dir1 ] +for idx in `seq 0 $LAST_BRICK`; do +    TEST [ ! -e ${brick[$idx]}/dir1 ] +done + +TEST rm -f file1 +TEST [ ! -e file1 ] +for idx in `seq 0 $LAST_BRICK`; do +    TEST [ ! -e ${brick[$idx]}/file1 ] +done + +rm -rf $tmp + +cleanup diff --git a/tests/basic/ec/ec.t b/tests/basic/ec/ec.t new file mode 100644 index 00000000000..e81de0d97bd --- /dev/null +++ b/tests/basic/ec/ec.t @@ -0,0 +1,233 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +TEST_USER=test-ec-user +TEST_UID=27341 + +function my_getfattr { +    getfattr --only-values -e text $* 2> /dev/null +} + +function get_rep_count { +    v=$(my_getfattr -n trusted.nsr.rep-count $1) +    #echo $v > /dev/tty +    echo $v +} + +function create_file { +    dd if=/dev/urandom of=$1 bs=4k count=$2 conv=sync 2> /dev/null +} + +function setup_perm_file { +    mkdir $1/perm_dir               || return 1 +    chown ${TEST_USER} $1/perm_dir              || return 1 +    su ${TEST_USER} -c "touch $1/perm_dir/perm_file"    || return 1 +    return 0 +} + +# Functions to check repair for specific operation types. 
+ +function check_create_write { +    for b in $*; do +        cmp $tmpdir/create-write $b/create-write || return 1 +    done +    return 0 +} + +function check_truncate { +    truncate --size=8192 $tmpdir/truncate +    for b in $*; do +        cmp $tmpdir/truncate $b/truncate || return 1 +    done +    return 0 +} + +function check_hard_link { +    for b in $*; do +        inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1) +        inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1) +        [ "$inum1" = "$inum2" ] || return 1 +    done +    echo "Y" +    return 0 +} + +function check_soft_link { +    for b in $*; do +        [ "$(readlink $b/soft-link)" = "soft-link-tgt" ] || return 1 +    done +    echo "Y" +    return 0 +} + +function check_unlink { +    for b in $*; do +        [ ! -e $b/unlink ] || return 1 +    done +    echo "Y" +    return 0 +} + +function check_mkdir { +    for b in $*; do +        [ -d $b/mkdir ] || return 1 +    done +    echo "Y" +    return 0 +} + +function check_rmdir { +    for b in $*; do +        [ ! -e $b/rmdir ] || return 1 +    done +    echo "Y" +    return 0 +} + +function check_setxattr { +    for b in $*; do +        v=$(my_getfattr -n user.foo $b/setxattr) +        [ "$v" = "ash_nazg_durbatuluk" ] || return 1 +    done +    echo "Y" +    return 0 +} + +function check_removexattr { +    for b in $*; do +        my_getfattr -n user.bar $b/removexattr 2> /dev/null +        [ $? 
= 0 ] && return 1 +    done +    echo "Y" +    return 0 +} + +function check_perm_file { +    b1=$1 +    shift 1 +    ftext=$(stat -c "%u %g %a" $b1/perm_dir/perm_file) +    #echo "first u/g/a = $ftext" > /dev/tty +    for b in $*; do +        btext=$(stat -c "%u %g %a" $b/perm_dir/perm_file) +        #echo "  next u/a/a = $btext" > /dev/tty +        if [ x"$btext" != x"$ftext" ]; then +            return 1 +        fi +    done +    echo "Y" +    return 0 +} + +cleanup + +TEST useradd -o -M -u ${TEST_UID} ${TEST_USER} +trap "userdel --force ${TEST_USER}" EXIT + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9} +TEST $CLI volume create $V0 disperse 10 redundancy 2 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9} + +EXPECT "$V0" volinfo_field $V0 'Volume Name' +EXPECT 'Created' volinfo_field $V0 'Status' +EXPECT '10' brick_count $V0 + +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' + +# Mount FUSE with caching disabled +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +# Create local files for comparisons etc. +tmpdir=$(mktemp -d) +trap "rm -rf $tmpdir" EXIT +TEST create_file $tmpdir/create-write 10 +TEST create_file $tmpdir/truncate 10 + +# Prepare files and directories we'll need later. +TEST cp $tmpdir/truncate $M0/ +TEST touch $M0/hard-link-1 +TEST touch $M0/unlink +TEST mkdir $M0/rmdir +TEST touch $M0/setxattr +TEST touch $M0/removexattr +TEST setfattr -n user.bar -v "ash_nazg_gimbatul" $M0/removexattr + +# Kill a couple of bricks and allow some time for things to settle. 
+TEST kill_brick $V0 $H0 $B0/${V0}3 +TEST kill_brick $V0 $H0 $B0/${V0}8 +sleep 10 + +# Test create+write +TEST cp $tmpdir/create-write $M0/ +# Test truncate +TEST truncate --size=8192 $M0/truncate +# Test hard link +TEST ln $M0/hard-link-1 $M0/hard-link-2 +# Test soft link +TEST ln -s soft-link-tgt $M0/soft-link +# Test unlink +TEST rm $M0/unlink +# Test rmdir +TEST rmdir $M0/rmdir +# Test mkdir +TEST mkdir $M0/mkdir +# Test setxattr +TEST setfattr -n user.foo -v "ash_nazg_durbatuluk" $M0/setxattr +# Test removexattr +TEST setfattr -x user.bar $M0/removexattr +# Test uid/gid behavior +TEST setup_perm_file $M0 + +# Unmount/remount so that create/write and truncate don't see cached data. +TEST umount $M0 +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +# Test create/write and truncate *before* the bricks are brought back. +TEST check_create_write $M0 +TEST check_truncate $M0 + +# Restart the bricks and allow repair to occur. +TEST $CLI volume start $V0 force +sleep 10 + +# Unmount/remount again, same reason as before. +TEST umount $M0 +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +# Make sure everything is as it should be.  Most tests check for consistency +# between the bricks and the front end.  This is not valid for disperse, so we +# check the mountpoint state instead. + +TEST check_create_write $M0 +TEST check_truncate $M0 + +TEST stat $M0/hard-link-1 +TEST stat $M0/hard-link-2 +TEST stat $M0/soft-link +TEST ! stat $M0/unlink +TEST ! 
stat $M0/rmdir +TEST stat $M0/mkdir +TEST stat $M0/setxattr +TEST stat $M0/removexattr +TEST stat $M0/perm_dir +TEST stat $M0/perm_dir/perm_file + +EXPECT_WITHIN 5 "Y" check_hard_link $B0/${V0}{0..9} +EXPECT_WITHIN 5 "Y" check_soft_link $B0/${V0}{0..9} +EXPECT_WITHIN 5 "Y" check_unlink $B0/${V0}{0..9} +EXPECT_WITHIN 5 "Y" check_rmdir $B0/${V0}{0..9} +EXPECT_WITHIN 5 "Y" check_mkdir $B0/${V0}{0..9} +EXPECT_WITHIN 5 "Y" check_setxattr $B0/${V0}{0..9} +EXPECT_WITHIN 5 "Y" check_removexattr $B0/${V0}{0..9} +EXPECT_WITHIN 5 "Y" check_perm_file $B0/${V0}{0..9} + +rm -rf $tmpdir +userdel --force ${TEST_USER} + +cleanup + diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t new file mode 100644 index 00000000000..99cfd9420aa --- /dev/null +++ b/tests/basic/ec/self-heal.t @@ -0,0 +1,123 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test checks self-healing feature of dispersed volumes + +cleanup + +tmp=`mktemp -d` +if [ ! -d $tmp ]; then +    exit 1 +fi + +TESTS_EXPECTED_IN_LOOP=85 + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5} +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +TEST dd if=/dev/urandom of=$tmp/test bs=1024 count=1024 + +cs=$(sha1sum $tmp/test | awk '{ print $1 }') + +TEST df -h +TEST stat $M0 + +for idx in {0..5}; do +    brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx) +done + +cd $M0 +TEST cp $tmp/test test +TEST chmod 644 test +EXPECT "-rw-r--r--" stat -c "%A" test + +for idx1 in {0..5}; do +    TEST chmod 666 ${brick[$idx1]}/test +    sleep 1 +    EXPECT "-rw-r--r--" stat -c "%A" test +    EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test +done + +for idx1 in {0..4}; do +    for idx2 in `seq $(($idx1 + 1)) 5`; do +        if [ $idx1 -ne $idx2 ]; then +            TEST chmod 666 ${brick[$idx1]}/test +            TEST chmod 600 
${brick[$idx2]}/test +            sleep 1 +            EXPECT "-rw-r--r--" stat -c "%A" test +            EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test +            EXPECT_WITHIN 5 "-rw-r--r--" stat -c "%A" ${brick[$idx2]}/test +        fi +    done +done + +TEST truncate -s 0 ${brick[0]}/test +TEST truncate -s 2097152 ${brick[1]}/test +TEST setfattr -n user.test -v "test1" ${brick[0]}/test +TEST setfattr -n user.test -v "test2" ${brick[1]}/test +TEST chmod 600 ${brick[0]}/test +TEST chmod 666 ${brick[1]}/test +sleep 1 + +EXPECT "1048576" stat -c "%s" test +TEST ! getfattr -n user.test test + +EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[0]}/test +EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[1]}/test +TEST ! getfattr -n user.test ${brick[0]}/test +TEST ! getfattr -n user.test ${brick[1]}/test +EXPECT "-rw-r--r--" stat -c "%A" ${brick[0]}/test +EXPECT "-rw-r--r--" stat -c "%A" ${brick[1]}/test + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST cp $tmp/test test2 +EXPECT "1048576" stat -c "%s" test2 +TEST chmod 777 test2 +EXPECT "-rwxrwxrwx" stat -c "%A" test2 + +TEST mkdir dir1 +TEST ls -al dir1 + +TEST ln -s test2 test3 +TEST [ -h test3 ] + +TEST ln test2 test4 +TEST [ -f test4 ] +EXPECT "2" stat -c "%h" test2 +EXPECT "2" stat -c "%h" test4 + +cd +TEST umount $M0 +TEST $CLI volume stop $V0 force +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +cd $M0 + +EXPECT "1048576" stat -c "%s" test2 +EXPECT "-rwxrwxrwx" stat -c "%A" test2 +EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[0]}/test2 +EXPECT_WITHIN 5 "262144" stat -c "%s" ${brick[1]}/test2 +EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[0]}/test2 +EXPECT "-rwxrwxrwx" stat -c "%A" ${brick[1]}/test2 + +TEST ls -al dir1 +EXPECT_WITHIN 5 "1" eval "if [ -d ${brick[0]}/dir1 ]; then echo 1; fi" +EXPECT_WITHIN 5 "1" eval "if [ -d ${brick[1]}/dir1 ]; then echo 1; fi" + +TEST [ -h test3 ] +EXPECT_WITHIN 5 
"1" eval "if [ -h ${brick[0]}/test3 ]; then echo 1; fi" +EXPECT_WITHIN 5 "1" eval "if [ -h ${brick[1]}/test3 ]; then echo 1; fi" + +EXPECT "2" stat -c "%h" test4 +EXPECT_WITHIN 5 "3" stat -c "%h" ${brick[0]}/test4 +EXPECT_WITHIN 5 "3" stat -c "%h" ${brick[1]}/test4 + +rm -rf $tmp + +cleanup diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 452df759ad4..089c7d637c9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -169,6 +169,12 @@ gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,                          }                  }                  break; +        case GF_CLUSTER_TYPE_DISPERSE: +                snprintf (err_str, err_len, "Volume %s cannot be converted " +                                            "from dispersed to striped-" +                                            "dispersed", volinfo->volname); +                gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str); +                goto out;          }  out: @@ -259,6 +265,12 @@ gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,                          }                  }                  break; +        case GF_CLUSTER_TYPE_DISPERSE: +                snprintf (err_str, err_len, "Volume %s cannot be converted " +                                            "from dispersed to replicated-" +                                            "dispersed", volinfo->volname); +                gf_log(THIS->name, GF_LOG_ERROR, "%s", err_str); +                goto out;          }  out:          return ret; @@ -276,6 +288,7 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,          switch (volinfo->type) {          case GF_CLUSTER_TYPE_NONE:          case GF_CLUSTER_TYPE_STRIPE: +        case GF_CLUSTER_TYPE_DISPERSE:                  snprintf (err_str, err_len,                            "replica count (%d) option 
given for non replicate "                            "volume %s", replica_count, volinfo->volname); @@ -737,6 +750,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)                  strcpy (vol_type, "stripe");          } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {                  strcpy (vol_type, "stripe-replicate"); +        } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { +                strcpy (vol_type, "disperse");          } else {                  strcpy (vol_type, "distribute");          } diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index ed4bd60f88b..e10dc22b56b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -398,6 +398,16 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,          if (ret)                  goto out; +        snprintf (key, 256, "volume%d.disperse_count", count); +        ret = dict_set_int32 (volumes, key, volinfo->disperse_count); +        if (ret) +                goto out; + +        snprintf (key, 256, "volume%d.redundancy_count", count); +        ret = dict_set_int32 (volumes, key, volinfo->redundancy_count); +        if (ret) +                goto out; +          snprintf (key, 256, "volume%d.transport", count);          ret = dict_set_int32 (volumes, key, volinfo->transport_type);          if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index c31d8a8ad71..086a6550a72 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -844,6 +844,18 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)          if (ret)                  goto out; +        snprintf (buf, sizeof (buf), "%d", volinfo->disperse_count); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, +                                   
buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->redundancy_count); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, +                                   buf); +        if (ret) +                goto out; +          snprintf (buf, sizeof (buf), "%d", volinfo->version);          ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf);          if (ret) @@ -2618,6 +2630,12 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)                  } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,                                       strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {                          volinfo->replica_count = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, +                                     strlen (GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) { +                        volinfo->disperse_count = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, +                                     strlen (GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) { +                        volinfo->redundancy_count = atoi (value);                  } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,                                       strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {                          volinfo->transport_type = atoi (value); @@ -2754,6 +2772,11 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)                                  GF_ASSERT (volinfo->replica_count > 0);                          break; +                        case GF_CLUSTER_TYPE_DISPERSE: +                                GF_ASSERT (volinfo->disperse_count > 0); +                                GF_ASSERT (volinfo->redundancy_count > 0); +                        break; +                          default:                                  GF_ASSERT (0);                          
break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 89cf24de789..fb7de7b1b10 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -44,6 +44,8 @@ typedef enum glusterd_store_ver_ac_{  #define GLUSTERD_STORE_KEY_VOL_SUB_COUNT        "sub_count"  #define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT       "stripe_count"  #define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT      "replica_count" +#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT     "disperse_count" +#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT   "redundancy_count"  #define GLUSTERD_STORE_KEY_VOL_BRICK            "brick"  #define GLUSTERD_STORE_KEY_VOL_VERSION          "version"  #define GLUSTERD_STORE_KEY_VOL_TRANSPORT        "transport-type" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index dc923b1eeb4..aff2356eb4f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -548,6 +548,8 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,          new_volinfo->type = volinfo->type;          new_volinfo->replica_count = volinfo->replica_count;          new_volinfo->stripe_count = volinfo->stripe_count; +        new_volinfo->disperse_count = volinfo->disperse_count; +        new_volinfo->redundancy_count = volinfo->redundancy_count;          new_volinfo->dist_leaf_count = volinfo->dist_leaf_count;          new_volinfo->sub_count = volinfo->sub_count;          new_volinfo->transport_type = volinfo->transport_type; @@ -2525,6 +2527,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,                  goto out;          memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count); +        ret = dict_set_int32 (dict, key, volinfo->disperse_count); +        if (ret) +                goto out; + +        memset (key, 0, sizeof (key)); +        snprintf (key, 
sizeof (key), "%s%d.redundancy_count", prefix, count); +        ret = dict_set_int32 (dict, key, volinfo->redundancy_count); +        if (ret) +                goto out; + +        memset (key, 0, sizeof (key));          snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count);          ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count);          if (ret) @@ -4206,6 +4220,24 @@ glusterd_import_volinfo (dict_t *peer_data, int count,                  gf_log (THIS->name, GF_LOG_INFO,                          "peer is possibly old version"); +        /* not having a 'disperse_count' key is not a error +           (as peer may be of old version) */ +        memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "%s%d.disperse_count", prefix, count); +        ret = dict_get_int32 (peer_data, key, &new_volinfo->disperse_count); +        if (ret) +                gf_log (THIS->name, GF_LOG_INFO, +                        "peer is possibly old version"); + +        /* not having a 'redundancy_count' key is not a error +           (as peer may be of old version) */ +        memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "%s%d.redundancy_count", prefix, count); +        ret = dict_get_int32 (peer_data, key, &new_volinfo->redundancy_count); +        if (ret) +                gf_log (THIS->name, GF_LOG_INFO, +                        "peer is possibly old version"); +          /* not having a 'dist_count' key is not a error             (as peer may be of old version) */          memset (key, 0, sizeof (key)); @@ -6932,6 +6964,9 @@ glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo)      int rcount = volinfo->replica_count;      int scount = volinfo->stripe_count; +    if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) +        return volinfo->disperse_count; +      return (rcount ? rcount : 1) * (scount ? 
scount : 1);  } @@ -11694,6 +11729,13 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo)                  }          } +        if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) { +                if (volinfo->op_version < GD_OP_VERSION_3_6_0) +                        volinfo->op_version = GD_OP_VERSION_3_6_0; +                if (volinfo->client_op_version < GD_OP_VERSION_3_6_0) +                        volinfo->client_op_version = GD_OP_VERSION_3_6_0; +        } +          return;  } @@ -12774,7 +12816,7 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,                  goto out;          } -        up_count = volinfo->replica_count - down_count; +        up_count = volinfo->dist_leaf_count - down_count;          if (quorum_type && !strcmp (quorum_type, "fixed")) {                  if (up_count >= quorum_count) { @@ -12782,7 +12824,8 @@ glusterd_volume_quorum_calculate (glusterd_volinfo_t *volinfo, dict_t *dict,                          goto out;                  }          } else { -                if (volinfo->replica_count % 2 == 0) { +                if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) && +                    (volinfo->dist_leaf_count % 2 == 0)) {                          if ((up_count > quorum_count) ||                              ((up_count == quorum_count) && first_brick_on)) {                                  quorum_met = _gf_true; @@ -12835,8 +12878,9 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,                  goto out;          } -        if (!glusterd_is_volume_replicate (volinfo) || -            volinfo->replica_count < 3) { +        if ((!glusterd_is_volume_replicate (volinfo) || +             volinfo->replica_count < 3) && +            (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) {                  for (i = 0; i < volinfo->brick_count ; i++) {                          /* for a pure distribute volume, and replica volume                             with replica count 
2, quorum is not met if even @@ -12858,7 +12902,8 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,                  ret = 0;                  quorum_met = _gf_true;          } else { -             distribute_subvols = volinfo->brick_count / volinfo->replica_count; +             distribute_subvols = volinfo->brick_count / +                                  volinfo->dist_leaf_count;               for (j = 0; j < distribute_subvols; j++) {                          // by default assume quorum is not met                          /* TODO: Handle distributed striped replicate volumes @@ -12867,11 +12912,11 @@ glusterd_volume_quorum_check (glusterd_volinfo_t *volinfo, int64_t index,                          */                          ret = 1;                          quorum_met = _gf_false; -                        for (i = 0; i < volinfo->replica_count; i++) { +                        for (i = 0; i < volinfo->dist_leaf_count; i++) {                                  snprintf (key, sizeof (key),                                            "%s%"PRId64".brick%"PRId64".status", key_prefix,                                            index, -                                          (j * volinfo->replica_count) + i); +                                          (j * volinfo->dist_leaf_count) + i);                                  ret = dict_get_int32 (dict, key, &brick_online);                                  if (ret || !brick_online) {                                          if (i == 0) @@ -13043,6 +13088,9 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,                          else                                  quorum_count =                                          volinfo->replica_count/2 + 1; +                } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { +                        quorum_count = volinfo->disperse_count - +                                       volinfo->redundancy_count;                
  } else {                          quorum_count = volinfo->brick_count;                  } @@ -13061,8 +13109,22 @@ glusterd_snap_quorum_check_for_create (dict_t *dict, gf_boolean_t snap_volume,                             if the quorum-type option is not set to auto,                             the behavior is set to the default behavior)                          */ -                        if (!ret) -                                quorum_count = tmp; +                        if (!ret) { +                                /* for dispersed volumes, only allow quorums +                                   equal or larger than minimum functional +                                   value. +                                */ +                                if ((GF_CLUSTER_TYPE_DISPERSE != +                                                              volinfo->type) || +                                    (tmp >= quorum_count)) { +                                        quorum_count = tmp; +                                } else { +                                        gf_log(this->name, GF_LOG_INFO, +                                               "Ignoring small quorum-count " +                                               "(%d) on dispersed volume", tmp); +                                        quorum_type = NULL; +                                } +                        }                          else                                  quorum_type = NULL;                  } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 6ab899a16cf..9701c6b939c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -2684,10 +2684,14 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,                                                         "%s-replicate-%d"};          char                    *stripe_args[]      = {"cluster/stripe",                           
                              "%s-stripe-%d"}; +        char                    *disperse_args[]    = {"cluster/disperse", +                                                       "%s-disperse-%d"}; +        char                    option[32]          = "";          int                     rclusters           = 0;          int                     clusters            = 0;          int                     dist_count          = 0;          int                     ret                 = -1; +        xlator_t *              ec                  = NULL;          if (!volinfo->dist_leaf_count)                  goto out; @@ -2737,6 +2741,26 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,                  if (clusters < 0)                          goto out;                  break; +        case GF_CLUSTER_TYPE_DISPERSE: +                clusters = volgen_graph_build_clusters (graph, volinfo, +                                                        disperse_args[0], +                                                        disperse_args[1], +                                                        volinfo->brick_count, +                                                        volinfo->disperse_count); +                if (clusters < 0) +                        goto out; + +                sprintf(option, "%d", volinfo->redundancy_count); +                ec = first_of (graph); +                while (clusters-- > 0) { +                        ret = xlator_set_option (ec, "redundancy", option); +                        if (ret) +                                goto out; + +                        ec = ec->next; +                } + +                break;          default:                  gf_log ("", GF_LOG_ERROR, "volume inconsistency: "                          "unrecognized clustering type"); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 53beebe0555..f23a9eb96b7 100644 --- 
a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1689,6 +1689,27 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)                                  "replica count for volume %s", volname);                          goto out;                  } +        } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) { +                ret = dict_get_int32 (dict, "disperse-count", +                                      &volinfo->disperse_count); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to get " +                                 "disperse count for volume %s", volname); +                        goto out; +                } +                ret = dict_get_int32 (dict, "redundancy-count", +                                      &volinfo->redundancy_count); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to get " +                                 "redundancy count for volume %s", volname); +                        goto out; +                } +                if (priv->op_version < GD_OP_VERSION_3_6_0) { +                        gf_log (this->name, GF_LOG_ERROR, "Disperse volume " +                                "needs op-version 3.6.0 or higher"); +                        ret = -1; +                        goto out; +                }          }          /* dist-leaf-count is the count of brick nodes for a given diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index a8ecb505a5b..ddbb2c81338 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -336,6 +336,8 @@ struct glusterd_volinfo_ {          int                       sub_count;  /* backward compatibility */          int                       stripe_count;          int                       replica_count; +        int                       disperse_count; +        int            
           redundancy_count;          int                       subvol_count; /* Number of subvolumes in a                                                   distribute volume */          int                       dist_leaf_count; /* Number of bricks in one  | 
