diff options
-rw-r--r-- | cli/src/cli-cmd-volume.c | 203 | ||||
-rw-r--r-- | cli/src/cli-cmd.h | 11 | ||||
-rw-r--r-- | doc/admin-guide/en-US/markdown/admin_setting_volumes.md | 38 | ||||
-rwxr-xr-x | tests/bugs/bug-1091935-brick-order-check-from-cli-to-glusterd.t | 27 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 237 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 11 |
6 files changed, 308 insertions, 219 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 52b5adb6129..15f448b5ba2 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -187,154 +187,6 @@ out: return ret; } -gf_ai_compare_t -cli_cmd_compare_addrinfo (struct addrinfo *first, struct addrinfo *next) -{ - int ret = -1; - struct addrinfo *tmp1 = NULL; - struct addrinfo *tmp2 = NULL; - char firstip[NI_MAXHOST] = {0.}; - char nextip[NI_MAXHOST] = {0,}; - - for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { - ret = getnameinfo (tmp1->ai_addr, tmp1->ai_addrlen, firstip, - NI_MAXHOST, NULL, 0, NI_NUMERICHOST); - if (ret) - return GF_AI_COMPARE_ERROR; - for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { - ret = getnameinfo (tmp2->ai_addr, tmp2->ai_addrlen, nextip, - NI_MAXHOST, NULL, 0, NI_NUMERICHOST); - if (ret) - return GF_AI_COMPARE_ERROR; - if (!strcmp (firstip, nextip)) { - return GF_AI_COMPARE_MATCH; - } - } - } - return GF_AI_COMPARE_NO_MATCH; -} - -/* Check for non optimal brick order for replicate : - * Checks if bricks belonging to a replicate volume - * are present on the same server - */ -int32_t -cli_cmd_check_brick_order (struct cli_state *state, const char *bricks, - int brick_count, int sub_count) -{ - int ret = -1; - int i = 0; - int j = 0; - int k = 0; - addrinfo_list_t *ai_list = NULL; - addrinfo_list_t *ai_list_tmp1 = NULL; - addrinfo_list_t *ai_list_tmp2 = NULL; - char *brick = NULL; - char *brick_list = NULL; - char *brick_list_dup = NULL; - char *tmpptr = NULL; - struct addrinfo *ai_info = NULL; - gf_answer_t answer = GF_ANSWER_NO; - const char *failed_question = NULL; - const char *found_question = NULL; - failed_question = "Failed to perform brick order check. " - "Do you want to continue creating the volume? "; - found_question = "Multiple bricks of a replicate volume are present" - " on the same server. This setup is not optimal.\n" - "Do you still want to continue creating the volume? "; - - GF_ASSERT (bricks); - GF_ASSERT (brick_count > 0); - GF_ASSERT (sub_count > 0); - - ai_list = malloc (sizeof (addrinfo_list_t)); - ai_list->info = NULL; - INIT_LIST_HEAD (&ai_list->list); - brick_list = gf_strdup (bricks); - if (brick_list == NULL) { - gf_log ("cli", GF_LOG_DEBUG, "failed to allocate memory"); - goto check_failed; - } - brick_list_dup = brick_list; - /* Resolve hostnames and get addrinfo */ - while (i < brick_count) { - ++i; - brick = strtok_r (brick_list, " \n", &tmpptr); - brick_list = tmpptr; - if (brick == NULL) - goto check_failed; - brick = strtok_r (brick, ":", &tmpptr); - if (brick == NULL) - goto check_failed; - ret = getaddrinfo (brick, NULL, NULL, &ai_info); - if (ret) - goto check_failed; - ai_list_tmp1 = malloc (sizeof (addrinfo_list_t)); - if (ai_list_tmp1 == NULL) - goto check_failed; - ai_list_tmp1->info = ai_info; - list_add_tail (&ai_list_tmp1->list, &ai_list->list); - ai_list_tmp1 = NULL; - } - - i = 0; - ai_list_tmp1 = list_entry (ai_list->list.next, addrinfo_list_t, list); - - /* Check for bad brick order */ - while (i < brick_count) { - ++i; - ai_info = ai_list_tmp1->info; - ai_list_tmp1 = list_entry (ai_list_tmp1->list.next, - addrinfo_list_t, list); - if ( 0 == i % sub_count) { - j = 0; - continue; - } - ai_list_tmp2 = ai_list_tmp1; - k = j; - while (k < sub_count - 1) { - ++k; - ret = cli_cmd_compare_addrinfo (ai_info, - ai_list_tmp2->info); - if (GF_AI_COMPARE_ERROR == ret) - goto check_failed; - if (GF_AI_COMPARE_MATCH == ret) - goto found_bad_brick_order; - ai_list_tmp2 = list_entry (ai_list_tmp2->list.next, - addrinfo_list_t, list); - } - ++j; - } - gf_log ("cli", GF_LOG_INFO, "Brick order okay"); - ret = 0; - goto out; - -check_failed: - gf_log ("cli", GF_LOG_INFO, "Failed bad brick order check"); - answer = cli_cmd_get_confirmation(state, failed_question); - if (GF_ANSWER_YES == answer) - ret = 0; - goto out; - -found_bad_brick_order: - gf_log ("cli", GF_LOG_INFO, "Bad brick order found"); - answer = cli_cmd_get_confirmation (state, found_question); - if (GF_ANSWER_YES == answer) - ret = 0; -out: - ai_list_tmp2 = NULL; - i = 0; - GF_FREE (brick_list_dup); - list_for_each_entry (ai_list_tmp1, &ai_list->list, list) { - if (ai_list_tmp1->info) - freeaddrinfo (ai_list_tmp1->info); - free (ai_list_tmp2); - ai_list_tmp2 = ai_list_tmp1; - } - free (ai_list_tmp2); - return ret; -} - int cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word, const char **words, int wordcount) @@ -369,61 +221,6 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word, parse_error = 1; goto out; } - /*Check brick order if type is replicate*/ - ret = dict_get_int32 (options, "type", &type); - if (ret) { - gf_log ("cli", GF_LOG_ERROR, "Could not get brick type"); - goto out; - } - if ((type == GF_CLUSTER_TYPE_REPLICATE) || - (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) || - (type == GF_CLUSTER_TYPE_DISPERSE)) { - if ((ret = dict_get_str (options, "bricks", - &brick_list)) != 0) { - gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could " - "not retrieve bricks " - "list"); - goto out; - } - if ((ret = dict_get_int32 (options, "count", - &brick_count)) != 0) { - gf_log ("cli", GF_LOG_ERROR, "Bricks check : Could " - "not retrieve brick " - "count"); - goto out; - } - - if (type != GF_CLUSTER_TYPE_DISPERSE) { - if ((ret = dict_get_int32 (options, "replica-count", - &sub_count)) != 0) { - gf_log ("cli", GF_LOG_ERROR, "Bricks check : " - "Could not retrieve " - "replica count"); - goto out; - } - gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found." - " Checking brick order."); - } else { - ret = dict_get_int32 (options, "disperse-count", - &sub_count); - if (ret) { - gf_log ("cli", GF_LOG_ERROR, "Bricks check : " - "Could not retrieve " - "disperse count"); - goto out; - } - gf_log ("cli", GF_LOG_INFO, "Disperse cluster type found. " - "Checking brick order."); - } - ret = cli_cmd_check_brick_order (state, brick_list, - brick_count, sub_count); - if (ret) { - gf_log("cli", GF_LOG_INFO, "Not creating volume " - "because of bad brick " - "order"); - goto out; - } - } ret = dict_get_str (options, "transport", &trans_type); if (ret) { diff --git a/cli/src/cli-cmd.h b/cli/src/cli-cmd.h index 91d15b7e170..cf036928ddf 100644 --- a/cli/src/cli-cmd.h +++ b/cli/src/cli-cmd.h @@ -74,17 +74,6 @@ typedef struct cli_profile_info_ { double percentage_avg_latency; } cli_profile_info_t; -typedef struct addrinfo_list { - struct list_head list; - struct addrinfo *info; -} addrinfo_list_t; - -typedef enum { - GF_AI_COMPARE_NO_MATCH = 0, - GF_AI_COMPARE_MATCH = 1, - GF_AI_COMPARE_ERROR = 2 -} gf_ai_compare_t; - typedef struct cli_cmd_volume_get_ctx_ cli_cmd_volume_get_ctx_t; int cli_cmd_volume_register (struct cli_state *state); diff --git a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md index 455238048be..028cd30647a 100644 --- a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md +++ b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md @@ -155,9 +155,17 @@ high-availability and high-reliability are critical. auth.allow or auth.reject. > **Note**: - > Make sure you start your volumes before you try to mount them or + + > - Make sure you start your volumes before you try to mount them or > else client operations after the mount will hang. + > - GlusterFS will fail to create a replicate volume if more than one brick of a replica set is present on the same peer. For eg. four node replicated volume with a more that one brick of a replica set is present on the same peer. + > ``` + # gluster volume create <volname> replica 4 server1:/brick1 server1:/brick2 server2:/brick3 server4:/brick4 + volume create: <volname>: failed: Multiple bricks of a replicate volume are present on the same server. This setup is not optimal. Use 'force' at the end of the command if you want to override this behavior.``` + + > Use the `force` option at the end of command if you want to create the volume in this case. + ##Creating Striped Volumes Striped volumes stripes data across bricks in the volume. For best @@ -275,9 +283,17 @@ environments. auth.allow or auth.reject. > **Note**: - > Make sure you start your volumes before you try to mount them or + > - Make sure you start your volumes before you try to mount them or > else client operations after the mount will hang. + > - GlusterFS will fail to create a distribute replicate volume if more than one brick of a replica set is present on the same peer. For eg. four node distribute (replicated) volume with a more than one brick of a replica set is present on the same peer. + > ``` + # gluster volume create <volname> replica 2 server1:/brick1 server1:/brick2 server2:/brick3 server4:/brick4 + volume create: <volname>: failed: Multiple bricks of a replicate volume are present on the same server. This setup is not optimal. Use 'force' at the end of the command if you want to override this behavior.``` + + > Use the `force` option at the end of command if you want to create the volume in this case. + + ##Creating Distributed Striped Replicated Volumes Distributed striped replicated volumes distributes striped data across @@ -312,9 +328,16 @@ Map Reduce workloads. auth.allow or auth.reject. > **Note**: - > Make sure you start your volumes before you try to mount them or + > - Make sure you start your volumes before you try to mount them or > else client operations after the mount will hang. + > - GlusterFS will fail to create a distribute replicate volume if more than one brick of a replica set is present on the same peer. For eg. four node distribute (replicated) volume with a more than one brick of a replica set is present on the same peer. + > ``` + # gluster volume create <volname> stripe 2 replica 2 server1:/brick1 server1:/brick2 server2:/brick3 server4:/brick4 + volume create: <volname>: failed: Multiple bricks of a replicate volume are present on the same server. This setup is not optimal. Use 'force' at the end of the command if you want to override this behavior.``` + + > Use the `force` option at the end of command if you want to create the volume in this case. + ##Creating Striped Replicated Volumes Striped replicated volumes stripes data across replicated bricks in the @@ -356,9 +379,16 @@ of this volume type is supported only for Map Reduce workloads. auth.allow or auth.reject. > **Note**: - > Make sure you start your volumes before you try to mount them or + > - Make sure you start your volumes before you try to mount them or > else client operations after the mount will hang. + > - GlusterFS will fail to create a distribute replicate volume if more than one brick of a replica set is present on the same peer. For eg. four node distribute (replicated) volume with a more than one brick of replica set is present on the same peer. + > ``` + # gluster volume create <volname> stripe 2 replica 2 server1:/brick1 server1:/brick2 server2:/brick3 server4:/brick4 + volume create: <volname>: failed: Multiple bricks of a replicate volume are present on the same server. This setup is not optimal. Use 'force' at the end of the command if you want to override this behavior.``` + + > Use the `force` option at the end of command if you want to create the volume in this case. + ##Starting Volumes You must start your volumes before you try to mount them. diff --git a/tests/bugs/bug-1091935-brick-order-check-from-cli-to-glusterd.t b/tests/bugs/bug-1091935-brick-order-check-from-cli-to-glusterd.t new file mode 100755 index 00000000000..d5edabda806 --- /dev/null +++ b/tests/bugs/bug-1091935-brick-order-check-from-cli-to-glusterd.t @@ -0,0 +1,27 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +cli1=$(echo $CLI | sed 's/ --wignore//') + +# Creating volume with non resolvable host name +TEST ! $cli1 volume create $V0 replica 2 $H0:$B0/${V0}0 redhat:$B0/${V0}1 \ + $H0:$B0/${V0}2 redhat:$B0/${V0}3 + +# Creating distribute-replica volume with bad brick order. It will fail +# due to bad brick order. +TEST ! $cli1 volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \ + $H0:$B0/${V0}2 $H0:$B0/${V0}3 + +# Now with force at the end of command it will bypass brick-order check +# for replicate or distribute-replicate volume. and it will create volume +TEST $cli1 volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \ + $H0:$B0/${V0}2 $H0:$B0/${V0}3 force + +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 334aef9f412..ac8407f077d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -28,12 +28,222 @@ #include "glusterd-messages.h" #include "run.h" +#include <stdint.h> +#include <sys/socket.h> +#include <netdb.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <stdlib.h> + #define glusterd_op_start_volume_args_get(dict, volname, flags) \ glusterd_op_stop_volume_args_get (dict, volname, flags) extern int _get_slave_status (dict_t *this, char *key, data_t *value, void *data); +gf_ai_compare_t +glusterd_compare_addrinfo (struct addrinfo *first, struct addrinfo *next) +{ + int ret = -1; + struct addrinfo *tmp1 = NULL; + struct addrinfo *tmp2 = NULL; + char firstip[NI_MAXHOST] = {0.}; + char nextip[NI_MAXHOST] = {0,}; + + for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) { + ret = getnameinfo (tmp1->ai_addr, tmp1->ai_addrlen, firstip, + NI_MAXHOST, NULL, 0, NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) { + ret = getnameinfo (tmp2->ai_addr, tmp2->ai_addrlen, + nextip, NI_MAXHOST, NULL, 0, + NI_NUMERICHOST); + if (ret) + return GF_AI_COMPARE_ERROR; + if (!strcmp (firstip, nextip)) { + return GF_AI_COMPARE_MATCH; + } + } + } + return GF_AI_COMPARE_NO_MATCH; +} + +/* Check for non optimal brick order for replicate : + * Checks if bricks belonging to a replicate volume + * are present on the same server + */ +int32_t +glusterd_check_brick_order(dict_t *dict, char *err_str) +{ + int ret = -1; + int i = 0; + int j = 0; + int k = 0; + xlator_t *this = NULL; + addrinfo_list_t *ai_list = NULL; + addrinfo_list_t *ai_list_tmp1 = NULL; + addrinfo_list_t *ai_list_tmp2 = NULL; + char *brick = NULL; + char *brick_list = NULL; + char *brick_list_dup = NULL; + char *tmpptr = NULL; + char *volname = NULL; + int32_t brick_count = 0; + int32_t type = GF_CLUSTER_TYPE_NONE; + int32_t sub_count = 0; + struct addrinfo *ai_info = NULL; + + const char failed_string[2048] = "Failed to perform brick order " + "check. Use 'force' at the end of the command" + " if you want to override this behavior. "; + const char found_string[2048] = "Multiple bricks of a replicate " + "volume are present on the same server. This " + "setup is not optimal. Use 'force' at the " + "end of the command if you want to override " + "this behavior. "; + + this = THIS; + + GF_ASSERT(this); + + ai_list = malloc (sizeof (addrinfo_list_t)); + ai_list->info = NULL; + INIT_LIST_HEAD (&ai_list->list); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret) { + snprintf (err_str, 512, "Unable to get type of volume %s", + volname); + gf_log (this->name, GF_LOG_WARNING, "%s", err_str); + goto out; + } + + ret = dict_get_str (dict, "bricks", &brick_list); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could not " + "retrieve bricks list"); + goto out; + } + + ret = dict_get_int32 (dict, "count", &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could not " + "retrieve brick count"); + goto out; + } + + if (type != GF_CLUSTER_TYPE_DISPERSE) { + ret = dict_get_int32 (dict, "replica-count", &sub_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could" + " not retrieve replica count"); + goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "Replicate cluster type " + "found. Checking brick order."); + } else { + ret = dict_get_int32 (dict, "disperse-count", &sub_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Bricks check : Could" + " not retrieve disperse count"); + goto out; + } + gf_log (this->name, GF_LOG_INFO, "Disperse cluster type" + " found. Checking brick order."); + } + + brick_list_dup = gf_strdup(brick_list); + /* Resolve hostnames and get addrinfo */ + while (i < brick_count) { + ++i; + brick = strtok_r (brick_list_dup, " \n", &tmpptr); + brick_list = tmpptr; + if (brick == NULL) + goto check_failed; + brick = strtok_r (brick, ":", &tmpptr); + if (brick == NULL) + goto check_failed; + ret = getaddrinfo (brick, NULL, NULL, &ai_info); + if (ret != 0) { + ret = 0; + gf_log (this->name, GF_LOG_ERROR, "unable to resolve " + "host name"); + goto out; + } + ai_list_tmp1 = malloc (sizeof (addrinfo_list_t)); + if (ai_list_tmp1 == NULL) { + ret = 0; + gf_log (this->name, GF_LOG_ERROR, "failed to allocate " + "memory"); + goto out; + } + ai_list_tmp1->info = ai_info; + list_add_tail (&ai_list_tmp1->list, &ai_list->list); + ai_list_tmp1 = NULL; + } + + i = 0; + ai_list_tmp1 = list_entry (ai_list->list.next, addrinfo_list_t, list); + + /* Check for bad brick order */ + while (i < brick_count) { + ++i; + ai_info = ai_list_tmp1->info; + ai_list_tmp1 = list_entry (ai_list_tmp1->list.next, + addrinfo_list_t, list); + if (0 == i % sub_count) { + j = 0; + continue; + } + ai_list_tmp2 = ai_list_tmp1; + k = j; + while (k < sub_count - 1) { + ++k; + ret = glusterd_compare_addrinfo (ai_info, + ai_list_tmp2->info); + if (GF_AI_COMPARE_ERROR == ret) + goto check_failed; + if (GF_AI_COMPARE_MATCH == ret) + goto found_bad_brick_order; + ai_list_tmp2 = list_entry (ai_list_tmp2->list.next, + addrinfo_list_t, list); + } + ++j; + } + gf_log (this->name, GF_LOG_DEBUG, "Brick order okay"); + ret = 0; + goto out; + +check_failed: + gf_log (this->name, GF_LOG_ERROR, "Failed bad brick order check"); + snprintf (err_str, sizeof (failed_string), failed_string); + ret = -1; + goto out; + +found_bad_brick_order: + gf_log (this->name, GF_LOG_INFO, "Bad brick order found"); + snprintf (err_str, sizeof (found_string), found_string); + ret = -1; +out: + ai_list_tmp2 = NULL; + GF_FREE (brick_list_dup); + list_for_each_entry (ai_list_tmp1, &ai_list->list, list) { + if (ai_list_tmp1->info) + freeaddrinfo (ai_list_tmp1->info); + free (ai_list_tmp2); + ai_list_tmp2 = ai_list_tmp1; + } + free (ai_list_tmp2); + return ret; +} + int __glusterd_handle_create_volume (rpcsvc_request_t *req) { @@ -724,11 +934,12 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr, char *bricks = NULL; char *brick_list = NULL; char *free_ptr = NULL; - char key[PATH_MAX] = ""; + char key[PATH_MAX] = ""; glusterd_brickinfo_t *brick_info = NULL; int32_t brick_count = 0; int32_t local_brick_count = 0; int32_t i = 0; + int32_t type = 0; char *brick = NULL; char *tmpptr = NULL; xlator_t *this = NULL; @@ -881,6 +1092,30 @@ glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr, brick_info = NULL; } + /*Check brick order if the volume type is replicate or disperse. If + * force at the end of command not given then check brick order. + */ + ret = dict_get_int32 (dict, "type", &type); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to get type of volume %s", + volname); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + goto out; + } + + if (!is_force) { + if ((type == GF_CLUSTER_TYPE_REPLICATE) || + (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) || + (type == GF_CLUSTER_TYPE_DISPERSE)) { + ret = glusterd_check_brick_order(dict, msg); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Not creating " + "volume because of bad brick order"); + goto out; + } + } + } + ret = dict_set_int32 (rsp_dict, "brick_count", local_brick_count); if (ret) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 45656bef8e1..846603585b1 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -465,6 +465,17 @@ enum glusterd_vol_comp_status_ { GLUSTERD_VOL_COMP_RJT, }; +typedef struct addrinfo_list { + struct list_head list; + struct addrinfo *info; +} addrinfo_list_t; + +typedef enum { + GF_AI_COMPARE_NO_MATCH = 0, + GF_AI_COMPARE_MATCH = 1, + GF_AI_COMPARE_ERROR = 2 +} gf_ai_compare_t; + #define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT #define GLUSTERD_INFO_FILE "glusterd.info" #define GLUSTERD_VOLUME_QUOTA_CONFIG "quota.conf" |