diff options
| -rw-r--r-- | cli/src/cli-cmd-parser.c | 145 | ||||
| -rw-r--r-- | cli/src/cli-cmd-volume.c | 4 | ||||
| -rw-r--r-- | cli/src/cli-rpc-ops.c | 15 | ||||
| -rw-r--r-- | doc/gluster.8 | 2 | ||||
| -rw-r--r-- | heal/src/glfs-heal.c | 3 | ||||
| -rw-r--r-- | tests/basic/glusterd/thin-arbiter-volume-probe.t | 25 | ||||
| -rw-r--r-- | tests/basic/glusterd/thin-arbiter-volume.t | 45 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 27 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 236 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 2 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 253 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 7 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 144 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 74 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 8 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 2 | 
16 files changed, 961 insertions, 31 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index a6ce49035d9..decdd10cb50 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -80,6 +80,95 @@ str_getunamb(const char *tok, char **opwords)  }  int32_t +cli_cmd_ta_brick_parse(const char **words, int wordcount, char **ta_brick) +{ +    char *host_name = NULL; +    char *tmp_host = NULL; +    char *delimiter = NULL; +    cli_brick_t *brick = NULL; +    int ret = 0; + +    GF_ASSERT(words); +    GF_ASSERT(wordcount); + +    if (validate_brick_name((char *)words[wordcount - 1])) { +        cli_err( +            "Wrong brick type: %s, use <HOSTNAME>:" +            "<export-dir-abs-path>", +            words[wordcount - 1]); +        ret = -1; +        goto out; +    } else { +        delimiter = strrchr(words[wordcount - 1], ':'); +        ret = gf_canonicalize_path(delimiter + 1); +        if (ret) +            goto out; +    } + +    tmp_host = gf_strdup((char *)words[wordcount - 1]); +    if (!tmp_host) { +        gf_log("cli", GF_LOG_ERROR, "Out of memory"); +        ret = -1; +        goto out; +    } +    get_host_name(tmp_host, &host_name); +    if (!host_name) { +        ret = -1; +        gf_log("cli", GF_LOG_ERROR, +               "Unable to retrieve " +               "hostname"); +        goto out; +    } + +    if (!(strcmp(host_name, "localhost") && strcmp(host_name, "127.0.0.1") && +          strncmp(host_name, "0.", 2))) { +        cli_err( +            "Please provide a valid hostname/ip other " +            "than localhost, 127.0.0.1 or loopback " +            "address (0.0.0.0 to 0.255.255.255)."); +        ret = -1; +        goto out; +    } +    if (!valid_internet_address(host_name, _gf_false, _gf_false)) { +        cli_err( +            "internet address '%s' does not conform to " +            "standards", +            host_name); +    } + +    brick = GF_MALLOC(sizeof(cli_brick_t), gf_common_list_node); +    if (brick == NULL) { +        ret = -1; +        
gf_log("cli", GF_LOG_ERROR, "Out of memory"); +        goto out; +    } + +    brick->name = words[wordcount - 1]; +    brick->len = strlen(words[wordcount - 1]); +    *ta_brick = GF_CALLOC(1, brick->len + 3, gf_common_mt_char); +    if (*ta_brick == NULL) { +        ret = -1; +        gf_log("cli", GF_LOG_ERROR, "Out of memory"); +        goto out; +    } + +    strcat(*ta_brick, " "); /* buffer is zero-filled by GF_CALLOC */ +    strcat(*ta_brick, brick->name); +    strcat(*ta_brick, " "); +out: +    if (tmp_host) { +        GF_FREE(tmp_host); +        tmp_host = NULL; +    } +    if (brick) { +        GF_FREE(brick); +        brick = NULL; +    } + +    return ret; +} + +int32_t  cli_cmd_bricks_parse(const char **words, int wordcount, int brick_index,                       char **bricks, int *brick_count)  { @@ -476,14 +565,17 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,      char *trans_type = NULL;      int32_t index = 0;      char *bricks = NULL; +    char *ta_brick = NULL;      int32_t brick_count = 0; -    char *opwords[] = {"replica",    "stripe",        "transport", "disperse", -                       "redundancy", "disperse-data", "arbiter",   NULL}; +    char *opwords[] = {"replica",  "stripe",       "transport", +                       "disperse", "redundancy",   "disperse-data", +                       "arbiter",  "thin-arbiter", NULL};      char *w = NULL;      int op_count = 0;      int32_t replica_count = 1;      int32_t arbiter_count = 0; +    int32_t thin_arbiter_count = 0;      int32_t stripe_count = 1;      int32_t disperse_count = -1;      int32_t redundancy_count = -1; @@ -581,6 +673,25 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,                      if (ret)                          goto out;                      index += 2; +                } else if (!strcmp(words[index], "thin-arbiter")) { +                    ret = gf_string2int(words[index + 1], &thin_arbiter_count); +                    if ((ret == -1) || 
(thin_arbiter_count != 1)) { +                        cli_err( +                            "For thin-arbiter " +                            "configuration, " +                            "replica count must be" +                            " 2 and thin-arbiter count " +                            "must be 1. The 3rd " +                            "brick of the replica " +                            "will be the thin-arbiter brick"); +                        ret = -1; +                        goto out; +                    } +                    ret = dict_set_int32(dict, "thin-arbiter-count", +                                         thin_arbiter_count); +                    if (ret) +                        goto out; +                    index += 2;                  }              } @@ -589,7 +700,7 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,              if ((arbiter_count == 1) && (replica_count == 2))                  replica_count += arbiter_count; -            if (replica_count == 2) { +            if (replica_count == 2 && thin_arbiter_count == 0) {                  if (strcmp(words[wordcount - 1], "force")) {                      question =                          "Replica 2 volumes are prone" @@ -657,6 +768,12 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,                  "option.");              ret = -1;              goto out; +        } else if ((strcmp(w, "thin-arbiter") == 0)) { +            cli_err( +                "thin-arbiter option must be preceded by replica " +                "option."); +            ret = -1; +            goto out;          } else {              GF_ASSERT(!"opword mismatch");              ret = -1; @@ -680,7 +797,20 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,          wc = wordcount - 1;      } -    ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks, &brick_count); +    // Exclude the thin-arbiter-brick i.e. 
last brick in the bricks list +    if (thin_arbiter_count == 1) { +        ret = cli_cmd_bricks_parse(words, wc - 1, brick_index, &bricks, +                                   &brick_count); +        if (ret) +            goto out; + +        ret = cli_cmd_ta_brick_parse(words, wc, &ta_brick); + +    } else { +        ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks, +                                   &brick_count); +    } +      if (ret)          goto out; @@ -739,6 +869,12 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,      if (ret)          goto out; +    if (thin_arbiter_count == 1) { +        ret = dict_set_dynstr(dict, "ta-brick", ta_brick); +        if (ret) +            goto out; +    } +      ret = dict_set_int32(dict, "count", brick_count);      if (ret)          goto out; @@ -752,6 +888,7 @@ cli_cmd_volume_create_parse(struct cli_state *state, const char **words,  out:      if (ret) {          GF_FREE(bricks); +        GF_FREE(ta_brick);          gf_log("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");          if (dict)              dict_unref(dict); diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 713a047dd0b..f454b097aa7 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -2999,9 +2999,9 @@ struct cli_cmd volume_cmds[] = {       "list information of all volumes"},      {"volume create <NEW-VOLNAME> [stripe <COUNT>] " -     "[replica <COUNT> [arbiter <COUNT>]] " +     "[[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] "       "[disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] " -     "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>" +     "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> <TA-BRICK>"       "... 
[force]",       cli_cmd_volume_create_cbk, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 16d5f73983f..35985ab44c6 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -723,10 +723,12 @@ gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,      int32_t redundancy_count = 0;      int32_t arbiter_count = 0;      int32_t snap_count = 0; +    int32_t thin_arbiter_count = 0;      int32_t vol_type = 0;      int32_t transport = 0;      char *volume_id_str = NULL;      char *volname = NULL; +    char *ta_brick = NULL;      dict_t *dict = NULL;      cli_local_t *local = NULL;      char key[1024] = {0}; @@ -903,6 +905,11 @@ xml_output:          if (ret)              goto out; +        snprintf(key, 256, "volume%d.thin_arbiter_count", i); +        ret = dict_get_int32(dict, key, &thin_arbiter_count); +        if (ret) +            goto out; +          // Distributed (stripe/replicate/stripe-replica) setups          vol_type = get_vol_type(type, dist_count, brick_count); @@ -929,6 +936,14 @@ xml_output:          if (ret)              goto out; +        if (thin_arbiter_count) { +            snprintf(key, 1024, "volume%d.thin_arbiter_brick", i); +            ret = dict_get_str(dict, key, &ta_brick); +            if (ret) +                goto out; +            cli_out("Thin-arbiter-path: %s", ta_brick); +        } +          snprintf(key, 256, "volume%d.opt_count", i);          ret = dict_get_int32(dict, key, &opt_count);          if (ret) diff --git a/doc/gluster.8 b/doc/gluster.8 index 4f36c13d45f..99a8d5e5048 100644 --- a/doc/gluster.8 +++ b/doc/gluster.8 @@ -41,7 +41,7 @@ List all volumes in cluster  \fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR  Display status of all or specified volume(s)/brick  .TP -\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] 
<NEW-BRICK> ... \fR +\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR  Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).  To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option.  .TP diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c index ce6925a281b..3ebf79eee14 100644 --- a/heal/src/glfs-heal.c +++ b/heal/src/glfs-heal.c @@ -1144,7 +1144,8 @@ glfsh_gather_heal_info(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,      while (xl->next)          xl = xl->next;      while (xl) { -        if (strcmp(xl->type, "protocol/client") == 0) { +        if (strcmp(xl->type, "protocol/client") == 0 && +            !strstr(xl->name, "-ta-")) {              heal_xl = _get_ancestor(xl, heal_op);              if (heal_xl) {                  old_THIS = THIS; diff --git a/tests/basic/glusterd/thin-arbiter-volume-probe.t b/tests/basic/glusterd/thin-arbiter-volume-probe.t new file mode 100644 index 00000000000..acc6943806d --- /dev/null +++ b/tests/basic/glusterd/thin-arbiter-volume-probe.t @@ -0,0 +1,25 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../cluster.rc + +#This tests if the thin-arbiter-count is transferred to the other peer. 
+function check_peers { +        $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l +} + +cleanup; + +TEST launch_cluster 2; +TEST $CLI_1 peer probe $H2; + +EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers + +kill_glusterd 2 +$CLI_1 volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{1..3} +TEST $glusterd_2 +EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers +EXPECT "1 x 2 = 2" volinfo_field_1 $V0 "Number of Bricks" +EXPECT "1 x 2 = 2" volinfo_field_2 $V0 "Number of Bricks" + +cleanup; diff --git a/tests/basic/glusterd/thin-arbiter-volume.t b/tests/basic/glusterd/thin-arbiter-volume.t new file mode 100644 index 00000000000..4e813890a45 --- /dev/null +++ b/tests/basic/glusterd/thin-arbiter-volume.t @@ -0,0 +1,45 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../thin-arbiter.rc + +#This command tests the volume create command validation for thin-arbiter volumes. + +cleanup; + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3 +EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks" +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +TEST touch $M0/a.txt +TEST ls $B0/b1/a.txt  +TEST ls $B0/b2/a.txt  +TEST ! 
ls $B0/b3/a.txt + +TEST umount $M0 +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{4..8} +EXPECT "2 x 2 = 4" volinfo_field $V0 "Number of Bricks" + +TEST $CLI volume delete $V0 + +TEST rm -rf $B0/b{1..3} + +TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3 +EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks" + +TEST killall -15 glusterd +TEST glusterd +TEST pidof glusterd +EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks" + +cleanup + diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 576cae7b57a..a0bf409d872 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -357,6 +357,7 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,      };      int keylen;      glusterd_brickinfo_t *brickinfo = NULL; +    glusterd_brickinfo_t *ta_brickinfo = NULL;      char *buf = NULL;      int i = 1;      dict_t *dict = NULL; @@ -368,6 +369,10 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,      xlator_t *this = NULL;      int32_t len = 0; +    char ta_brick[4096] = { +        0, +    }; +      GF_ASSERT(volinfo);      GF_ASSERT(volumes); @@ -431,6 +436,11 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,      if (ret)          goto out; +    keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count); +    ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count); +    if (ret) +        goto out; +      volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));      if (!volume_id_str)          goto out; @@ -481,6 +491,23 @@ glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,          i++;      } +    if (volinfo->thin_arbiter_count == 1) { +        ta_brickinfo = 
list_first_entry(&volinfo->ta_bricks, +                                        glusterd_brickinfo_t, brick_list); +        len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s", +                       ta_brickinfo->hostname, ta_brickinfo->path); +        if ((len < 0) || (len >= sizeof(ta_brick))) { +            ret = -1; +            goto out; +        } +        buf = gf_strdup(ta_brick); +        keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick", +                          count); +        ret = dict_set_dynstrn(volumes, key, keylen, buf); +        if (ret) +            goto out; +    } +      ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count);      if (ret)          goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 708b0977900..bc2ebd610de 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -239,7 +239,8 @@ out:  int32_t  glusterd_store_volinfo_brick_fname_write(int vol_fd,                                           glusterd_brickinfo_t *brickinfo, -                                         int32_t brick_count) +                                         int32_t brick_count, +                                         int is_thin_arbiter)  {      char key[64] = {          0, @@ -249,8 +250,13 @@ glusterd_store_volinfo_brick_fname_write(int vol_fd,      };      int32_t ret = -1; -    snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK, -             brick_count); +    if (!is_thin_arbiter) { +        snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK, +                 brick_count); +    } else { +        snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_TA_BRICK, +                 brick_count); +    }      glusterd_store_brickinfofname_set(brickinfo, brickfname,                                        sizeof(brickfname));      ret = gf_store_save_value(vol_fd, key, brickfname); @@ 
-471,14 +477,14 @@ glusterd_store_perform_brick_store(glusterd_brickinfo_t *brickinfo)          ret = -1;          goto out;      } -      ret = glusterd_store_brickinfo_write(fd, brickinfo);      if (ret)          goto out;  out: -    if (ret && (fd > 0)) +    if (ret && (fd > 0)) {          gf_store_unlink_tmppath(brickinfo->shandle); +    }      gf_msg_debug(THIS->name, 0, "Returning %d", ret);      return ret;  } @@ -526,15 +532,15 @@ out:  static int32_t  glusterd_store_brickinfo(glusterd_volinfo_t *volinfo,                           glusterd_brickinfo_t *brickinfo, int32_t brick_count, -                         int vol_fd) +                         int vol_fd, int is_thin_arbiter)  {      int32_t ret = -1;      GF_ASSERT(volinfo);      GF_ASSERT(brickinfo); -    ret = glusterd_store_volinfo_brick_fname_write(vol_fd, brickinfo, -                                                   brick_count); +    ret = glusterd_store_volinfo_brick_fname_write( +        vol_fd, brickinfo, brick_count, is_thin_arbiter);      if (ret)          goto out; @@ -961,6 +967,18 @@ glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)          total_len += ret;      } +    if ((conf->op_version >= GD_OP_VERSION_7_0) && +        volinfo->thin_arbiter_count) { +        ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n", +                       GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT, +                       volinfo->thin_arbiter_count); +        if (ret < 0 || ret >= sizeof(buf) - total_len) { +            ret = -1; +            goto out; +        } +        total_len += ret; +    } +      ret = gf_store_save_items(fd, buf);      if (ret)          goto out; @@ -1293,17 +1311,29 @@ glusterd_store_brickinfos(glusterd_volinfo_t *volinfo, int vol_fd)  {      int32_t ret = 0;      glusterd_brickinfo_t *brickinfo = NULL; +    glusterd_brickinfo_t *ta_brickinfo = NULL;      int32_t brick_count = 0; +    int32_t ta_brick_count = 0;      GF_ASSERT(volinfo);     
 cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)      { -        ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd); +        ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd, +                                       0);          if (ret)              goto out;          brick_count++;      } +    if (volinfo->thin_arbiter_count == 1) { +        ta_brickinfo = list_first_entry(&volinfo->ta_bricks, +                                        glusterd_brickinfo_t, brick_list); +        ret = glusterd_store_brickinfo(volinfo, ta_brickinfo, ta_brick_count, +                                       vol_fd, 1); +        if (ret) +            goto out; +    } +  out:      gf_msg_debug(THIS->name, 0, "Returning %d", ret);      return ret; @@ -1480,6 +1510,7 @@ glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo)  {      int ret = -1;      glusterd_brickinfo_t *brickinfo = NULL; +    glusterd_brickinfo_t *ta_brickinfo = NULL;      GF_ASSERT(volinfo); @@ -1489,6 +1520,15 @@ glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo)          if (ret)              goto out;      } + +    if (volinfo->thin_arbiter_count == 1) { +        ta_brickinfo = list_first_entry(&volinfo->ta_bricks, +                                        glusterd_brickinfo_t, brick_list); +        ret = gf_store_rename_tmppath(ta_brickinfo->shandle); +        if (ret) +            goto out; +    } +  out:      return ret;  } @@ -1643,6 +1683,7 @@ glusterd_store_volinfo(glusterd_volinfo_t *volinfo,  unlock:      pthread_mutex_unlock(&volinfo->store_volinfo_lock);      pthread_mutex_unlock(&ctx->cleanup_lock); +      if (ret)          glusterd_store_volume_cleanup_tmp(volinfo); @@ -2408,6 +2449,7 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)  {      int32_t ret = 0;      glusterd_brickinfo_t *brickinfo = NULL; +    glusterd_brickinfo_t *ta_brickinfo = NULL;      gf_store_iter_t *iter = NULL;      char *key = 
NULL;      char *value = NULL; @@ -2419,6 +2461,7 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)      };      glusterd_conf_t *priv = NULL;      int32_t brick_count = 0; +    int32_t ta_brick_count = 0;      char tmpkey[4096] = {          0,      }; @@ -2428,6 +2471,10 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)      struct pmap_registry *pmap = NULL;      xlator_t *this = NULL;      int brickid = 0; +    /* ta_brick_id initialization with 2 since ta-brick id starts with +     * volname-ta-2 +     */ +    int ta_brick_id = 2;      gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;      int32_t len = 0; @@ -2721,6 +2768,175 @@ glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)          brick_count++;      } +    ret = gf_store_iter_new(volinfo->shandle, &tmpiter); + +    if (ret) +        goto out; + +    if (volinfo->thin_arbiter_count == 1) { +        while (ta_brick_count < volinfo->subvol_count) { +            ret = glusterd_brickinfo_new(&ta_brickinfo); +            if (ret) +                goto out; + +            snprintf(tmpkey, sizeof(tmpkey), "%s-%d", +                     GLUSTERD_STORE_KEY_VOL_TA_BRICK, 0); + +            ret = gf_store_iter_get_matching(tmpiter, tmpkey, &tmpvalue); + +            len = snprintf(path, sizeof(path), "%s/%s", brickdir, tmpvalue); +            if ((len < 0) || (len >= sizeof(path))) { +                ret = -1; +                goto out; +            } + +            ret = gf_store_handle_retrieve(path, &ta_brickinfo->shandle); + +            if (ret) +                goto out; + +            ret = gf_store_iter_new(ta_brickinfo->shandle, &iter); + +            if (ret) +                goto out; + +            ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); +            if (ret) { +                gf_msg("glusterd", GF_LOG_ERROR, op_errno, +                       GD_MSG_STORE_ITER_GET_FAIL, +                       "Unable to iterate " +                       "the 
store for brick: %s", +                       path); +                goto out; +            } + +            while (!ret) { +                if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, +                             SLEN(GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) { +                    if (snprintf(ta_brickinfo->hostname, +                                 sizeof(ta_brickinfo->hostname), "%s", +                                 value) >= sizeof(ta_brickinfo->hostname)) { +                        gf_msg("glusterd", GF_LOG_ERROR, op_errno, +                               GD_MSG_PARSE_BRICKINFO_FAIL, +                               "brick hostname truncated: %s", +                               ta_brickinfo->hostname); +                        goto out; +                    } +                } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PATH, +                                    SLEN(GLUSTERD_STORE_KEY_BRICK_PATH))) { +                    if (snprintf(ta_brickinfo->path, sizeof(ta_brickinfo->path), +                                 "%s", value) >= sizeof(ta_brickinfo->path)) { +                        gf_msg("glusterd", GF_LOG_ERROR, op_errno, +                               GD_MSG_PARSE_BRICKINFO_FAIL, +                               "brick path truncated: %s", ta_brickinfo->path); +                        goto out; +                    } +                } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH, +                                    SLEN(GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) { +                    if (snprintf(ta_brickinfo->real_path, +                                 sizeof(ta_brickinfo->real_path), "%s", +                                 value) >= sizeof(ta_brickinfo->real_path)) { +                        gf_msg("glusterd", GF_LOG_ERROR, op_errno, +                               GD_MSG_PARSE_BRICKINFO_FAIL, +                               "real_path truncated: %s", +                               ta_brickinfo->real_path); +                 
       goto out; +                    } +                } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT, +                                    SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) { +                    ret = gf_string2int(value, &ta_brickinfo->port); +                    if (ret == -1) { +                        gf_msg(this->name, GF_LOG_ERROR, EINVAL, +                               GD_MSG_INCOMPATIBLE_VALUE, +                               "Failed to convert " +                               "string to integer"); +                    } + +                    if (ta_brickinfo->port < priv->base_port) { +                        /* This is required to adhere to the +                        IANA standards */ +                        ta_brickinfo->port = 0; +                    } +                } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, +                                    SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) { +                    ret = gf_string2int(value, &ta_brickinfo->rdma_port); +                    if (ret == -1) { +                        gf_msg(this->name, GF_LOG_ERROR, EINVAL, +                               GD_MSG_INCOMPATIBLE_VALUE, +                               "Failed to convert " +                               "string to integer"); +                    } + +                    if (ta_brickinfo->rdma_port < priv->base_port) { +                        /* This is required to adhere to the +                        IANA standards */ +                        ta_brickinfo->rdma_port = 0; +                    } +                } else if (!strncmp( +                               key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, +                               SLEN(GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { +                    ret = gf_string2int(value, &ta_brickinfo->decommissioned); +                    if (ret == -1) { +                        gf_msg(this->name, GF_LOG_ERROR, EINVAL, +                               
GD_MSG_INCOMPATIBLE_VALUE, +                               "Failed to convert " +                               "string to integer"); +                    } + +                } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) { +                    if (snprintf(ta_brickinfo->brick_id, +                                 sizeof(ta_brickinfo->brick_id), "%s", +                                 value) >= sizeof(ta_brickinfo->brick_id)) { +                        gf_msg("glusterd", GF_LOG_ERROR, op_errno, +                               GD_MSG_PARSE_BRICKINFO_FAIL, +                               "brick_id truncated: %s", +                               ta_brickinfo->brick_id); +                        goto out; +                    } +                } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSID, +                                    SLEN(GLUSTERD_STORE_KEY_BRICK_FSID))) { +                    ret = gf_string2uint64(value, &ta_brickinfo->statfs_fsid); +                    if (ret) { +                        gf_msg(this->name, GF_LOG_ERROR, 0, +                               GD_MSG_INVALID_ENTRY, +                               "%s " +                               "is not a valid uint64_t value", +                               value); +                    } +                } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_UUID)) { +                    gf_uuid_parse(value, ta_brickinfo->uuid); +                } else if (!strncmp( +                               key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, +                               SLEN(GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) { +                    ret = gf_string2int(value, &ta_brickinfo->snap_status); +                    if (ret == -1) { +                        gf_msg(this->name, GF_LOG_ERROR, EINVAL, +                               GD_MSG_INCOMPATIBLE_VALUE, +                               "Failed to convert " +                               "string to integer"); +                    } + +               
 } else { +                    gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY, +                           "Unknown key: %s", key); +                } + +                GF_FREE(key); +                GF_FREE(value); +                key = NULL; +                value = NULL; +                ret = gf_store_iter_get_next(iter, &key, &value, &op_errno); +            } + +            GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo, +                                                    ta_brick_id); +            ta_brick_id += 3; + +            cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks); +            ta_brick_count++; +        } +    } +      assign_brick_groups(volinfo);      ret = 0; @@ -2967,6 +3183,8 @@ glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)              volinfo->replica_count = atoi(value);          } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_ARBITER_CNT)) {              volinfo->arbiter_count = atoi(value); +        } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT)) { +            volinfo->thin_arbiter_count = atoi(value);          } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,                              SLEN(GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {              volinfo->disperse_count = atoi(value); diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 59aee880487..45aba64ff8d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -42,7 +42,9 @@ typedef enum glusterd_store_ver_ac_ {  #define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"  #define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"  #define GLUSTERD_STORE_KEY_VOL_ARBITER_CNT "arbiter_count" +#define GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT "thin_arbiter_count"  #define GLUSTERD_STORE_KEY_VOL_BRICK "brick" +#define GLUSTERD_STORE_KEY_VOL_TA_BRICK "ta-brick"  #define 
GLUSTERD_STORE_KEY_VOL_VERSION "version"  #define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"  #define GLUSTERD_STORE_KEY_VOL_ID "volume-id" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 4a8cadee0ab..87f924a6617 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -619,6 +619,7 @@ glusterd_volinfo_new(glusterd_volinfo_t **volinfo)      CDS_INIT_LIST_HEAD(&new_volinfo->vol_list);      CDS_INIT_LIST_HEAD(&new_volinfo->snapvol_list);      CDS_INIT_LIST_HEAD(&new_volinfo->bricks); +    CDS_INIT_LIST_HEAD(&new_volinfo->ta_bricks);      CDS_INIT_LIST_HEAD(&new_volinfo->snap_volumes);      new_volinfo->dict = dict_new(); @@ -1506,6 +1507,37 @@ out:  }  int32_t +glusterd_volume_ta_brickinfo_get(uuid_t uuid, char *hostname, char *path, +                                 glusterd_volinfo_t *volinfo, +                                 glusterd_brickinfo_t **ta_brickinfo) +{ +    glusterd_brickinfo_t *ta_brickiter = NULL; +    int32_t ret = -1; +    xlator_t *this = NULL; + +    this = THIS; + +    ret = -1; + +    cds_list_for_each_entry(ta_brickiter, &volinfo->ta_bricks, brick_list) +    { +        if (strcmp(ta_brickiter->path, path) == 0 && +            strcmp(ta_brickiter->hostname, hostname) == 0) { +            gf_msg_debug(this->name, 0, LOGSTR_FOUND_BRICK, +                         ta_brickiter->hostname, ta_brickiter->path, +                         volinfo->volname); +            ret = 0; +            if (ta_brickinfo) +                *ta_brickinfo = ta_brickiter; +            break; +        } +    } + +    gf_msg_debug(this->name, 0, "Returning %d", ret); +    return ret; +} + +int32_t  glusterd_volume_brickinfo_get_by_brick(char *brick, glusterd_volinfo_t *volinfo,                                         glusterd_brickinfo_t **brickinfo,                                         gf_boolean_t construct_real_path) @@ -2808,6 +2840,7 @@ 
glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,      char key[64] = "";      int keylen;      glusterd_brickinfo_t *brickinfo = NULL; +    glusterd_brickinfo_t *ta_brickinfo = NULL;      int32_t i = 1;      char *volume_id_str = NULL;      char *str = NULL; @@ -2858,6 +2891,11 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,      if (ret)          goto out; +    keylen = snprintf(key, sizeof(key), "%s.subvol_count", pfx); +    ret = dict_set_int32n(dict, key, keylen, volinfo->subvol_count); +    if (ret) +        goto out; +      keylen = snprintf(key, sizeof(key), "%s.stripe_count", pfx);      ret = dict_set_int32n(dict, key, keylen, volinfo->stripe_count);      if (ret) @@ -2873,6 +2911,11 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,      if (ret)          goto out; +    keylen = snprintf(key, sizeof(key), "%s.thin_arbiter_count", pfx); +    ret = dict_set_int32n(dict, key, keylen, volinfo->thin_arbiter_count); +    if (ret) +        goto out; +      keylen = snprintf(key, sizeof(key), "%s.disperse_count", pfx);      ret = dict_set_int32n(dict, key, keylen, volinfo->disperse_count);      if (ret) @@ -3035,6 +3078,44 @@ glusterd_add_volume_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,          i++;      } +    i = 1; +    if (volinfo->thin_arbiter_count == 1) { +        cds_list_for_each_entry(ta_brickinfo, &volinfo->ta_bricks, brick_list) +        { +            keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.hostname", pfx, +                              i); +            ret = dict_set_strn(dict, key, keylen, ta_brickinfo->hostname); +            if (ret) +                goto out; + +            keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.path", pfx, i); +            ret = dict_set_strn(dict, key, keylen, ta_brickinfo->path); +            if (ret) +                goto out; + +            keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.decommissioned", +               
               pfx, i);
+            ret = dict_set_int32n(dict, key, keylen,
+                                  ta_brickinfo->decommissioned);
+            if (ret)
+                goto out;
+
+            keylen = snprintf(key, sizeof(key), "%s.ta-brick%d.brick_id", pfx,
+                              i);
+            ret = dict_set_strn(dict, key, keylen, ta_brickinfo->brick_id);
+            if (ret)
+                goto out;
+
+            snprintf(key, sizeof(key), "%s.ta-brick%d.uuid", pfx, i);
+            ret = dict_set_dynstr_with_alloc(dict, key,
+                                             uuid_utoa(ta_brickinfo->uuid));
+            if (ret)
+                goto out;
+
+            i++;
+        }
+    }
+
     /* Add volume op-versions to dict. This prevents volume inconsistencies
      * in the cluster
      */
@@ -3723,6 +3804,100 @@ out:
     return ret;
 }
 
+/* Build a thin-arbiter brickinfo from a peer's volume payload.
+ *
+ * Reads the "<prefix><vol_count>.ta-brick<brick_count>.*" keys from
+ * peer_data: hostname, path and uuid are mandatory, brick_id and
+ * decommissioned are optional. On success a newly allocated brickinfo is
+ * stored in *ta_brickinfo.
+ *
+ * Returns 0 on success and a non-zero value on failure. Every step that can
+ * fail runs before the brickinfo is allocated, so a failure leaves
+ * *ta_brickinfo untouched and allocates nothing.
+ */
+static int32_t
+glusterd_import_new_ta_brick(dict_t *peer_data, int32_t vol_count,
+                             int32_t brick_count,
+                             glusterd_brickinfo_t **ta_brickinfo, char *prefix)
+{
+    char key[128];
+    char key_prefix[64];
+    int keylen;
+    int ret = -1;
+    char *hostname = NULL;
+    char *path = NULL;
+    char *brick_id = NULL;
+    char *brick_uuid_str = NULL;
+    int decommissioned = 0;
+    glusterd_brickinfo_t *new_ta_brickinfo = NULL;
+    char msg[256] = "";
+
+    GF_ASSERT(peer_data);
+    GF_ASSERT(vol_count >= 0);
+    GF_ASSERT(ta_brickinfo);
+    GF_ASSERT(prefix);
+
+    ret = snprintf(key_prefix, sizeof(key_prefix), "%s%d.ta-brick%d", prefix,
+                   vol_count, brick_count);
+    if (ret < 0 || (size_t)ret >= sizeof(key_prefix)) {
+        ret = -1;
+        snprintf(msg, sizeof(msg), "key_prefix too long");
+        goto out;
+    }
+
+    keylen = snprintf(key, sizeof(key), "%s.hostname", key_prefix);
+    ret = dict_get_strn(peer_data, key, keylen, &hostname);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "%s missing in payload", key);
+        goto out;
+    }
+
+    keylen = snprintf(key, sizeof(key), "%s.path", key_prefix);
+    ret = dict_get_strn(peer_data, key, keylen, &path);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "%s missing in payload", key);
+        goto out;
+    }
+
+    /* The uuid is mandatory; fetch it before allocating so that a missing
+     * key cannot leave a half-built brickinfo behind. */
+    keylen = snprintf(key, sizeof(key), "%s.uuid", key_prefix);
+    ret = dict_get_strn(peer_data, key, keylen, &brick_uuid_str);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "%s missing in payload", key);
+        goto out;
+    }
+
+    /* brick_id is optional; it is checked for NULL before use below. */
+    keylen = snprintf(key, sizeof(key), "%s.brick_id", key_prefix);
+    (void)dict_get_strn(peer_data, key, keylen, &brick_id);
+
+    /* decommissioned is optional (backward compatibility); a failed lookup
+     * is deliberately ignored. */
+    keylen = snprintf(key, sizeof(key), "%s.decommissioned", key_prefix);
+    (void)dict_get_int32n(peer_data, key, keylen, &decommissioned);
+
+    /* Reject values that would be truncated, again before allocating. */
+    if (strlen(path) >= sizeof(new_ta_brickinfo->path) ||
+        strlen(hostname) >= sizeof(new_ta_brickinfo->hostname)) {
+        ret = -1;
+        snprintf(msg, sizeof(msg), "hostname or path too long for %s",
+                 key_prefix);
+        goto out;
+    }
+
+    ret = glusterd_brickinfo_new(&new_ta_brickinfo);
+    if (ret)
+        goto out;
+
+    /* Nothing below can fail: lengths were validated above. */
+    (void)snprintf(new_ta_brickinfo->path, sizeof(new_ta_brickinfo->path),
+                   "%s", path);
+    (void)snprintf(new_ta_brickinfo->hostname,
+                   sizeof(new_ta_brickinfo->hostname), "%s", hostname);
+    new_ta_brickinfo->decommissioned = decommissioned;
+    if (brick_id)
+        (void)snprintf(new_ta_brickinfo->brick_id,
+                       sizeof(new_ta_brickinfo->brick_id), "%s", brick_id);
+    gf_uuid_parse(brick_uuid_str, new_ta_brickinfo->uuid);
+
+    *ta_brickinfo = new_ta_brickinfo;
+
+out:
+    if (msg[0]) {
+        gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
+               msg);
+        /* msg is only ever set before the brickinfo exists, so report the
+         * values read from the payload rather than dereferencing it. */
+        gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;ta-brick=%s",
+                 hostname ? hostname : "", path ? path : "");
+    }
+    gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+    return ret;
+}
+
 /* The prefix represents the type of volume to be added.
  * It will be "volume" for normal volumes, and snap# like
  * snap1, snap2, for snapshot volumes
@@ -3834,8 +4009,10 @@ glusterd_import_bricks(dict_t *peer_data, int32_t vol_count,
 {
     int ret = -1;
     int brick_count = 1;
+    int ta_brick_count = 1;
     int brickid = 0;
     glusterd_brickinfo_t *new_brickinfo = NULL;
+    glusterd_brickinfo_t *new_ta_brickinfo = NULL;
 
     GF_ASSERT(peer_data);
     GF_ASSERT(vol_count >= 0);
@@ -3854,6 +4031,19 @@ glusterd_import_bricks(dict_t *peer_data, int32_t vol_count,
         cds_list_add_tail(&new_brickinfo->brick_list, &new_volinfo->bricks);
         brick_count++;
     }
+
+    if (new_volinfo->thin_arbiter_count == 1) {
+        while (ta_brick_count <= new_volinfo->subvol_count) {
+            ret = glusterd_import_new_ta_brick(peer_data, vol_count,
+                                               ta_brick_count,
+                                               &new_ta_brickinfo, prefix);
+            if (ret)
+                goto out;
+            cds_list_add_tail(&new_ta_brickinfo->brick_list,
+                              &new_volinfo->ta_bricks);
+            ta_brick_count++;
+        }
+    }
     ret = 0;
 out:
     gf_msg_debug("glusterd", 0, "Returning with %d", ret);
@@ -4132,6 +4322,14 @@ glusterd_import_volinfo(dict_t *peer_data, int count,
         goto out;
     }
 
+    keylen = snprintf(key, sizeof(key), "%s.subvol_count", key_prefix);
+    ret = dict_get_int32n(peer_data, key, keylen, &new_volinfo->subvol_count);
+    if (ret) {
+        snprintf(msg, sizeof(msg), "%s missing in payload for %s", key,
+                 volname);
+        goto out;
+    }
+
     /* not having a 'stripe_count' key is not a error
        (as peer may be of old version) */
     keylen = snprintf(key, sizeof(key), "%s.stripe_count", key_prefix);
@@ -4156,6 +4354,15 @@ glusterd_import_volinfo(dict_t *peer_data, int count,
         gf_msg(THIS->name, 
GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,                 "peer is possibly old version"); +    /* not having a 'thin_arbiter_count' key is not a error +       (as peer may be of old version) */ +    keylen = snprintf(key, sizeof(key), "%s.thin_arbiter_count", key_prefix); +    ret = dict_get_int32n(peer_data, key, keylen, +                          &new_volinfo->thin_arbiter_count); +    if (ret) +        gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED, +               "peer is possibly old version"); +      /* not having a 'disperse_count' key is not a error         (as peer may be of old version) */      keylen = snprintf(key, sizeof(key), "%s.disperse_count", key_prefix); @@ -4369,6 +4576,8 @@ glusterd_volinfo_copy_brickinfo(glusterd_volinfo_t *old_volinfo,  {      glusterd_brickinfo_t *new_brickinfo = NULL;      glusterd_brickinfo_t *old_brickinfo = NULL; +    glusterd_brickinfo_t *new_ta_brickinfo = NULL; +    glusterd_brickinfo_t *old_ta_brickinfo = NULL;      glusterd_conf_t *priv = NULL;      int ret = 0;      xlator_t *this = NULL; @@ -4417,6 +4626,46 @@ glusterd_volinfo_copy_brickinfo(glusterd_volinfo_t *old_volinfo,              }          }      } +    if (new_volinfo->thin_arbiter_count == 1) { +        cds_list_for_each_entry(new_ta_brickinfo, &new_volinfo->ta_bricks, +                                brick_list) +        { +            ret = glusterd_volume_ta_brickinfo_get( +                new_ta_brickinfo->uuid, new_ta_brickinfo->hostname, +                new_ta_brickinfo->path, old_volinfo, &old_ta_brickinfo); +            if (ret == 0) { +                new_ta_brickinfo->port = old_ta_brickinfo->port; + +                if (old_ta_brickinfo->real_path[0] == '\0') { +                    if (!realpath(new_ta_brickinfo->path, abspath)) { +                        /* Here an ENOENT should also be a +                         * failure as the brick is expected to +                         * be in existence +                         */ +       
                 gf_msg(this->name, GF_LOG_CRITICAL, errno,
+                               GD_MSG_BRICKINFO_CREATE_FAIL,
+                               "realpath () failed for brick "
+                               "%s. The underlying filesystem "
+                               "may be in bad state",
+                               new_ta_brickinfo->path);
+                        ret = -1;
+                        goto out;
+                    }
+                    if (strlen(abspath) >=
+                        sizeof(new_ta_brickinfo->real_path)) {
+                        ret = -1;
+                        goto out;
+                    }
+                    (void)strncpy(new_ta_brickinfo->real_path, abspath,
+                                  sizeof(new_ta_brickinfo->real_path));
+                } else {
+                    (void)strncpy(new_ta_brickinfo->real_path,
+                                  old_ta_brickinfo->real_path,
+                                  sizeof(new_ta_brickinfo->real_path));
+                }
+            }
+        }
+    }
     ret = 0;
 out:
@@ -4585,8 +4834,8 @@ gd_check_and_update_rebalance_info(glusterd_volinfo_t *old_volinfo,
     new->rebalance_time = old->rebalance_time;
 
     /* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume
-     * import
-     * a new defrag object should come to life with rebalance being restarted
+     * import a new defrag object should come to life with rebalance being
+     * restarted
      */
 out:
     return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index c506da32950..2312d426051 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -32,6 +32,13 @@
                 brickid);                                                      \
     } while (0)
 
+/* Name a thin-arbiter brick "<volname>-ta-<brickid>". The write is bounded
+ * by the size of the brick_id buffer, and every macro argument is
+ * parenthesized so that expression arguments expand safely.
+ */
+#define GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo,         \
+                                                brickid)                       \
+    do {                                                                       \
+        snprintf((ta_brickinfo)->brick_id, sizeof((ta_brickinfo)->brick_id),   \
+                 "%s-ta-%d", (volinfo)->volname, (brickid));                   \
+    } while (0)
+
 #define ALL_VOLUME_OPTION_CHECK(volname, get_opt, key, ret, op_errstr, label)  \
     do {                                                                       \
         gf_boolean_t _all = !strcmp("all", volname);                           \
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 479ae779f66..8243548f881 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -566,7 +566,13 @@ no_filter_option_handler(volgen_graph_t *graph, struct volopt_map_entry *vme,
     for (trav = first_of(graph); trav; trav = trav->next) {
         if (strcmp(trav->type, vme->voltype) != 0)
             continue;
-
+        if (strcmp(vme->option, "ta-remote-port") == 0) {
+            if (strstr(trav->name, "-ta-") != NULL) {
+                /* The length must describe the key actually passed
+                 * ("remote-port"), not the longer option name. */
+                ret = xlator_set_option(trav, "remote-port",
+                                        strlen("remote-port"), vme->value);
+            }
+            continue;
+        }
         ret = xlator_set_option(trav, vme->option, strlen(vme->option),
                                 vme->value);
         if (ret)
@@ -3185,7 +3191,10 @@ volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
         0,
     };
     glusterd_brickinfo_t *brick = NULL;
+    glusterd_brickinfo_t *ta_brick = NULL;
     xlator_t *xl = NULL;
+    int subvol_index = 0;
+    int thin_arbiter_index = 0;
 
     if (volinfo->brick_count == 0) {
         gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY,
@@ -3212,6 +3221,30 @@ volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
     i = 0;
 cds_list_for_each_entry(brick, &volinfo->bricks, brick_list)      { +        /* insert ta client xlator entry. +         * eg - If subvol count is > 1, then after every two client xlator +         * entries there should be a ta client xlator entry in the volfile. ta +         * client xlator indexes are - 2, 5, 8 etc depending on the index of +         * subvol. +         */ +        if (volinfo->thin_arbiter_count && +            (i + 1) % (volinfo->replica_count + 1) == 0) { +            thin_arbiter_index = 0; +            cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list) +            { +                if (thin_arbiter_index == subvol_index) { +                    xl = volgen_graph_build_client( +                        graph, volinfo, ta_brick->hostname, NULL, +                        ta_brick->path, ta_brick->brick_id, transt, set_dict); +                    if (!xl) { +                        ret = -1; +                        goto out; +                    } +                } +                thin_arbiter_index++; +            } +            subvol_index++; +        }          xl = volgen_graph_build_client(graph, volinfo, brick->hostname, NULL,                                         brick->path, brick->brick_id, transt,                                         set_dict); @@ -3223,6 +3256,28 @@ volgen_graph_build_clients(volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          i++;      } +    /* Add ta client xlator entry for last subvol +     * Above loop will miss out on making the ta client +     * xlator entry for the last subvolume in the volfile +     */ +    if (volinfo->thin_arbiter_count) { +        thin_arbiter_index = 0; +        cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list) +        { +            if (thin_arbiter_index == subvol_index) { +                xl = volgen_graph_build_client( +                    graph, volinfo, ta_brick->hostname, NULL, ta_brick->path, +                    
ta_brick->brick_id, transt, set_dict); +                if (!xl) { +                    ret = -1; +                    goto out; +                } +            } + +            thin_arbiter_index++; +        } +    } +      if (i != volinfo->brick_count) {          gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLUME_INCONSISTENCY,                 "volume inconsistency: actual number of bricks (%d) " @@ -3599,12 +3654,15 @@ set_afr_pending_xattrs_option(volgen_graph_t *graph,      xlator_t *this = NULL;      glusterd_conf_t *conf = NULL;      glusterd_brickinfo_t *brick = NULL; +    glusterd_brickinfo_t *ta_brick = NULL;      char *ptr = NULL;      int i = 0;      int index = -1;      int ret = 0;      char *afr_xattrs_list = NULL;      int list_size = -1; +    int ta_brick_index = 0; +    int subvol_index = 0;      this = THIS;      GF_VALIDATE_OR_GOTO("glusterd", this, out); @@ -3643,6 +3701,26 @@ set_afr_pending_xattrs_option(volgen_graph_t *graph,              break;          strncat(ptr, brick->brick_id, strlen(brick->brick_id));          if (i == volinfo->replica_count) { +            /* add ta client xlator in afr-pending-xattrs before making entries +             * for client xlators in volfile. +             * ta client xlator indexes are - 2, 5, 8 depending on the index of +             * subvol. 
e.g- For first subvol ta client xlator id is volname-ta-2 +             */ +            ta_brick_index = 0; +            if (volinfo->thin_arbiter_count == 1) { +                ptr[strlen(brick->brick_id)] = ','; +                cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, +                                        brick_list) +                { +                    if (ta_brick_index == subvol_index) { +                        break; +                    } +                    ta_brick_index++; +                } + +                strncat(ptr, ta_brick->brick_id, strlen(ta_brick->brick_id)); +            } +              ret = xlator_set_fixed_option(afr_xlators_list[index++],                                            "afr-pending-xattr", afr_xattrs_list);              if (ret) @@ -3650,6 +3728,7 @@ set_afr_pending_xattrs_option(volgen_graph_t *graph,              memset(afr_xattrs_list, 0, list_size);              ptr = afr_xattrs_list;              i = 1; +            subvol_index++;              continue;          }          ptr[strlen(brick->brick_id)] = ','; @@ -3674,6 +3753,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,      char *replicate_name = "%s-replicate-%d";      xlator_t *afr = NULL;      char option[32] = {0}; +    glusterd_brickinfo_t *ta_brick = NULL; +    int ta_brick_index = 0; +    int ta_replica_offset = 0; +    int ta_brick_offset = 0; +    char ta_option[4096] = { +        0, +    };      if (glusterd_volinfo_get_boolean(volinfo, "cluster.jbr") > 0) {          replicate_type = "experimental/jbrc"; @@ -3681,9 +3767,20 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,          replicate_type = "cluster/replicate";      } +    /* In thin-arbiter case brick count and replica count remain same +     * but due to additional entries of ta client xlators in the volfile, +     * GD1 is manipulated to include these client xlators while linking them to +     * afr/cluster entry in the volfile. 
+     */ +    if (volinfo->thin_arbiter_count == 1) { +        ta_replica_offset = 1; +        ta_brick_offset = volinfo->subvol_count; +    } +      clusters = volgen_link_bricks_from_list_tail( -        graph, volinfo, replicate_type, replicate_name, volinfo->brick_count, -        volinfo->replica_count); +        graph, volinfo, replicate_type, replicate_name, +        volinfo->brick_count + ta_brick_offset, +        volinfo->replica_count + ta_replica_offset);      if (clusters < 0)          goto out; @@ -3693,18 +3790,43 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph,          clusters = -1;          goto out;      } -    if (!volinfo->arbiter_count) +    if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count)          goto out;      afr = first_of(graph); -    sprintf(option, "%d", volinfo->arbiter_count); -    for (i = 0; i < clusters; i++) { -        ret = xlator_set_fixed_option(afr, "arbiter-count", option); -        if (ret) { -            clusters = -1; -            goto out; + +    if (volinfo->arbiter_count) { +        sprintf(option, "%d", volinfo->arbiter_count); +        for (i = 0; i < clusters; i++) { +            ret = xlator_set_fixed_option(afr, "arbiter-count", option); +            if (ret) { +                clusters = -1; +                goto out; +            } + +            afr = afr->next; +        } +    } + +    if (volinfo->thin_arbiter_count == 1) { +        for (i = 0; i < clusters; i++) { +            ta_brick_index = 0; +            cds_list_for_each_entry(ta_brick, &volinfo->ta_bricks, brick_list) +            { +                if (ta_brick_index == i) { +                    break; +                } +                ta_brick_index++; +            } +            snprintf(ta_option, sizeof(ta_option), "%s:%s", ta_brick->hostname, +                     ta_brick->path); +            ret = xlator_set_fixed_option(afr, "thin-arbiter", ta_option); +            if (ret) { +                clusters = -1; +              
  goto out; +            } +            afr = afr->next;          } -        afr = afr->next;      }  out:      return clusters; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 7eb74d7b4fd..4624fe1c8d0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -276,6 +276,7 @@ __glusterd_handle_create_volume(rpcsvc_request_t *req)      char *bricks = NULL;      char *volname = NULL;      int brick_count = 0; +    int thin_arbiter_count = 0;      void *cli_rsp = NULL;      char err_str[2048] = {          0, @@ -435,6 +436,21 @@ __glusterd_handle_create_volume(rpcsvc_request_t *req)          goto out;      } +    ret = dict_get_int32n(dict, "thin-arbiter-count", +                          SLEN("thin-arbiter-count"), &thin_arbiter_count); +    if (thin_arbiter_count && conf->op_version < GD_OP_VERSION_7_0) { +        snprintf(err_str, sizeof(err_str), +                 "Cannot execute command. " +                 "The cluster is operating at version %d. 
" +                 "Thin-arbiter volume creation is unavailable in " +                 "this version", +                 conf->op_version); +        gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_OP_FAILED, "%s", +               err_str); +        ret = -1; +        goto out; +    } +      if (!dict_getn(dict, "force", SLEN("force"))) {          gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,                 "Failed to get 'force' flag"); @@ -2028,14 +2044,20 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)      glusterd_volinfo_t *volinfo = NULL;      gf_boolean_t vol_added = _gf_false;      glusterd_brickinfo_t *brickinfo = NULL; +    glusterd_brickinfo_t *ta_brickinfo = NULL;      xlator_t *this = NULL;      char *brick = NULL; +    char *ta_brick = NULL;      int32_t count = 0;      int32_t i = 1;      char *bricks = NULL; +    char *ta_bricks = NULL;      char *brick_list = NULL; +    char *ta_brick_list = NULL;      char *free_ptr = NULL; +    char *ta_free_ptr = NULL;      char *saveptr = NULL; +    char *ta_saveptr = NULL;      char *trans_type = NULL;      char *str = NULL;      char *username = NULL; @@ -2153,6 +2175,20 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)          /* coverity[unused_value] arbiter count is optional */          ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),                                &volinfo->arbiter_count); +        ret = dict_get_int32n(dict, "thin-arbiter-count", +                              SLEN("thin-arbiter-count"), +                              &volinfo->thin_arbiter_count); +        if (volinfo->thin_arbiter_count) { +            ret = dict_get_strn(dict, "ta-brick", SLEN("ta-brick"), &ta_bricks); +            if (ret) { +                gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, +                       "Unable to get thin arbiter brick for " +                       "volume %s", +                       volname); +                
goto out; +            } +        } +      } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {          ret = dict_get_int32n(dict, "disperse-count", SLEN("disperse-count"),                                &volinfo->disperse_count); @@ -2241,6 +2277,38 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)          volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA;      } +    if (ta_bricks) { +        ta_brick_list = gf_strdup(ta_bricks); +        ta_free_ptr = ta_brick_list; +    } + +    if (volinfo->thin_arbiter_count) { +        ta_brick = strtok_r(ta_brick_list + 1, " \n", &ta_saveptr); + +        count = 1; +        brickid = volinfo->replica_count; +        /* assign brickid to ta_bricks +         * Following loop runs for number of subvols times. Although +         * there is only one ta-brick for a volume but the volume fuse volfile +         * requires an entry of ta-brick for each subvolume. Also, the ta-brick +         * id needs to be adjusted according to the subvol count. +         * For eg- For first subvolume ta-brick id is volname-ta-2, for second +         * subvol ta-brick id is volname-ta-5. 
+         */ +        while (count <= volinfo->subvol_count) { +            ret = glusterd_brickinfo_new_from_brick(ta_brick, &ta_brickinfo, +                                                    _gf_false, op_errstr); +            if (ret) +                goto out; + +            GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo, +                                                    brickid); +            cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks); +            count++; +            brickid += volinfo->replica_count + 1; +        } +    } +      if (bricks) {          brick_list = gf_strdup(bricks);          free_ptr = brick_list; @@ -2259,7 +2327,10 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)                                                  op_errstr);          if (ret)              goto out; - +        if (volinfo->thin_arbiter_count == 1 && +            (brickid + 1) % (volinfo->replica_count + 1) == 0) { +            brickid = brickid + 1; +        }          GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid++);          ret = glusterd_resolve_brick(brickinfo); @@ -2350,6 +2421,7 @@ glusterd_op_create_volume(dict_t *dict, char **op_errstr)  out:      GF_FREE(free_ptr); +    GF_FREE(ta_free_ptr);      if (!vol_added && volinfo)          glusterd_volinfo_unref(volinfo);      return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 5622afbe47d..11ec4500453 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1503,6 +1503,14 @@ struct volopt_map_entry glusterd_volopt_map[] = {       .value = "9",       .flags = VOLOPT_FLAG_CLIENT_OPT}, +    /* Although the following option is named ta-remote-port but it will be +     * added as remote-port in client volfile for ta-bricks only. 
+     */ +    {.key = "client.ta-brick-port", +     .voltype = "protocol/client", +     .option = "ta-remote-port", +     .op_version = GD_OP_VERSION_7_0}, +      /* Server xlator options */      {.key = "network.tcp-window-size",       .voltype = "protocol/server", diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index cf8d0395aa5..c34e037786f 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -440,6 +440,7 @@ struct glusterd_volinfo_ {      /* This is a current pointer for         glusterd_volinfo_t->snap_volumes */      struct cds_list_head bricks; +    struct cds_list_head ta_bricks;      struct cds_list_head snap_volumes;      /* TODO : Need to remove this, as this       * is already part of snapshot object. @@ -449,6 +450,7 @@ struct glusterd_volinfo_ {      int stripe_count;      int replica_count;      int arbiter_count; +    int thin_arbiter_count;      int disperse_count;      int redundancy_count;      int subvol_count;    /* Number of subvolumes in a  | 
