diff options
| author | Dan Lambright <dlambrig@redhat.com> | 2015-03-30 14:27:44 -0400 | 
|---|---|---|
| committer | Kaleb KEITHLEY <kkeithle@redhat.com> | 2015-04-08 07:28:21 +0000 | 
| commit | a8260044291cb6eee44974d8c52caa9f4cfb3993 (patch) | |
| tree | 3b9b0f83bd6ae869a26a75dafdc988196a62d049 | |
| parent | f0cd1d73c63001740cd7691a77df7631c9b8e8dc (diff) | |
glusterd: Support distributed replicated volumes on hot tier
We did not set up the graph properly for hot tiers with replicated
subvolumes. Also add a check that the file has not already been moved
by another replicated brick on the same node.
Change-Id: I9adef565ab60f6774810962d912168b77a6032fa
BUG: 1206517
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/10054
Reviewed-by: Joseph Fernandes <josferna@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
| -rw-r--r-- | cli/src/cli-cmd-parser.c | 5 | ||||
| -rw-r--r-- | cli/src/cli-cmd-volume.c | 7 | ||||
| -rw-r--r-- | cli/src/cli.h | 2 | ||||
| -rwxr-xr-x | tests/basic/tier/tier.t | 22 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.c | 39 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/tier.h | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 7 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 49 | 
8 files changed, 97 insertions, 35 deletions
| diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 7c03ae228fd..a334fd931bf 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -1441,7 +1441,7 @@ out:  int32_t  cli_cmd_volume_add_brick_parse (const char **words, int wordcount, -                                dict_t **options) +                                dict_t **options, int *ret_type)  {          dict_t  *dict = NULL;          char    *volname = NULL; @@ -1559,6 +1559,9 @@ parse_bricks:          *options = dict;  out: +        if (ret_type) +                *ret_type = type; +          if (ret) {                  gf_log ("cli", GF_LOG_ERROR, "Unable to parse add-brick CLI");                  if (dict) diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 5436c76bcc2..9c61c3f541f 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -787,7 +787,7 @@ cli_cmd_volume_add_brick_cbk (struct cli_state *state,          if (!frame)                  goto out; -        ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options); +        ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options, 0);          if (ret) {                  cli_usage_out (word->pattern);                  parse_error = 1; @@ -847,12 +847,13 @@ cli_cmd_volume_attach_tier_cbk (struct cli_state *state,          int                     parse_error = 0;          gf_answer_t             answer = GF_ANSWER_NO;          cli_local_t             *local = NULL; +        int                     type = 0;          frame = create_frame (THIS, THIS->ctx->pool);          if (!frame)                  goto out; -        ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options); +        ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options, &type);          if (ret) {                  cli_usage_out (word->pattern);                  parse_error = 1; @@ -872,7 +873,7 @@ cli_cmd_volume_attach_tier_cbk (struct cli_state *state,          if (ret)          
        goto out; -        ret = dict_set_int32 (options, "type", GF_CLUSTER_TYPE_TIER); +        ret = dict_set_int32 (options, "type", type);          if (ret)                  goto out; diff --git a/cli/src/cli.h b/cli/src/cli.h index ed2bc4aba8a..60bbcb9f620 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -249,7 +249,7 @@ cli_cmd_ganesha_parse (struct cli_state *state, const char **words,  int32_t  cli_cmd_volume_add_brick_parse (const char **words, int wordcount, -                                dict_t **options); +                                dict_t **options, int *type);  int32_t  cli_cmd_volume_remove_brick_parse (const char **words, int wordcount, diff --git a/tests/basic/tier/tier.t b/tests/basic/tier/tier.t index 58846412358..d1e1041f87c 100755 --- a/tests/basic/tier/tier.t +++ b/tests/basic/tier/tier.t @@ -3,6 +3,12 @@  . $(dirname $0)/../../include.rc  . $(dirname $0)/../../volume.rc +LAST_BRICK=3 +CACHE_BRICK_FIRST=4 +CACHE_BRICK_LAST=5 +DEMOTE_TIMEOUT=12 +PROMOTE_TIMEOUT=5 +  function file_on_slow_tier {      s=$(md5sum $1)      for i in `seq 0 $LAST_BRICK`; do @@ -19,8 +25,9 @@ function file_on_fast_tier {      local ret="1"      s1=$(md5sum $1) -    s2=$(md5sum $B0/${V0}${CACHE_BRICK}/$1) -    if [ -e $B0/${V0}${CACHE_BRICK}/$1 ] && ! [ "$s1" == "$s2" ]; then +    s2=$(md5sum $B0/${V0}${CACHE_BRICK_FIRST}/$1) + +    if [ -e $B0/${V0}${CACHE_BRICK_FIRST}/$1 ] && ! 
[ "$s1" == "$s2" ]; then          echo "0"      else          echo "1" @@ -45,17 +52,14 @@ function confirm_vol_stopped {      fi  } -LAST_BRICK=1 -CACHE_BRICK=2 -DEMOTE_TIMEOUT=12 -PROMOTE_TIMEOUT=5  cleanup  TEST glusterd  TEST pidof glusterd -TEST $CLI volume create $V0 $H0:$B0/${V0}{0..$LAST_BRICK} -TEST $CLI volume attach-tier $V0 $H0:$B0/${V0}${CACHE_BRICK} + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK} +TEST $CLI volume attach-tier $V0 replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST  TEST $CLI volume start $V0  TEST $CLI volume set $V0 features.ctr-enabled on  TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; @@ -120,7 +124,7 @@ TEST $CLI volume detach-tier $V0  # temporarily comment out  #TEST ! [ -e $M0/d1/data.txt ] -EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK} +EXPECT "0" confirm_tier_removed ${V0}${CACHE_BRICK_FIRST}  EXPECT_WITHIN $REBALANCE_TIMEOUT "0" confirm_vol_stopped $V0 diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index f8b32f1619f..fe18d7b91a9 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -142,12 +142,17 @@ tier_migrate_using_query_file (void *_args)          int total_status                        = 0;          FILE *queryFILE                         = NULL;          char *link_str                          = NULL; +        xlator_t *src_subvol                    = NULL; +        dht_conf_t   *conf                      = NULL;          GF_VALIDATE_OR_GOTO ("tier", query_cbk_args, out);          GF_VALIDATE_OR_GOTO ("tier", query_cbk_args->this, out);          this = query_cbk_args->this;          GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->defrag, out);          GF_VALIDATE_OR_GOTO (this->name, query_cbk_args->queryFILE, out); +        GF_VALIDATE_OR_GOTO (this->name, this->private, out); + +        conf = this->private;          defrag = query_cbk_args->defrag; @@ -300,8 +305,29 @@ tier_migrate_using_query_file 
(void *_args)                          inode_unref (loc.inode);                          loc.inode = linked_inode; +                        /* +                         * Do not promote/demote if file already is where it +                         * should be. This shall become a skipped count. +                         */ +                        src_subvol = dht_subvol_get_cached(this, loc.inode); + +                        if (query_cbk_args->is_promotion && +                             src_subvol == conf->subvolumes[1]) { +                                per_link_status = -1; +                                goto error; +                        } + +                        if (!query_cbk_args->is_promotion && +                            src_subvol == conf->subvolumes[0]) { +                                per_link_status = -1; +                                goto error; +                        } +                          gf_msg (this->name, GF_LOG_INFO, 0, -                                DHT_MSG_LOG_TIER_STATUS, "Tier migrate file %s", +                                DHT_MSG_LOG_TIER_STATUS, "Tier %d" +                                " src_subvol %s file %s", +                                query_cbk_args->is_promotion, +                                src_subvol->name,                                  loc.name);                          if (tier_check_same_node (this, &loc, defrag)) { @@ -587,6 +613,7 @@ tier_demote (void *args)          query_cbk_args.this = demotion_args->this;          query_cbk_args.defrag = demotion_args->defrag; +        query_cbk_args.is_promotion = 0;          /*Build the query file using bricklist*/          ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, @@ -622,6 +649,7 @@ static void          query_cbk_args.this = promotion_args->this;          query_cbk_args.defrag = promotion_args->defrag; +        query_cbk_args.is_promotion = 1;          /*Build the query file using bricklist*/          ret = 
tier_build_migration_qfile(promotion_args, &query_cbk_args, @@ -641,7 +669,7 @@ out:          return NULL;  } -static void +static int  tier_get_bricklist (xlator_t *xl, dict_t *bricklist)  {          xlator_list_t  *child = NULL; @@ -690,14 +718,17 @@ tier_get_bricklist (xlator_t *xl, dict_t *bricklist)                          if (dict_add_dynstr_with_alloc(bricklist, "brick",                                                         db_path))                                  goto out; + +                        ret = 0; +                        goto out;                  }          }          for (child = xl->children; child; child = child->next) { -                tier_get_bricklist(child->xlator, bricklist); +                ret = tier_get_bricklist(child->xlator, bricklist);          }  out: -        return; +        return ret;  }  int diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h index 6dc830dabfa..7d4ea6b7ebe 100644 --- a/xlators/cluster/dht/src/tier.h +++ b/xlators/cluster/dht/src/tier.h @@ -50,6 +50,7 @@ typedef struct _query_cbk_args {          xlator_t *this;          gf_defrag_info_t *defrag;          FILE *queryFILE; +        int is_promotion;  } query_cbk_args_t;  int diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index b32dbb89134..165622f87ef 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -465,6 +465,9 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)                          ret = -1;                          goto out;                  } + +                ret = dict_get_int32 (dict, "type", &type); +                  goto brick_val;          } @@ -1835,6 +1838,7 @@ glusterd_op_perform_attach_tier (dict_t *dict,  {          int                                     ret = 0;          int                                     replica_count = 0; +        int                                   
  type = 0;          /*           * Store the new (cold) tier's structure until the graph is generated. @@ -1853,6 +1857,9 @@ glusterd_op_perform_attach_tier (dict_t *dict,          else                  volinfo->tier_info.hot_replica_count  = 1;          volinfo->tier_info.hot_brick_count     = count; +        ret = dict_get_int32 (dict, "type", &type); +        volinfo->tier_info.hot_type      = type; +        ret = dict_set_int32 (dict, "type", GF_CLUSTER_TYPE_TIER);          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 8a61da2e8ec..bbac2a2283b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3107,11 +3107,13 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,          /* All other cases, it will have one or the other cluster type */          switch (volinfo->type) {          case GF_CLUSTER_TYPE_REPLICATE: -                clusters = volgen_link_bricks_from_list_tail (graph, volinfo, -                                                        replicate_args[0], -                                                        replicate_args[1], -                                                        volinfo->brick_count, -                                                        volinfo->replica_count); +                clusters = volgen_link_bricks_from_list_tail +                        (graph, volinfo, +                         replicate_args[0], +                         replicate_args[1], +                         volinfo->brick_count, +                         volinfo->replica_count); +                  if (clusters < 0)                          goto out;                  break; @@ -3284,12 +3286,12 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,          volinfo->type           = volinfo->tier_info.cold_type;          sprintf (volinfo->volname, "%s-cold", st_volname); -        ret = 
volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); +        ret = volume_volgen_graph_build_clusters (graph, volinfo, is_quotad);          if (ret)                  goto out;          cxl = first_of(graph); -        volinfo->type           = GF_CLUSTER_TYPE_TIER; +        volinfo->type           = volinfo->tier_info.hot_type;          volinfo->brick_count    = volinfo->tier_info.hot_brick_count;          volinfo->replica_count  = volinfo->tier_info.hot_replica_count;          volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); @@ -3297,21 +3299,34 @@ volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,          sprintf (volinfo->volname, "%s-hot", st_volname); -        if (volinfo->dist_leaf_count == 1) { -                dist_count = volinfo->brick_count / volinfo->dist_leaf_count; -                ret = volgen_link_bricks_from_list_head (graph,  volinfo, -                                                         "cluster/distribute", -                                                         "%s-dht", -                                                         dist_count, -                                                         dist_count); +        dist_count = volinfo->brick_count / volinfo->dist_leaf_count; + +        if (volinfo->dist_leaf_count != 1) { +                ret = volgen_link_bricks_from_list_head +                        (graph, volinfo, +                         "cluster/replicate", +                         "%s-replicate-%d", +                         volinfo->brick_count, +                         volinfo->replica_count); +                if (ret != -1) +                        volgen_link_bricks_from_list_tail (graph,  volinfo, +                                                           "cluster/distribute", +                                                           "%s-dht", +                                                           dist_count, +                                                           
dist_count);          } else { -                ret = volume_volgen_graph_build_clusters (graph, -                                                          volinfo, -                                                          _gf_false); +                ret = volgen_link_bricks_from_list_head (graph,  volinfo, +                                                 "cluster/distribute", +                                                 "%s-dht", +                                                 dist_count, +                                                 dist_count);          } +        if (ret == -1) +                goto out;          hxl = first_of(graph); +        volinfo->type           = GF_CLUSTER_TYPE_TIER;          xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s",                                        "tier-dht", 0);          gf_asprintf(&rule, "%s-hot-dht", st_volname); | 
