diff options
| -rw-r--r-- | cli/src/cli-cmd-parser.c | 23 | ||||
| -rw-r--r-- | cli/src/cli-cmd-volume.c | 144 | ||||
| -rw-r--r-- | cli/src/cli-rpc-ops.c | 215 | ||||
| -rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 2 | ||||
| -rw-r--r-- | rpc/xdr/src/cli1-xdr.x | 10 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 135 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 2 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 10 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 15 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 83 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 8 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 181 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 26 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 9 | 
15 files changed, 821 insertions, 43 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 5520c9e46b1..54a57008457 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -355,6 +355,10 @@ cli_validate_disperse_volume (char *word, gf1_cluster_type type,                  cli_err ("striped-replicated-dispersed volume "                           "is not supported");                  goto out; +        case GF_CLUSTER_TYPE_TIER: +                cli_err ("tier-dispersed volume is not " +                         "supported"); +                goto out;          case GF_CLUSTER_TYPE_STRIPE:                  cli_err ("striped-dispersed volume is not "                           "supported"); @@ -490,6 +494,11 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,                          case GF_CLUSTER_TYPE_STRIPE:                                  type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;                                  break; +                        case GF_CLUSTER_TYPE_TIER: +                                cli_err ("replicated-tiered volume is not " +                                         "supported"); +                                goto out; +                                break;                          case GF_CLUSTER_TYPE_DISPERSE:                                  cli_err ("replicated-dispersed volume is not "                                           "supported"); @@ -529,6 +538,10 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words,                                  cli_err ("striped-dispersed volume is not "                                           "supported");                                  goto out; +                        case GF_CLUSTER_TYPE_TIER: +                                cli_err ("striped-tier volume is not " +                                         "supported"); +                                goto out;                          }                          if (wordcount < (index + 2)) {                                  ret = -1; @@ -3384,6 +3397,16 @@ cli_cmd_volume_defrag_parse (const char **words, int wordcount,                  if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&                      strcmp (words[3], "status"))                              goto out; +        } else if ((strcmp (words[3], "tier") == 0) && +               (strcmp (words[4], "start") == 0)) { +                volname = (char *) words[2]; +                cmd = GF_DEFRAG_CMD_START_TIER; +                goto done; +        } else if ((strcmp (words[3], "tier") == 0) && +               (strcmp (words[4], "status") == 0)) { +                volname = (char *) words[2]; +                cmd = GF_DEFRAG_CMD_STATUS_TIER; +                goto done;          } else {                  if (strcmp (words[3], "fix-layout") &&                      strcmp (words[3], "start")) diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 6c950da4e97..3098d74491c 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -840,6 +840,142 @@ out:          return ret;  } +int +cli_cmd_volume_attach_tier_cbk (struct cli_state *state, +                                struct cli_cmd_word *word, const char **words, +                                int wordcount) +{ +        int                     ret = -1; +        rpc_clnt_procedure_t    *proc = NULL; +        call_frame_t            *frame = NULL; +        dict_t                  *options = NULL; +        int                     sent = 0; +        int                     parse_error = 0; +        gf_answer_t             answer = GF_ANSWER_NO; +        cli_local_t             *local = NULL; + +        frame = create_frame (THIS, THIS->ctx->pool); +        if (!frame) +                goto out; + +        ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options); +        if (ret) { +                cli_usage_out (word->pattern); +                parse_error = 1; +                goto out; +        } + +        if (state->mode & GLUSTER_MODE_WIGNORE) { +                ret = dict_set_int32 (options, "force", _gf_true); +                if (ret) { +                        gf_log ("cli", GF_LOG_ERROR, "Failed to set force " +                                "option"); +                        goto out; +                } +        } + +        ret = dict_set_int32 (options, "attach-tier", 1); +        if (ret) +                goto out; + +        ret = dict_set_int32 (options, "type", GF_CLUSTER_TYPE_TIER); +        if (ret) +                goto out; + +        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ATTACH_TIER]; + +        CLI_LOCAL_INIT (local, words, frame, options); + +        if (proc->fn) { +                ret = proc->fn (frame, THIS, options); +        } + +out: +        if (ret) { +                cli_cmd_sent_status_get (&sent); +                if ((sent == 0) && (parse_error == 0)) +                        cli_out ("attach-tier failed"); +        } + +        CLI_STACK_DESTROY (frame); + +        return ret; +} + +int +cli_cmd_volume_detach_tier_cbk (struct cli_state *state, +                              struct cli_cmd_word *word, const char **words, +                              int wordcount) +{ +        int                     ret = -1; +        rpc_clnt_procedure_t    *proc = NULL; +        call_frame_t            *frame = NULL; +        dict_t                  *options = NULL; +        int                     sent = 0; +        int                     parse_error = 0; +        gf_answer_t             answer = GF_ANSWER_NO; +        cli_local_t             *local = NULL; +        int                     need_question = 0; + +        const char *question = "Removing tier can result in data loss. " +                               "Do you want to Continue?"; + +        if (wordcount != 3) +                goto out; + +        frame = create_frame (THIS, THIS->ctx->pool); +        if (!frame) +                goto out; + +        options = dict_new (); +        if (!options) +                goto out; + +        ret = dict_set_int32 (options, "force", 1); +        if (ret) +                goto out; + +        ret = dict_set_int32 (options, "command", GF_OP_CMD_DETACH); +        if (ret) +                goto out; + +        ret = dict_set_str (options, "volname", (char *)words[2]); +        if (ret) +                goto out; + +        ret = dict_set_int32 (options, "count", 1); +        if (ret) +                goto out; + +        if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) { +                /* we need to ask question only in case of 'commit or force' */ +                answer = cli_cmd_get_confirmation (state, question); +                if (GF_ANSWER_NO == answer) { +                        ret = 0; +                        goto out; +                } +        } + +        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DETACH_TIER]; + +        CLI_LOCAL_INIT (local, words, frame, options); + +        if (proc->fn) { +                ret = proc->fn (frame, THIS, options); +        } + +out: +        if (ret) { +                cli_cmd_sent_status_get (&sent); +                if ((sent == 0) && (parse_error == 0)) +                        cli_out ("Volume detach-tier failed"); +        } + +        CLI_STACK_DESTROY (frame); + +        return ret; +} +  static int  gf_cli_create_auxiliary_mount (char *volname)  { @@ -2435,6 +2571,14 @@ struct cli_cmd volume_cmds[] = {            cli_cmd_volume_rename_cbk,            "rename volume <VOLNAME> to <NEW-VOLNAME>"},*/ +        { "volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...", +          cli_cmd_volume_attach_tier_cbk, +          "attach tier to volume <VOLNAME>"}, + +        { "volume detach-tier <VOLNAME>", +          cli_cmd_volume_detach_tier_cbk, +          "detach tier from volume <VOLNAME>"}, +          { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ... [force]",            cli_cmd_volume_add_brick_cbk,            "add brick to volume <VOLNAME>"}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 6e66e377ed5..c9b01694436 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -61,6 +61,7 @@ char *cli_vol_type_str[] = {"Distribute",                              "Replicate",                              "Striped-Replicate",                              "Disperse", +                            "Tier",                              "Distributed-Stripe",                              "Distributed-Replicate",                              "Distributed-Striped-Replicate", @@ -739,8 +740,9 @@ xml_output:                  vol_type = type;                  // Distributed (stripe/replicate/stripe-replica) setups -                if ((type > 0) && ( dist_count < brick_count)) -                        vol_type = type + 4; +                if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) && +                    (dist_count < brick_count)) +                       vol_type = type + 5;                  cli_out ("Volume Name: %s", volname);                  cli_out ("Type: %s", cli_vol_type_str[vol_type]); @@ -1441,6 +1443,134 @@ out:  }  int +gf_cli_print_tier_status (dict_t *dict, enum gf_task_types task_type) +{ +        int                ret          = -1; +        int                count        = 0; +        int                i            = 1; +        char               key[256]     = {0,}; +        gf_defrag_status_t status_rcd   = GF_DEFRAG_STATUS_NOT_STARTED; +        uint64_t           files        = 0; +        uint64_t           size         = 0; +        uint64_t           lookup       = 0; +        char               *node_name   = NULL; +        uint64_t           failures     = 0; +        uint64_t           skipped      = 0; +        double             elapsed      = 0; +        char               *status_str  = NULL; +        char               *size_str    = NULL; + +        ret = dict_get_int32 (dict, "count", &count); +        if (ret) { +                gf_log ("cli", GF_LOG_ERROR, "count not set"); +                goto out; +        } + + +        cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "Node", +                 "Rebalanced-files", "size", "scanned", "failures", "skipped", +                 "status", "run time in secs"); +        cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "---------", +                 "-----------", "-----------", "-----------", "-----------", +                 "-----------", "------------", "--------------"); +        for (i = 1; i <= count; i++) { +                /* Reset the variables to prevent carryover of values */ +                node_name = NULL; +                files = 0; +                size = 0; +                lookup = 0; +                skipped = 0; +                status_str = NULL; +                elapsed = 0; + +                /* Check if status is NOT_STARTED, and continue early */ +                memset (key, 0, 256); +                snprintf (key, 256, "status-%d", i); +                ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd); +                if (ret) { +                        gf_log ("cli", GF_LOG_TRACE, "failed to get status"); +                        goto out; +                } +                if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd) +                        continue; + + +                snprintf (key, 256, "node-name-%d", i); +                ret = dict_get_str (dict, key, &node_name); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, "failed to get node-name"); + +                memset (key, 0, 256); +                snprintf (key, 256, "files-%d", i); +                ret = dict_get_uint64 (dict, key, &files); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, +                                "failed to get file count"); + +                memset (key, 0, 256); +                snprintf (key, 256, "size-%d", i); +                ret = dict_get_uint64 (dict, key, &size); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, +                                "failed to get size of xfer"); + +                memset (key, 0, 256); +                snprintf (key, 256, "lookups-%d", i); +                ret = dict_get_uint64 (dict, key, &lookup); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, +                                "failed to get lookedup file count"); + +                memset (key, 0, 256); +                snprintf (key, 256, "failures-%d", i); +                ret = dict_get_uint64 (dict, key, &failures); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, +                                "failed to get failures count"); + +                memset (key, 0, 256); +                snprintf (key, 256, "skipped-%d", i); +                ret = dict_get_uint64 (dict, key, &skipped); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, +                                "failed to get skipped count"); + +                /* For remove-brick include skipped count into failure count*/ +                if (task_type != GF_TASK_TYPE_REBALANCE) { +                        failures += skipped; +                        skipped = 0; +                } + +                memset (key, 0, 256); +                snprintf (key, 256, "run-time-%d", i); +                ret = dict_get_double (dict, key, &elapsed); +                if (ret) +                        gf_log ("cli", GF_LOG_TRACE, "failed to get run-time"); + +                /* Check for array bound */ +                if (status_rcd >= GF_DEFRAG_STATUS_MAX) +                        status_rcd = GF_DEFRAG_STATUS_MAX; + +                status_str = cli_vol_task_status_str[status_rcd]; +                size_str = gf_uint64_2human_readable(size); +                if (size_str) { +                        cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13" +                                 PRIu64" %13"PRIu64 " %20s %18.2f", node_name, +                                 files, size_str, lookup, failures, skipped, +                                 status_str, elapsed); +                } else { +                        cli_out ("%40s %16"PRIu64 " %13"PRIu64 " %13"PRIu64 +                                 " %13"PRIu64" %13"PRIu64 " %20s %18.2f", +                                 node_name, files, size, lookup, failures, +                                 skipped, status_str, elapsed); +                } +                GF_FREE(size_str); +        } +out: +        return ret; +} + +int  gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                               int count, void *myframe)  { @@ -1504,7 +1634,9 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                  }          } -        if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) && +        if (!((cmd == GF_DEFRAG_CMD_STOP) || +              (cmd == GF_DEFRAG_CMD_STATUS) || +              (cmd == GF_DEFRAG_CMD_STATUS_TIER)) &&               !(global_state->mode & GLUSTER_MODE_XML)) {                  /* All other possibilites are about starting a rebalance */                  ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); @@ -1577,7 +1709,12 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,                  goto out;          } -        ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REBALANCE); +        if (cmd == GF_DEFRAG_CMD_STATUS_TIER) +                ret = gf_cli_print_tier_status (dict, GF_TASK_TYPE_REBALANCE); +        else +                ret = gf_cli_print_rebalance_status (dict, +                                                     GF_TASK_TYPE_REBALANCE); +          if (ret)                  gf_log ("cli", GF_LOG_ERROR,                          "Failed to print rebalance status"); @@ -3616,7 +3753,7 @@ int32_t  gf_cli_reset_volume (call_frame_t *frame, xlator_t *this,                          void *data)  { -        gf_cli_req              req =  {{0,}}; +        gf_cli_req              req =  {{0,} };          int                     ret = 0;          dict_t                  *dict = NULL; @@ -3665,7 +3802,7 @@ int32_t  gf_cli_set_volume (call_frame_t *frame, xlator_t *this,                           void *data)  { -        gf_cli_req              req =  {{0,}}; +        gf_cli_req              req =  {{0,} };          int                     ret = 0;          dict_t                  *dict = NULL; @@ -3691,7 +3828,7 @@ int32_t  gf_cli_add_brick (call_frame_t *frame, xlator_t *this,                           void *data)  { -        gf_cli_req              req =  {{0,}}; +        gf_cli_req              req =  {{0,} };          int                     ret = 0;          dict_t                  *dict = NULL;          char                    *volname = NULL; @@ -3726,6 +3863,66 @@ out:  }  int32_t +gf_cli_attach_tier (call_frame_t *frame, xlator_t *this, +                    void *data) +{ +        gf_cli_req              req =  {{0,} }; +        int                     ret = 0; +        dict_t                  *dict = NULL; + +        if (!frame || !this ||  !data) { +                ret = -1; +                goto out; +        } + +        dict = data; + +        if (ret) +                goto out; + +        ret = cli_to_glusterd (&req, frame, gf_cli_add_brick_cbk, +                               (xdrproc_t) xdr_gf_cli_req, dict, +                               GLUSTER_CLI_ATTACH_TIER, this, +                               cli_rpc_prog, NULL); +out: +        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + +        GF_FREE (req.dict.dict_val); +        return ret; +} + +int32_t +gf_cli_detach_tier (call_frame_t *frame, xlator_t *this, +                    void *data) +{ +        gf_cli_req              req =  {{0,} }; +        int                     ret = 0; +        dict_t                  *dict = NULL; +        char                    *volname = NULL; + +        if (!frame || !this ||  !data) { +                ret = -1; +                goto out; +        } + +        dict = data; + +        ret = cli_to_glusterd (&req, frame, gf_cli_remove_brick_cbk, +                              (xdrproc_t) xdr_gf_cli_req, dict, +                               GLUSTER_CLI_DETACH_TIER, this, +                               cli_rpc_prog, NULL); + + +out: +        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + +        GF_FREE (req.dict.dict_val); + +        return ret; +} + + +int32_t  gf_cli_remove_brick (call_frame_t *frame, xlator_t *this,                           void *data)  { @@ -9965,7 +10162,9 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {          [GLUSTER_CLI_BARRIER_VOLUME]   = {"BARRIER VOLUME", gf_cli_barrier_volume},          [GLUSTER_CLI_GANESHA]          = {"GANESHA", gf_cli_ganesha},          [GLUSTER_CLI_GET_VOL_OPT]      = {"GET_VOL_OPT", gf_cli_get_vol_opt}, -        [GLUSTER_CLI_BITROT]           = {"BITROT", gf_cli_bitrot} +        [GLUSTER_CLI_BITROT]           = {"BITROT", gf_cli_bitrot}, +        [GLUSTER_CLI_ATTACH_TIER]      = {"ATTACH_TIER", gf_cli_attach_tier}, +        [GLUSTER_CLI_DETACH_TIER]      = {"DETACH_TIER", gf_cli_detach_tier}  };  struct rpc_clnt_program cli_prog = { diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index bf68366f5dd..60697b8fa66 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -183,6 +183,8 @@ enum gluster_cli_procnum {          GLUSTER_CLI_GET_VOL_OPT,          GLUSTER_CLI_GANESHA,          GLUSTER_CLI_BITROT, +        GLUSTER_CLI_ATTACH_TIER, +        GLUSTER_CLI_DETACH_TIER,          GLUSTER_CLI_MAXVALUE,  }; diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 925700699ab..72581b0c5d5 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -3,7 +3,9 @@          GF_DEFRAG_CMD_STOP,          GF_DEFRAG_CMD_STATUS,          GF_DEFRAG_CMD_START_LAYOUT_FIX, -        GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */ +        GF_DEFRAG_CMD_START_FORCE, /* used by remove-brick data migration */ +        GF_DEFRAG_CMD_START_TIER, +        GF_DEFRAG_CMD_STATUS_TIER  };   enum gf_defrag_status_t { @@ -24,7 +26,8 @@          GF_CLUSTER_TYPE_STRIPE,          GF_CLUSTER_TYPE_REPLICATE,          GF_CLUSTER_TYPE_STRIPE_REPLICATE, -        GF_CLUSTER_TYPE_DISPERSE +        GF_CLUSTER_TYPE_DISPERSE, +        GF_CLUSTER_TYPE_TIER  };   enum gf1_cli_replace_op { @@ -53,7 +56,8 @@ enum gf_bitrot_type {          GF_OP_CMD_COMMIT,          GF_OP_CMD_STOP,          GF_OP_CMD_STATUS, -        GF_OP_CMD_COMMIT_FORCE +        GF_OP_CMD_COMMIT_FORCE, +        GF_OP_CMD_DETACH  };  enum gf_quota_type { diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index fd4618bb78c..fa5e533f135 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,          int replica_nodes = 0;          switch (volinfo->type) { +        case GF_CLUSTER_TYPE_TIER: +                ret = 1; +                goto out; +          case GF_CLUSTER_TYPE_NONE:          case GF_CLUSTER_TYPE_STRIPE:          case GF_CLUSTER_TYPE_DISPERSE: @@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)          int32_t                         replica_count = 0;          int32_t                         stripe_count = 0;          int                             type = 0; -          this = THIS;          GF_ASSERT(this); @@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)          total_bricks = volinfo->brick_count + brick_count; +        if (dict_get (dict, "attach-tier")) { +                if (volinfo->type == GF_CLUSTER_TYPE_TIER) { +                        snprintf (err_str, sizeof (err_str), +                                  "Volume %s is already a tier.", volname); +                        gf_log (this->name, GF_LOG_ERROR, "%s", err_str); +                        ret = -1; +                        goto out; +                } +                goto brick_val; +        } +          if (!stripe_count && !replica_count) {                  if (volinfo->type == GF_CLUSTER_TYPE_NONE)                          goto brick_val; @@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols)          GF_FREE (subvols);  } +static int +glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo) +{ +        char key[256] = {0,}; +        char value[256] = {0,}; +        int brick_num = 0; +        int hot_brick_num = 0; +        glusterd_brickinfo_t *brickinfo; +        int ret = 0; + +        /* cold tier bricks at tail of list so use reverse iteration */ +        cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks, +                                         brick_list) { +                brick_num++; +                if (brick_num > volinfo->tier_info.cold_brick_count) { +                        hot_brick_num++; +                        sprintf (key, "brick%d", hot_brick_num); +                        snprintf (value, 256, "%s:%s", +                                  brickinfo->hostname, +                                  brickinfo->path); + +                        ret = dict_set_str (dict, key, strdup(value)); +                        if (ret) +                                break; +                } +        } + +        ret = dict_set_int32(dict, "count", hot_brick_num); +        if (ret) +                return -1; + +        return hot_brick_num; +} +  int  __glusterd_handle_remove_brick (rpcsvc_request_t *req)  { @@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)  	/* Do not allow remove-brick if the bricks given is less than             the replica count or stripe count */ -        if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) { +        if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) && +            (volinfo->type != GF_CLUSTER_TYPE_TIER))  {                  if (volinfo->dist_leaf_count &&                      (count % volinfo->dist_leaf_count)) {                          snprintf (err_str, sizeof (err_str), "Remove brick " @@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)                  goto out;          } +          strcpy (brick_list, " ");          if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && @@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)                          goto out;          } +        if (volinfo->type == GF_CLUSTER_TYPE_TIER) +                count = glusterd_set_detach_bricks(dict, volinfo); +          while ( i <= count) {                  snprintf (key, sizeof (key), "brick%d", i);                  ret = dict_get_str (dict, key, &brick); @@ -836,6 +889,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)                  ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,                                                               &brickinfo); +                  if (ret) {                          snprintf (err_str, sizeof (err_str), "Incorrect brick "                                    "%s for volume %s", brick, volname); @@ -883,7 +937,8 @@ out:          } -        GF_FREE (brick_list); +        if (brick_list) +                GF_FREE (brick_list);          subvol_matcher_destroy (subvols);          free (cli_req.dict.dict_val); //its malloced by xdr @@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,                  ret = glusterd_resolve_brick (brickinfo);                  if (ret)                          goto out; -                if (stripe_count || replica_count) { + +                /* hot tier bricks are added to head of brick list */ +                if (dict_get (dict, "attach-tier")) { +                        cds_list_add (&brickinfo->brick_list, &volinfo->bricks); +                } else if (stripe_count || replica_count) {                          add_brick_at_right_order (brickinfo, volinfo, (i - 1),                                                    stripe_count, replica_count);                  } else { @@ -1674,6 +1733,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)                  break; +        case GF_OP_CMD_DETACH:          case GF_OP_CMD_COMMIT_FORCE:                  break;          } @@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,          return ret;  } +static int +glusterd_op_perform_attach_tier (dict_t *dict, +                                 glusterd_volinfo_t *volinfo, +                                 int count, +                                 char *bricks) +{ +        int                                     ret = 0; +        int                                     replica_count = 0; + +        /* +         * Store the new (cold) tier's structure until the graph is generated. +         * If there is a failure before the graph is generated the +         * structure will revert to its original state. +         */ +        volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count; +        volinfo->tier_info.cold_type           = volinfo->type; +        volinfo->tier_info.cold_brick_count    = volinfo->brick_count; +        volinfo->tier_info.cold_replica_count  = volinfo->replica_count; +        volinfo->tier_info.cold_disperse_count = volinfo->disperse_count; + +        ret = dict_get_int32 (dict, "replica-count", &replica_count); +        if (!ret) +                volinfo->tier_info.hot_replica_count  = replica_count; +        else +                volinfo->tier_info.hot_replica_count  = 1; +        volinfo->tier_info.hot_brick_count     = count; + +        return ret; +}  int  glusterd_op_add_brick (dict_t *dict, char **op_errstr) @@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)          xlator_t                                *this = NULL;          char                                    *bricks = NULL;          int32_t                                 count = 0; +        int32_t                                 replica_count = 0;          this = THIS;          GF_ASSERT (this); @@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)                  goto out;          } +        if (dict_get(dict, "attach-tier")) { +                gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier"); +                glusterd_op_perform_attach_tier (dict, volinfo, count, bricks); +        } +          ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);          if (ret) {                  gf_log ("", GF_LOG_ERROR, "Unable to add bricks"); @@ -1829,6 +1924,14 @@ out:          return ret;  } +static void +glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo) +{ +        volinfo->type           = volinfo->tier_info.cold_type; +        volinfo->replica_count  = volinfo->tier_info.cold_replica_count; +        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count; +} +  int  glusterd_op_remove_brick (dict_t *dict, char **op_errstr)  { @@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)                  force = 1;                  break; +        case GF_OP_CMD_DETACH: +                glusterd_op_perform_detach_tier (volinfo); +                /* fall through */ +          case GF_OP_CMD_COMMIT_FORCE:                  if (volinfo->decommission_in_progress) { @@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)                  volinfo->sub_count = replica_count;                  volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); -                if (replica_count == 1) { +                /* +                 * volinfo->type and sub_count have already been set for +                 * volumes undergoing a detach operation, they should not +                 * be modified here. +                 */ +                if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) {                          if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {                                  volinfo->type = GF_CLUSTER_TYPE_NONE;                                  /* backward compatibility */ @@ -2224,3 +2336,16 @@ out:          gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);          return ret;  } + +int +glusterd_handle_attach_tier (rpcsvc_request_t *req) +{ +        return glusterd_big_locked_handler (req, __glusterd_handle_add_brick); +} + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req) +{ +        return glusterd_big_locked_handler (req, +                                            __glusterd_handle_remove_brick); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 77fa96400ba..a41b36b9715 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -4817,6 +4817,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {          [GLUSTER_CLI_DELETE_VOLUME]      = { "DELETE_VOLUME",     GLUSTER_CLI_DELETE_VOLUME,    glusterd_handle_cli_delete_volume,     NULL, 0, DRC_NA},          [GLUSTER_CLI_GET_VOLUME]         = { "GET_VOLUME",        GLUSTER_CLI_GET_VOLUME,       glusterd_handle_cli_get_volume,        NULL, 0, DRC_NA},          [GLUSTER_CLI_ADD_BRICK]          = { "ADD_BRICK",         GLUSTER_CLI_ADD_BRICK,        glusterd_handle_add_brick,             NULL, 0, DRC_NA}, +        [GLUSTER_CLI_ATTACH_TIER]        = { "ATTACH_TIER",       GLUSTER_CLI_ATTACH_TIER,      glusterd_handle_attach_tier,           NULL, 0, DRC_NA}, +        [GLUSTER_CLI_DETACH_TIER]        = { "DETACH_TIER",       GLUSTER_CLI_DETACH_TIER,      glusterd_handle_detach_tier,           NULL, 0, DRC_NA},          [GLUSTER_CLI_REPLACE_BRICK]      = { "REPLACE_BRICK",     GLUSTER_CLI_REPLACE_BRICK,    glusterd_handle_replace_brick,         NULL, 0, DRC_NA},          [GLUSTER_CLI_REMOVE_BRICK]       = { "REMOVE_BRICK",      GLUSTER_CLI_REMOVE_BRICK,     glusterd_handle_remove_brick,          NULL, 0, DRC_NA},          [GLUSTER_CLI_LOG_ROTATE]         = { "LOG FILENAME",      GLUSTER_CLI_LOG_ROTATE,       glusterd_handle_log_rotate,            NULL, 0, DRC_NA}, diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 75756518f28..c5fcb7698e5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -448,6 +448,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin          char                    name[1024] = {0,};          gf_xl_afr_op_t          heal_op = GF_SHD_OP_INVALID;          xlator_t                *this = NULL; +        glusterd_volinfo_t        *volinfo      = NULL;          this = THIS;          GF_ASSERT (this); @@ -514,7 +515,11 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin                  ret = dict_get_str (dict, "volname", &volname);                  if (ret)                          goto out; -                snprintf (name, 1024, "%s-dht",volname); +                ret = glusterd_volinfo_find (volname, &volinfo); +                if (volinfo->type == GF_CLUSTER_TYPE_TIER) +                        snprintf (name, 1024, "tier-dht"); +                else +                        snprintf (name, 1024, "%s-dht", volname);                  brick_req->name = gf_strdup (name);                  break; @@ -5159,6 +5164,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,          while ( i <= count) {                  snprintf (key, 256, "brick%d", i); +                  ret = dict_get_str (dict, key, &brick);                  if (ret) {                          gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick"); @@ -5167,8 +5173,10 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,                  ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,                                                                &brickinfo); +                  if (ret)                          goto out; +                  if (glusterd_is_brick_started (brickinfo)) {                          pending_node = GF_CALLOC (1, sizeof (*pending_node),                                                    gf_gld_mt_pending_node_t); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index ba67df436ff..0d66571300f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -278,6 +278,13 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,                           "--xlator-option", "*replicate*.readdir-failover=off",                           "--xlator-option", "*dht.readdir-optimize=on",                           NULL); + +        if (volinfo->type == GF_CLUSTER_TYPE_TIER) { +                runner_add_arg (&runner, "--xlator-option"); +                runner_argprintf (&runner, +                                  "*tier-dht.xattr-name=trusted.tier-gfid"); +        } +          runner_add_arg (&runner, "--xlator-option");          runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);          runner_add_arg (&runner, "--xlator-option"); @@ -487,6 +494,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req)                  goto out;          if ((cmd == GF_DEFRAG_CMD_STATUS) || +            (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||                (cmd == GF_DEFRAG_CMD_STOP)) {                  ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,                                           dict, msg, sizeof (msg)); @@ -556,6 +564,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)          switch (cmd) {          case GF_DEFRAG_CMD_START:          case GF_DEFRAG_CMD_START_LAYOUT_FIX: +        case GF_DEFRAG_CMD_START_TIER:                  /* Check if the connected clients are all of version                   * glusterfs-3.6 and higher. This is needed to prevent some data                   * loss issues that could occur when older clients are connected @@ -690,7 +699,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)          /* Set task-id, if available, in op_ctx dict for operations other than           * start           */ -        if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { +        if (cmd == GF_DEFRAG_CMD_STATUS || +            cmd == GF_DEFRAG_CMD_STOP || +            cmd == GF_DEFRAG_CMD_STATUS_TIER) {                  if (!uuid_is_null (volinfo->rebal.rebalance_id)) {                          ctx = glusterd_op_get_ctx ();                          if (!ctx) { @@ -720,6 +731,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)          case GF_DEFRAG_CMD_START:          case GF_DEFRAG_CMD_START_LAYOUT_FIX:          case GF_DEFRAG_CMD_START_FORCE: +        case GF_DEFRAG_CMD_START_TIER:                  /* Reset defrag status to 'NOT STARTED' whenever a                   * remove-brick/rebalance command is issued to remove                   * stale information from previous run. @@ -791,6 +803,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)                  break;          case GF_DEFRAG_CMD_STATUS: +        case GF_DEFRAG_CMD_STATUS_TIER:                  break;          default:                  break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 5b2b14503ae..5696229572d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -812,6 +812,63 @@ out:                          " for volume %s", volinfo->volname);          return ret;  } + +int32_t +glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo) +{ +        int32_t      ret            = -1; +        char         buf[PATH_MAX]  = ""; + +        if (volinfo->type != GF_CLUSTER_TYPE_TIER) { +                ret = 0; +                goto out; +        } + +        snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_brick_count); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_COUNT, buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", +                  volinfo->tier_info.cold_replica_count); +        ret = gf_store_save_value (fd, +                                   GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT, +                                   buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_disperse_count); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT, +                                   buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_brick_count); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_COUNT, +                                   buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_replica_count); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT, +                                   buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_type); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_TYPE, buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_type); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_TYPE, buf); +        if (ret) +                goto out; + + out: +        return ret; +} +  int32_t  glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)  { @@ -917,6 +974,8 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)                          goto out;          } +        ret = glusterd_volume_write_tier_details (fd, volinfo); +          ret = glusterd_volume_write_snap_details (fd, volinfo);  out: @@ -2725,6 +2784,27 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)                                  strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {                          strncpy (volinfo->parent_volname, value,                                   sizeof(volinfo->parent_volname) - 1); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT, +                                     strlen (key))) { +                        volinfo->tier_info.cold_brick_count = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT, +                                     strlen (key))) { +                        volinfo->tier_info.cold_replica_count = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT, +                                     strlen (key))) { +                        volinfo->tier_info.cold_disperse_count = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT, +                                     strlen (key))) { +                        volinfo->tier_info.cold_brick_count = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT, +                                     strlen (key))) { +                        volinfo->tier_info.cold_replica_count = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE, +                                     strlen (key))) { +                        volinfo->tier_info.hot_type = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE, +                                     strlen (key))) { +                        volinfo->tier_info.cold_type = atoi (value);                  } else {                          if (is_key_glusterd_hooks_friendly (key)) { @@ -2809,6 +2889,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)                                  GF_ASSERT (volinfo->redundancy_count > 0);                          break; +                        case GF_CLUSTER_TYPE_TIER: +                        break; +                          default:                                  GF_ASSERT (0);                          break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index afa96be77cf..45ed86a4163 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -64,6 +64,14 @@ typedef enum glusterd_store_ver_ac_{  #define GLUSTERD_STORE_KEY_VOL_OP_VERSION       "op-version"  #define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" +#define GLUSTERD_STORE_KEY_COLD_TYPE            "cold_type" +#define GLUSTERD_STORE_KEY_COLD_COUNT           "cold_count" +#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT   "cold_replica_count" +#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT  "cold_disperse_count" +#define GLUSTERD_STORE_KEY_HOT_TYPE             "hot_type" +#define GLUSTERD_STORE_KEY_HOT_COUNT            "hot_count" +#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT    "hot_replica_count" +  #define GLUSTERD_STORE_KEY_SNAP_NAME            "name"  #define GLUSTERD_STORE_KEY_SNAP_ID              "snap-id"  #define GLUSTERD_STORE_KEY_SNAP_DESC            "desc" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 727a19d24d1..27357955fe8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -560,6 +560,7 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,          new_volinfo->sub_count = volinfo->sub_count;          new_volinfo->transport_type = volinfo->transport_type;          new_volinfo->brick_count = volinfo->brick_count; +        new_volinfo->tier_info = volinfo->tier_info;          dict_copy (volinfo->dict, new_volinfo->dict);          dict_copy (volinfo->gsync_slaves, new_volinfo->gsync_slaves); diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 114e57485fc..79da432bafe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1472,7 +1472,6 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,  out:          return ret;  } -  static int  brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                          dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -2712,24 +2711,22 @@ out:  }  static int -volgen_graph_build_clusters (volgen_graph_t *graph, -                             glusterd_volinfo_t *volinfo, char *xl_type, -                             char *xl_namefmt, size_t child_count, -                             size_t sub_count) +volgen_link_bricks (volgen_graph_t *graph, +                    glusterd_volinfo_t *volinfo, char *xl_type, +                    char *xl_namefmt, size_t child_count, +                    size_t sub_count, +                    xlator_t *trav)  {          int             i = 0;          int             j = 0; -        xlator_t        *txl = NULL;          xlator_t        *xl  = NULL; -        xlator_t        *trav = NULL;          char            *volname = NULL;          int             ret     = -1;          if (child_count == 0)                  goto out;          volname = volinfo->volname; -        txl = first_of (graph); -        for (trav = txl; --child_count; trav = trav->next); +          for (;; trav = trav->prev) {                  if ((i % sub_count) == 0) {                          xl = volgen_graph_add_nolink (graph, xl_type, @@ -2745,10 +2742,9 @@ volgen_graph_build_clusters (volgen_graph_t *graph,                  if (ret)                          goto out; -                if (trav == txl) -                        break; -                  i++; +                if (i == child_count) +                        break;          }          ret = j; @@ -2756,6 +2752,46 @@ out:          return ret;  } +static int +volgen_link_bricks_from_list_tail (volgen_graph_t *graph, +                             glusterd_volinfo_t *volinfo, +                             char *xl_type, +                             char *xl_namefmt, size_t child_count, +                             size_t sub_count) +{ +        xlator_t *trav = NULL; +        size_t     cnt = child_count; + +        for (trav = first_of(graph); --cnt; trav = trav->next) +                ; + +        return volgen_link_bricks (graph,  volinfo, +                                   xl_type, +                                   xl_namefmt, +                                   child_count, +                                   sub_count, +                                   trav); +} + +static int +volgen_link_bricks_from_list_head (volgen_graph_t *graph, +                                   glusterd_volinfo_t *volinfo, char *xl_type, +                                   char *xl_namefmt, size_t child_count, +                                   size_t sub_count) +{ +        xlator_t *trav = NULL; + +        for (trav = first_of(graph); trav->next; trav = trav->next) +                ; + +        return volgen_link_bricks (graph,  volinfo, +                                   xl_type, +                                   xl_namefmt, +                                   child_count, +                                   sub_count, +                                   trav); +} +  /**   * This is the build graph function for user-serviceable snapshots.   * Generates  snapview-client @@ -2948,7 +2984,7 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph,          else                  name_fmt = "%s-dht"; -        clusters = volgen_graph_build_clusters (graph,  volinfo, +        clusters = volgen_link_bricks_from_list_tail (graph,  volinfo,                                                  voltype,                                                  name_fmt,                                                  child_count, @@ -2985,7 +3021,7 @@ volgen_graph_build_ec_clusters (volgen_graph_t *graph,          xlator_t                *ec                 = NULL;          char                    option[32]          = {0}; -        clusters = volgen_graph_build_clusters (graph, volinfo, +        clusters = volgen_link_bricks_from_list_tail (graph, volinfo,                                                  disperse_args[0],                                                  disperse_args[1],                                                  volinfo->brick_count, @@ -3015,12 +3051,19 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,  {          char                    *replicate_args[]   = {"cluster/replicate",                                                         "%s-replicate-%d"}; +        char                    *tier_args[]        = {"cluster/tier", +                                                       "%s-tier-%d"};          char                    *stripe_args[]      = {"cluster/stripe",                                                         "%s-stripe-%d"}; +        char                    *disperse_args[]    = {"cluster/disperse", +                                                       "%s-disperse-%d"}; +        char                    option[32]          = "";          int                     rclusters           = 0;          int                     clusters            = 0;          int                     dist_count          = 0;          int                     ret                 = -1; +        xlator_t               *ec                  = NULL; +        xlator_t               *client              = NULL;          if (!volinfo->dist_leaf_count)                  goto out; @@ -3031,7 +3074,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,          /* All other cases, it will have one or the other cluster type */          switch (volinfo->type) {          case GF_CLUSTER_TYPE_REPLICATE: -                clusters = volgen_graph_build_clusters (graph, volinfo, +                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,                                                          replicate_args[0],                                                          replicate_args[1],                                                          volinfo->brick_count, @@ -3040,7 +3083,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,                          goto out;                  break;          case GF_CLUSTER_TYPE_STRIPE: -                clusters = volgen_graph_build_clusters (graph, volinfo, +                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,                                                          stripe_args[0],                                                          stripe_args[1],                                                          volinfo->brick_count, @@ -3048,11 +3091,18 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,                  if (clusters < 0)                          goto out;                  break; +        case GF_CLUSTER_TYPE_TIER: +                ret = volgen_link_bricks_from_list_head (graph, volinfo, +                                                         tier_args[0], +                                                         tier_args[1], +                                                         volinfo->brick_count, +                                                         volinfo->replica_count); +                break;          case GF_CLUSTER_TYPE_STRIPE_REPLICATE:                  /* Replicate after the clients, then stripe */                  if (volinfo->replica_count == 0)                          goto out; -                clusters = volgen_graph_build_clusters (graph, volinfo, +                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,                                                          replicate_args[0],                                                          replicate_args[1],                                                          volinfo->brick_count, @@ -3062,7 +3112,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,                  rclusters = volinfo->brick_count / volinfo->replica_count;                  GF_ASSERT (rclusters == clusters); -                clusters = volgen_graph_build_clusters (graph, volinfo, +                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,                                                          stripe_args[0],                                                          stripe_args[1],                                                          rclusters, @@ -3162,7 +3212,7 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,                          "log-buf-size option");          ret = volgen_graph_set_options_generic (graph, set_dict, "client", -                                            &log_flush_timeout_option_handler); +                                                &log_flush_timeout_option_handler);          if (ret)                  gf_log (this->name, GF_LOG_WARNING, "Failed to change "                          "log-flush-timeout option"); @@ -3170,6 +3220,88 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,  }  static int +volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, +                                         glusterd_volinfo_t *volinfo, +                                         gf_boolean_t is_quotad) +{ +        int                ret = -1; +        xlator_t          *root; +        xlator_t          *xl, *hxl, *cxl; +        glusterd_brickinfo_t    *brick              = NULL; +        char              *rule; +        int                st_brick_count = 0; +        int                st_replica_count = 0; +        int                st_disperse_count = 0; +        int                st_dist_leaf_count = 0; +        int                st_type = 0; +        char               st_volname[GD_VOLUME_NAME_MAX]; +        int                dist_count = 0; + +        st_brick_count     = volinfo->brick_count; +        st_replica_count   = volinfo->replica_count; +        st_disperse_count  = volinfo->disperse_count; +        st_type            = volinfo->type; +        st_dist_leaf_count = volinfo->dist_leaf_count; +        strcpy(st_volname, volinfo->volname); + +        volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count; +        volinfo->brick_count    = volinfo->tier_info.cold_brick_count; +        volinfo->replica_count  = volinfo->tier_info.cold_replica_count; +        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count; +        volinfo->type           = volinfo->tier_info.cold_type; +        sprintf (volinfo->volname, "%s-cold", st_volname); + +        ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); +        if (ret) +                goto out; +        cxl = first_of(graph); + +        volinfo->type           = GF_CLUSTER_TYPE_TIER; +        volinfo->brick_count    = volinfo->tier_info.hot_brick_count; +        volinfo->replica_count  = volinfo->tier_info.hot_replica_count; +        volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); +        volinfo->disperse_count = 0; + +        sprintf (volinfo->volname, "%s-hot", st_volname); + +        if (volinfo->dist_leaf_count == 1) { +                dist_count = volinfo->brick_count / volinfo->dist_leaf_count; +                ret = volgen_link_bricks_from_list_head (graph,  volinfo, +                                                         "cluster/distribute", +                                                         "%s-dht", +                                                         dist_count, +                                                         dist_count); +        } else { +                ret = volume_volgen_graph_build_clusters (graph, +                                                          volinfo, +                                                          _gf_false); +        } + +        hxl = first_of(graph); + +        xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s", +                                      "tier-dht", 0); +        gf_asprintf(&rule, "%s-hot-dht", st_volname); +        xlator_set_option(xl, "rule", rule); +        xlator_set_option(xl, "xattr-name", "trusted.tier-gfid"); + +        ret = volgen_xlator_link (xl, cxl); +        ret = volgen_xlator_link (xl, hxl); + +        st_type = GF_CLUSTER_TYPE_TIER; + + out: +        volinfo->brick_count     = st_brick_count; +        volinfo->replica_count   = st_replica_count; +        volinfo->disperse_count  = st_disperse_count; +        volinfo->type            = st_type; +        volinfo->dist_leaf_count = st_dist_leaf_count; +        strcpy(volinfo->volname, st_volname); + +        return ret; +} + +static int  client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                        dict_t *set_dict, void *param)  { @@ -3188,11 +3320,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          GF_ASSERT (conf);          volname = volinfo->volname; -        ret = volgen_graph_build_clients (graph, volinfo, set_dict, param); +        ret = volgen_graph_build_clients (graph, volinfo, set_dict, +                                          param);          if (ret)                  goto out; -        ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); +        if (volinfo->type == GF_CLUSTER_TYPE_TIER) +                ret = volume_volgen_graph_build_clusters_tier (graph, volinfo, _gf_false); +        else +                ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); +          if (ret == -1)                  goto out; @@ -3730,7 +3867,7 @@ volgen_graph_build_replicate_clusters (volgen_graph_t *graph,          char               *replicate_args[]   = {"cluster/replicate",                                                    "%s-replicate-%d"}; -        return volgen_graph_build_clusters (graph, volinfo, "cluster/replicate", +        return volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/replicate",                                              "%s-replicate-%d",                                              volinfo->brick_count,                                              volinfo->replica_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index ae866b7ccfc..ada814bb25d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1690,6 +1690,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .voltype     = "features/trash",            .op_version  = GD_OP_VERSION_3_7_0,          }, + +        /* tier translator - global tunables */ +        { .key         = "cluster.write-freq-thresold", +          .voltype     = "cluster/tier", +          .option      = "write-freq-thresold", +          .op_version  = GD_OP_VERSION_3_7_0, +          .flags       = OPT_FLAG_CLIENT_OPT +        }, +        { .key         = "cluster.read-freq-thresold", +          .voltype     = "cluster/tier", +          .option      = "read-freq-thresold", +          .op_version  = GD_OP_VERSION_3_7_0, +          .flags       = OPT_FLAG_CLIENT_OPT +        }, +        { .key         = "cluster.tier-promote-frequency", +          .voltype     = "cluster/tier", +          .option      = "tier-promote-frequency", +          .op_version  = GD_OP_VERSION_3_7_0, +          .flags       = OPT_FLAG_CLIENT_OPT +        }, +        { .key         = "cluster.tier-demote-frequency", +          .voltype     = "cluster/tier", +          .option      = "tier-demote-frequency", +          .op_version  = GD_OP_VERSION_3_7_0, +          .flags       = OPT_FLAG_CLIENT_OPT +        },          { .key         = "features.ctr-enabled",            .voltype     = "features/changetimerecorder",            .value       = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ad280eda053..bac1598598b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -302,9 +302,6 @@ typedef struct tier_info_ {          int                       hot_type;          int                       hot_brick_count;          int                       hot_replica_count; -        int                       hot_disperse_count; -        /*Commented for now Dan's DHT Tier patch will have it*/ -        /*tier_group_t             *root;*/  } gd_tier_info_t;  struct glusterd_volinfo_ { @@ -814,6 +811,12 @@ int  glusterd_handle_add_brick (rpcsvc_request_t *req);  int +glusterd_handle_attach_tier (rpcsvc_request_t *req); + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req); + +int  glusterd_handle_replace_brick (rpcsvc_request_t *req);  int  | 
