diff options
-rw-r--r-- | cli/src/cli-cmd-parser.c | 23 | ||||
-rw-r--r-- | cli/src/cli-cmd-volume.c | 144 | ||||
-rw-r--r-- | cli/src/cli-rpc-ops.c | 215 | ||||
-rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 2 | ||||
-rw-r--r-- | rpc/xdr/src/cli1-xdr.x | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 135 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 15 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 83 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 8 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 181 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 26 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 9 |
15 files changed, 821 insertions, 43 deletions
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 5520c9e46b1..54a57008457 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -355,6 +355,10 @@ cli_validate_disperse_volume (char *word, gf1_cluster_type type, cli_err ("striped-replicated-dispersed volume " "is not supported"); goto out; + case GF_CLUSTER_TYPE_TIER: + cli_err ("tier-dispersed volume is not " + "supported"); + goto out; case GF_CLUSTER_TYPE_STRIPE: cli_err ("striped-dispersed volume is not " "supported"); @@ -490,6 +494,11 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words, case GF_CLUSTER_TYPE_STRIPE: type = GF_CLUSTER_TYPE_STRIPE_REPLICATE; break; + case GF_CLUSTER_TYPE_TIER: + cli_err ("replicated-tiered volume is not " + "supported"); + goto out; + break; case GF_CLUSTER_TYPE_DISPERSE: cli_err ("replicated-dispersed volume is not " "supported"); @@ -529,6 +538,10 @@ cli_cmd_volume_create_parse (struct cli_state *state, const char **words, cli_err ("striped-dispersed volume is not " "supported"); goto out; + case GF_CLUSTER_TYPE_TIER: + cli_err ("striped-tier volume is not " + "supported"); + goto out; } if (wordcount < (index + 2)) { ret = -1; @@ -3384,6 +3397,16 @@ cli_cmd_volume_defrag_parse (const char **words, int wordcount, if (strcmp (words[3], "start") && strcmp (words[3], "stop") && strcmp (words[3], "status")) goto out; + } else if ((strcmp (words[3], "tier") == 0) && + (strcmp (words[4], "start") == 0)) { + volname = (char *) words[2]; + cmd = GF_DEFRAG_CMD_START_TIER; + goto done; + } else if ((strcmp (words[3], "tier") == 0) && + (strcmp (words[4], "status") == 0)) { + volname = (char *) words[2]; + cmd = GF_DEFRAG_CMD_STATUS_TIER; + goto done; } else { if (strcmp (words[3], "fix-layout") && strcmp (words[3], "start")) diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 6c950da4e97..3098d74491c 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -840,6 +840,142 @@ out: return ret; 
} +int +cli_cmd_volume_attach_tier_cbk (struct cli_state *state, + struct cli_cmd_word *word, const char **words, + int wordcount) +{ + int ret = -1; + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + dict_t *options = NULL; + int sent = 0; + int parse_error = 0; + gf_answer_t answer = GF_ANSWER_NO; + cli_local_t *local = NULL; + + frame = create_frame (THIS, THIS->ctx->pool); + if (!frame) + goto out; + + ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options); + if (ret) { + cli_usage_out (word->pattern); + parse_error = 1; + goto out; + } + + if (state->mode & GLUSTER_MODE_WIGNORE) { + ret = dict_set_int32 (options, "force", _gf_true); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, "Failed to set force " + "option"); + goto out; + } + } + + ret = dict_set_int32 (options, "attach-tier", 1); + if (ret) + goto out; + + ret = dict_set_int32 (options, "type", GF_CLUSTER_TYPE_TIER); + if (ret) + goto out; + + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ATTACH_TIER]; + + CLI_LOCAL_INIT (local, words, frame, options); + + if (proc->fn) { + ret = proc->fn (frame, THIS, options); + } + +out: + if (ret) { + cli_cmd_sent_status_get (&sent); + if ((sent == 0) && (parse_error == 0)) + cli_out ("attach-tier failed"); + } + + CLI_STACK_DESTROY (frame); + + return ret; +} + +int +cli_cmd_volume_detach_tier_cbk (struct cli_state *state, + struct cli_cmd_word *word, const char **words, + int wordcount) +{ + int ret = -1; + rpc_clnt_procedure_t *proc = NULL; + call_frame_t *frame = NULL; + dict_t *options = NULL; + int sent = 0; + int parse_error = 0; + gf_answer_t answer = GF_ANSWER_NO; + cli_local_t *local = NULL; + int need_question = 0; + + const char *question = "Removing tier can result in data loss. 
" + "Do you want to Continue?"; + + if (wordcount != 3) + goto out; + + frame = create_frame (THIS, THIS->ctx->pool); + if (!frame) + goto out; + + options = dict_new (); + if (!options) + goto out; + + ret = dict_set_int32 (options, "force", 1); + if (ret) + goto out; + + ret = dict_set_int32 (options, "command", GF_OP_CMD_DETACH); + if (ret) + goto out; + + ret = dict_set_str (options, "volname", (char *)words[2]); + if (ret) + goto out; + + ret = dict_set_int32 (options, "count", 1); + if (ret) + goto out; + + if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) { + /* we need to ask question only in case of 'commit or force' */ + answer = cli_cmd_get_confirmation (state, question); + if (GF_ANSWER_NO == answer) { + ret = 0; + goto out; + } + } + + proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DETACH_TIER]; + + CLI_LOCAL_INIT (local, words, frame, options); + + if (proc->fn) { + ret = proc->fn (frame, THIS, options); + } + +out: + if (ret) { + cli_cmd_sent_status_get (&sent); + if ((sent == 0) && (parse_error == 0)) + cli_out ("Volume detach-tier failed"); + } + + CLI_STACK_DESTROY (frame); + + return ret; +} + static int gf_cli_create_auxiliary_mount (char *volname) { @@ -2435,6 +2571,14 @@ struct cli_cmd volume_cmds[] = { cli_cmd_volume_rename_cbk, "rename volume <VOLNAME> to <NEW-VOLNAME>"},*/ + { "volume attach-tier <VOLNAME> [<replica COUNT>] <NEW-BRICK>...", + cli_cmd_volume_attach_tier_cbk, + "attach tier to volume <VOLNAME>"}, + + { "volume detach-tier <VOLNAME>", + cli_cmd_volume_detach_tier_cbk, + "detach tier from volume <VOLNAME>"}, + { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ... 
[force]", cli_cmd_volume_add_brick_cbk, "add brick to volume <VOLNAME>"}, diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 6e66e377ed5..c9b01694436 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -61,6 +61,7 @@ char *cli_vol_type_str[] = {"Distribute", "Replicate", "Striped-Replicate", "Disperse", + "Tier", "Distributed-Stripe", "Distributed-Replicate", "Distributed-Striped-Replicate", @@ -739,8 +740,9 @@ xml_output: vol_type = type; // Distributed (stripe/replicate/stripe-replica) setups - if ((type > 0) && ( dist_count < brick_count)) - vol_type = type + 4; + if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) && + (dist_count < brick_count)) + vol_type = type + 5; cli_out ("Volume Name: %s", volname); cli_out ("Type: %s", cli_vol_type_str[vol_type]); @@ -1441,6 +1443,134 @@ out: } int +gf_cli_print_tier_status (dict_t *dict, enum gf_task_types task_type) +{ + int ret = -1; + int count = 0; + int i = 1; + char key[256] = {0,}; + gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED; + uint64_t files = 0; + uint64_t size = 0; + uint64_t lookup = 0; + char *node_name = NULL; + uint64_t failures = 0; + uint64_t skipped = 0; + double elapsed = 0; + char *status_str = NULL; + char *size_str = NULL; + + ret = dict_get_int32 (dict, "count", &count); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, "count not set"); + goto out; + } + + + cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "Node", + "Rebalanced-files", "size", "scanned", "failures", "skipped", + "status", "run time in secs"); + cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "---------", + "-----------", "-----------", "-----------", "-----------", + "-----------", "------------", "--------------"); + for (i = 1; i <= count; i++) { + /* Reset the variables to prevent carryover of values */ + node_name = NULL; + files = 0; + size = 0; + lookup = 0; + skipped = 0; + status_str = NULL; + elapsed = 0; + + /* Check if status is NOT_STARTED, and continue early */ + memset 
(key, 0, 256); + snprintf (key, 256, "status-%d", i); + ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd); + if (ret) { + gf_log ("cli", GF_LOG_TRACE, "failed to get status"); + goto out; + } + if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd) + continue; + + + snprintf (key, 256, "node-name-%d", i); + ret = dict_get_str (dict, key, &node_name); + if (ret) + gf_log ("cli", GF_LOG_TRACE, "failed to get node-name"); + + memset (key, 0, 256); + snprintf (key, 256, "files-%d", i); + ret = dict_get_uint64 (dict, key, &files); + if (ret) + gf_log ("cli", GF_LOG_TRACE, + "failed to get file count"); + + memset (key, 0, 256); + snprintf (key, 256, "size-%d", i); + ret = dict_get_uint64 (dict, key, &size); + if (ret) + gf_log ("cli", GF_LOG_TRACE, + "failed to get size of xfer"); + + memset (key, 0, 256); + snprintf (key, 256, "lookups-%d", i); + ret = dict_get_uint64 (dict, key, &lookup); + if (ret) + gf_log ("cli", GF_LOG_TRACE, + "failed to get lookedup file count"); + + memset (key, 0, 256); + snprintf (key, 256, "failures-%d", i); + ret = dict_get_uint64 (dict, key, &failures); + if (ret) + gf_log ("cli", GF_LOG_TRACE, + "failed to get failures count"); + + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", i); + ret = dict_get_uint64 (dict, key, &skipped); + if (ret) + gf_log ("cli", GF_LOG_TRACE, + "failed to get skipped count"); + + /* For remove-brick include skipped count into failure count*/ + if (task_type != GF_TASK_TYPE_REBALANCE) { + failures += skipped; + skipped = 0; + } + + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_get_double (dict, key, &elapsed); + if (ret) + gf_log ("cli", GF_LOG_TRACE, "failed to get run-time"); + + /* Check for array bound */ + if (status_rcd >= GF_DEFRAG_STATUS_MAX) + status_rcd = GF_DEFRAG_STATUS_MAX; + + status_str = cli_vol_task_status_str[status_rcd]; + size_str = gf_uint64_2human_readable(size); + if (size_str) { + cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13" + PRIu64" 
%13"PRIu64 " %20s %18.2f", node_name, + files, size_str, lookup, failures, skipped, + status_str, elapsed); + } else { + cli_out ("%40s %16"PRIu64 " %13"PRIu64 " %13"PRIu64 + " %13"PRIu64" %13"PRIu64 " %20s %18.2f", + node_name, files, size, lookup, failures, + skipped, status_str, elapsed); + } + GF_FREE(size_str); + } +out: + return ret; +} + +int gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { @@ -1504,7 +1634,9 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, } } - if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) && + if (!((cmd == GF_DEFRAG_CMD_STOP) || + (cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER)) && !(global_state->mode & GLUSTER_MODE_XML)) { /* All other possibilites are about starting a rebalance */ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); @@ -1577,7 +1709,12 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REBALANCE); + if (cmd == GF_DEFRAG_CMD_STATUS_TIER) + ret = gf_cli_print_tier_status (dict, GF_TASK_TYPE_REBALANCE); + else + ret = gf_cli_print_rebalance_status (dict, + GF_TASK_TYPE_REBALANCE); + if (ret) gf_log ("cli", GF_LOG_ERROR, "Failed to print rebalance status"); @@ -3616,7 +3753,7 @@ int32_t gf_cli_reset_volume (call_frame_t *frame, xlator_t *this, void *data) { - gf_cli_req req = {{0,}}; + gf_cli_req req = {{0,} }; int ret = 0; dict_t *dict = NULL; @@ -3665,7 +3802,7 @@ int32_t gf_cli_set_volume (call_frame_t *frame, xlator_t *this, void *data) { - gf_cli_req req = {{0,}}; + gf_cli_req req = {{0,} }; int ret = 0; dict_t *dict = NULL; @@ -3691,7 +3828,7 @@ int32_t gf_cli_add_brick (call_frame_t *frame, xlator_t *this, void *data) { - gf_cli_req req = {{0,}}; + gf_cli_req req = {{0,} }; int ret = 0; dict_t *dict = NULL; char *volname = NULL; @@ -3726,6 +3863,66 @@ out: } int32_t +gf_cli_attach_tier (call_frame_t *frame, 
xlator_t *this, + void *data) +{ + gf_cli_req req = {{0,} }; + int ret = 0; + dict_t *dict = NULL; + + if (!frame || !this || !data) { + ret = -1; + goto out; + } + + dict = data; + + if (ret) + goto out; + + ret = cli_to_glusterd (&req, frame, gf_cli_add_brick_cbk, + (xdrproc_t) xdr_gf_cli_req, dict, + GLUSTER_CLI_ATTACH_TIER, this, + cli_rpc_prog, NULL); +out: + gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + + GF_FREE (req.dict.dict_val); + return ret; +} + +int32_t +gf_cli_detach_tier (call_frame_t *frame, xlator_t *this, + void *data) +{ + gf_cli_req req = {{0,} }; + int ret = 0; + dict_t *dict = NULL; + char *volname = NULL; + + if (!frame || !this || !data) { + ret = -1; + goto out; + } + + dict = data; + + ret = cli_to_glusterd (&req, frame, gf_cli_remove_brick_cbk, + (xdrproc_t) xdr_gf_cli_req, dict, + GLUSTER_CLI_DETACH_TIER, this, + cli_rpc_prog, NULL); + + +out: + gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret); + + GF_FREE (req.dict.dict_val); + + return ret; +} + + +int32_t gf_cli_remove_brick (call_frame_t *frame, xlator_t *this, void *data) { @@ -9965,7 +10162,9 @@ struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume}, [GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha}, [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt}, - [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot} + [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot}, + [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", gf_cli_attach_tier}, + [GLUSTER_CLI_DETACH_TIER] = {"DETACH_TIER", gf_cli_detach_tier} }; struct rpc_clnt_program cli_prog = { diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index bf68366f5dd..60697b8fa66 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -183,6 +183,8 @@ enum gluster_cli_procnum { GLUSTER_CLI_GET_VOL_OPT, GLUSTER_CLI_GANESHA, GLUSTER_CLI_BITROT, + GLUSTER_CLI_ATTACH_TIER, + 
GLUSTER_CLI_DETACH_TIER, GLUSTER_CLI_MAXVALUE, }; diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x index 925700699ab..72581b0c5d5 100644 --- a/rpc/xdr/src/cli1-xdr.x +++ b/rpc/xdr/src/cli1-xdr.x @@ -3,7 +3,9 @@ GF_DEFRAG_CMD_STOP, GF_DEFRAG_CMD_STATUS, GF_DEFRAG_CMD_START_LAYOUT_FIX, - GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */ + GF_DEFRAG_CMD_START_FORCE, /* used by remove-brick data migration */ + GF_DEFRAG_CMD_START_TIER, + GF_DEFRAG_CMD_STATUS_TIER }; enum gf_defrag_status_t { @@ -24,7 +26,8 @@ GF_CLUSTER_TYPE_STRIPE, GF_CLUSTER_TYPE_REPLICATE, GF_CLUSTER_TYPE_STRIPE_REPLICATE, - GF_CLUSTER_TYPE_DISPERSE + GF_CLUSTER_TYPE_DISPERSE, + GF_CLUSTER_TYPE_TIER }; enum gf1_cli_replace_op { @@ -53,7 +56,8 @@ enum gf_bitrot_type { GF_OP_CMD_COMMIT, GF_OP_CMD_STOP, GF_OP_CMD_STATUS, - GF_OP_CMD_COMMIT_FORCE + GF_OP_CMD_COMMIT_FORCE, + GF_OP_CMD_DETACH }; enum gf_quota_type { diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index fd4618bb78c..fa5e533f135 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_nodes = 0; switch (volinfo->type) { + case GF_CLUSTER_TYPE_TIER: + ret = 1; + goto out; + case GF_CLUSTER_TYPE_NONE: case GF_CLUSTER_TYPE_STRIPE: case GF_CLUSTER_TYPE_DISPERSE: @@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req) int32_t replica_count = 0; int32_t stripe_count = 0; int type = 0; - this = THIS; GF_ASSERT(this); @@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req) total_bricks = volinfo->brick_count + brick_count; + if (dict_get (dict, "attach-tier")) { + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + snprintf (err_str, sizeof (err_str), + "Volume %s is already a tier.", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + goto 
brick_val; + } + if (!stripe_count && !replica_count) { if (volinfo->type == GF_CLUSTER_TYPE_NONE) goto brick_val; @@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols) GF_FREE (subvols); } +static int +glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo) +{ + char key[256] = {0,}; + char value[256] = {0,}; + int brick_num = 0; + int hot_brick_num = 0; + glusterd_brickinfo_t *brickinfo; + int ret = 0; + + /* cold tier bricks at tail of list so use reverse iteration */ + cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks, + brick_list) { + brick_num++; + if (brick_num > volinfo->tier_info.cold_brick_count) { + hot_brick_num++; + sprintf (key, "brick%d", hot_brick_num); + snprintf (value, 256, "%s:%s", + brickinfo->hostname, + brickinfo->path); + + ret = dict_set_str (dict, key, strdup(value)); + if (ret) + break; + } + } + + ret = dict_set_int32(dict, "count", hot_brick_num); + if (ret) + return -1; + + return hot_brick_num; +} + int __glusterd_handle_remove_brick (rpcsvc_request_t *req) { @@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) /* Do not allow remove-brick if the bricks given is less than the replica count or stripe count */ - if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) { + if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->type != GF_CLUSTER_TYPE_TIER)) { if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) { snprintf (err_str, sizeof (err_str), "Remove brick " @@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } + strcpy (brick_list, " "); if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && @@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + count = glusterd_set_detach_bricks(dict, volinfo); + while ( i <= count) { snprintf (key, sizeof (key), "brick%d", i); ret = dict_get_str (dict, key, &brick); @@ -836,6 +889,7 @@ 
__glusterd_handle_remove_brick (rpcsvc_request_t *req) ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo); + if (ret) { snprintf (err_str, sizeof (err_str), "Incorrect brick " "%s for volume %s", brick, volname); @@ -883,7 +937,8 @@ out: } - GF_FREE (brick_list); + if (brick_list) + GF_FREE (brick_list); subvol_matcher_destroy (subvols); free (cli_req.dict.dict_val); //its malloced by xdr @@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, ret = glusterd_resolve_brick (brickinfo); if (ret) goto out; - if (stripe_count || replica_count) { + + /* hot tier bricks are added to head of brick list */ + if (dict_get (dict, "attach-tier")) { + cds_list_add (&brickinfo->brick_list, &volinfo->bricks); + } else if (stripe_count || replica_count) { add_brick_at_right_order (brickinfo, volinfo, (i - 1), stripe_count, replica_count); } else { @@ -1674,6 +1733,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) break; + case GF_OP_CMD_DETACH: case GF_OP_CMD_COMMIT_FORCE: break; } @@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo, return ret; } +static int +glusterd_op_perform_attach_tier (dict_t *dict, + glusterd_volinfo_t *volinfo, + int count, + char *bricks) +{ + int ret = 0; + int replica_count = 0; + + /* + * Store the new (cold) tier's structure until the graph is generated. + * If there is a failure before the graph is generated the + * structure will revert to its original state. 
+ */ + volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count; + volinfo->tier_info.cold_type = volinfo->type; + volinfo->tier_info.cold_brick_count = volinfo->brick_count; + volinfo->tier_info.cold_replica_count = volinfo->replica_count; + volinfo->tier_info.cold_disperse_count = volinfo->disperse_count; + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) + volinfo->tier_info.hot_replica_count = replica_count; + else + volinfo->tier_info.hot_replica_count = 1; + volinfo->tier_info.hot_brick_count = count; + + return ret; +} int glusterd_op_add_brick (dict_t *dict, char **op_errstr) @@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) xlator_t *this = NULL; char *bricks = NULL; int32_t count = 0; + int32_t replica_count = 0; this = THIS; GF_ASSERT (this); @@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) goto out; } + if (dict_get(dict, "attach-tier")) { + gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier"); + glusterd_op_perform_attach_tier (dict, volinfo, count, bricks); + } + ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to add bricks"); @@ -1829,6 +1924,14 @@ out: return ret; } +static void +glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo) +{ + volinfo->type = volinfo->tier_info.cold_type; + volinfo->replica_count = volinfo->tier_info.cold_replica_count; + volinfo->disperse_count = volinfo->tier_info.cold_disperse_count; +} + int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { @@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) force = 1; break; + case GF_OP_CMD_DETACH: + glusterd_op_perform_detach_tier (volinfo); + /* fall through */ + case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { @@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) volinfo->sub_count = replica_count; volinfo->dist_leaf_count = 
glusterd_get_dist_leaf_count (volinfo); - if (replica_count == 1) { + /* + * volinfo->type and sub_count have already been set for + * volumes undergoing a detach operation, they should not + * be modified here. + */ + if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) { if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { volinfo->type = GF_CLUSTER_TYPE_NONE; /* backward compatibility */ @@ -2224,3 +2336,16 @@ out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + +int +glusterd_handle_attach_tier (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_add_brick); +} + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_remove_brick); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 77fa96400ba..a41b36b9715 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -4817,6 +4817,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA}, [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", 
GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA}, diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 75756518f28..c5fcb7698e5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -448,6 +448,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin char name[1024] = {0,}; gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; this = THIS; GF_ASSERT (this); @@ -514,7 +515,11 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin ret = dict_get_str (dict, "volname", &volname); if (ret) goto out; - snprintf (name, 1024, "%s-dht",volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + snprintf (name, 1024, "tier-dht"); + else + snprintf (name, 1024, "%s-dht", volname); brick_req->name = gf_strdup (name); break; @@ -5159,6 +5164,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, while ( i <= count) { snprintf (key, 256, "brick%d", i); + ret = dict_get_str (dict, key, &brick); if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick"); @@ -5167,8 +5173,10 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, &brickinfo); + if (ret) goto out; + if (glusterd_is_brick_started (brickinfo)) { pending_node = GF_CALLOC (1, sizeof (*pending_node), gf_gld_mt_pending_node_t); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index ba67df436ff..0d66571300f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -278,6 +278,13 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, "--xlator-option", "*replicate*.readdir-failover=off", 
"--xlator-option", "*dht.readdir-optimize=on", NULL); + + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, + "*tier-dht.xattr-name=trusted.tier-gfid"); + } + runner_add_arg (&runner, "--xlator-option"); runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); runner_add_arg (&runner, "--xlator-option"); @@ -487,6 +494,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req) goto out; if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || (cmd == GF_DEFRAG_CMD_STOP)) { ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, dict, msg, sizeof (msg)); @@ -556,6 +564,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_TIER: /* Check if the connected clients are all of version * glusterfs-3.6 and higher. This is needed to prevent some data * loss issues that could occur when older clients are connected @@ -690,7 +699,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) /* Set task-id, if available, in op_ctx dict for operations other than * start */ - if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (cmd == GF_DEFRAG_CMD_STATUS || + cmd == GF_DEFRAG_CMD_STOP || + cmd == GF_DEFRAG_CMD_STATUS_TIER) { if (!uuid_is_null (volinfo->rebal.rebalance_id)) { ctx = glusterd_op_get_ctx (); if (!ctx) { @@ -720,6 +731,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + case GF_DEFRAG_CMD_START_TIER: /* Reset defrag status to 'NOT STARTED' whenever a * remove-brick/rebalance command is issued to remove * stale information from previous run. 
@@ -791,6 +803,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
                 break;
 
         case GF_DEFRAG_CMD_STATUS:
+        case GF_DEFRAG_CMD_STATUS_TIER:
                 break;
         default:
                 break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 5b2b14503ae..5696229572d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -812,6 +812,64 @@ out:
                         " for volume %s", volinfo->volname);
         return ret;
 }
+
+/* Render VALUE in decimal and save it under KEY in the store file FD.
+ * Hands back gf_store_save_value ()'s return value unchanged. */
+static int32_t
+gd_tier_store_int (int fd, char *key, int value)
+{
+        char    numbuf[PATH_MAX] = "";
+
+        snprintf (numbuf, sizeof (numbuf), "%d", value);
+        return gf_store_save_value (fd, key, numbuf);
+}
+
+/* Save the hot/cold tier layout held in volinfo->tier_info to FD.
+ * A volume whose type is not GF_CLUSTER_TYPE_TIER writes nothing and
+ * returns 0.  The first write that fails stops the sequence and its
+ * return value is passed back to the caller. */
+int32_t
+glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo)
+{
+        int32_t rc = 0;
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                return 0;
+
+        rc = gd_tier_store_int (fd, GLUSTERD_STORE_KEY_COLD_COUNT,
+                                volinfo->tier_info.cold_brick_count);
+        if (rc)
+                return rc;
+
+        rc = gd_tier_store_int (fd, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                                volinfo->tier_info.cold_replica_count);
+        if (rc)
+                return rc;
+
+        rc = gd_tier_store_int (fd, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                                volinfo->tier_info.cold_disperse_count);
+        if (rc)
+                return rc;
+
+        rc = gd_tier_store_int (fd, GLUSTERD_STORE_KEY_HOT_COUNT,
+                                volinfo->tier_info.hot_brick_count);
+        if (rc)
+                return rc;
+
+        rc = gd_tier_store_int (fd, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                                volinfo->tier_info.hot_replica_count);
+        if (rc)
+                return rc;
+
+        rc = gd_tier_store_int (fd, GLUSTERD_STORE_KEY_HOT_TYPE,
+                                volinfo->tier_info.hot_type);
+        if (rc)
+                return rc;
+
+        return gd_tier_store_int (fd, GLUSTERD_STORE_KEY_COLD_TYPE,
+                                  volinfo->tier_info.cold_type);
+}
+
 int32_t
 glusterd_volume_exclude_options_write (int 
fd, glusterd_volinfo_t *volinfo)
 {
@@ -917,6 +974,10 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
                 goto out;
         }
 
+        ret = glusterd_volume_write_tier_details (fd, volinfo);
+        if (ret)
+                goto out;
+
         ret = glusterd_volume_write_snap_details (fd, volinfo);
 
 out:
@@ -2725,6 +2786,30 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
                                      strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {
                         strncpy (volinfo->parent_volname, value,
                                  sizeof(volinfo->parent_volname) - 1);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_COUNT))) {
+                        /* Tier keys: compare over the length of the key
+                         * name, as the branch above does, and load each
+                         * value into the hot/cold field it was saved from. */
+                        volinfo->tier_info.cold_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT))) {
+                        volinfo->tier_info.cold_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT))) {
+                        volinfo->tier_info.cold_disperse_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_HOT_COUNT))) {
+                        volinfo->tier_info.hot_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                                     strlen (GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT))) {
+                        volinfo->tier_info.hot_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE,
+                                     strlen (GLUSTERD_STORE_KEY_HOT_TYPE))) {
+                        volinfo->tier_info.hot_type = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE,
+                                     strlen (GLUSTERD_STORE_KEY_COLD_TYPE))) {
+                        volinfo->tier_info.cold_type = atoi (value);
                 } else {
 
                         if (is_key_glusterd_hooks_friendly (key)) {
@@ -2809,6 +2894,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
                 GF_ASSERT (volinfo->redundancy_count > 0);
                 break;
 
+        case GF_CLUSTER_TYPE_TIER:
+                break;
+
         default:
                 GF_ASSERT (0);
                 break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index afa96be77cf..45ed86a4163 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -64,6 +64,14 @@ typedef enum glusterd_store_ver_ac_{
 #define GLUSTERD_STORE_KEY_VOL_OP_VERSION 
"op-version" #define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" +#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type" +#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count" +#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count" +#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count" +#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type" +#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count" +#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count" + #define GLUSTERD_STORE_KEY_SNAP_NAME "name" #define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" #define GLUSTERD_STORE_KEY_SNAP_DESC "desc" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 727a19d24d1..27357955fe8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -560,6 +560,7 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, new_volinfo->sub_count = volinfo->sub_count; new_volinfo->transport_type = volinfo->transport_type; new_volinfo->brick_count = volinfo->brick_count; + new_volinfo->tier_info = volinfo->tier_info; dict_copy (volinfo->dict, new_volinfo->dict); dict_copy (volinfo->gsync_slaves, new_volinfo->gsync_slaves); diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 114e57485fc..79da432bafe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1472,7 +1472,6 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, out: return ret; } - static int brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -2712,24 +2711,22 @@ out: } static int -volgen_graph_build_clusters (volgen_graph_t *graph, - glusterd_volinfo_t *volinfo, char *xl_type, - char *xl_namefmt, size_t child_count, - size_t sub_count) +volgen_link_bricks (volgen_graph_t *graph, + glusterd_volinfo_t 
*volinfo, char *xl_type,
+                    char *xl_namefmt, size_t child_count,
+                    size_t sub_count,
+                    xlator_t *trav)
 {
         int        i = 0;
         int        j = 0;
-        xlator_t  *txl = NULL;
         xlator_t  *xl = NULL;
-        xlator_t  *trav = NULL;
         char      *volname = NULL;
         int        ret     = -1;
 
         if (child_count == 0)
                 goto out;
         volname = volinfo->volname;
-        txl = first_of (graph);
-        for (trav = txl; --child_count; trav = trav->next);
+
         for (;; trav = trav->prev) {
                 if ((i % sub_count) == 0) {
                         xl = volgen_graph_add_nolink (graph, xl_type,
@@ -2745,10 +2742,9 @@ volgen_graph_build_clusters (volgen_graph_t *graph,
                 if (ret)
                         goto out;
 
-                if (trav == txl)
-                        break;
-
                 i++;
+                if (i == child_count)
+                        break;
         }
 
         ret = j;
@@ -2756,6 +2752,67 @@ out:
         return ret;
 }
 
+/*
+ * Step (child_count - 1) ->next hops from first_of (graph), then let
+ * volgen_link_bricks () consume child_count xlators from that point,
+ * moving along ->prev.  Returns its result, or -1 when child_count is 0.
+ */
+static int
+volgen_link_bricks_from_list_tail (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo,
+                                   char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        xlator_t *trav = NULL;
+        size_t    cnt  = child_count;
+
+        /* --cnt on a zero size_t wraps around and would run the walk off
+         * the end of the list.  volgen_link_bricks () answers -1 for an
+         * empty request anyway, so report that before walking. */
+        if (child_count == 0)
+                return -1;
+
+        for (trav = first_of(graph); --cnt; trav = trav->next)
+                ;
+
+        return volgen_link_bricks (graph, volinfo,
+                                   xl_type,
+                                   xl_namefmt,
+                                   child_count,
+                                   sub_count,
+                                   trav);
+}
+
+/*
+ * Walk to the xlator whose ->next is NULL, then let volgen_link_bricks ()
+ * consume child_count xlators from there, moving along ->prev.  Returns
+ * its result, or -1 when child_count is 0.
+ */
+static int
+volgen_link_bricks_from_list_head (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo, char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        xlator_t *trav = NULL;
+
+        /* Same answer the callee gives for an empty request, without
+         * touching the list first. */
+        if (child_count == 0)
+                return -1;
+
+        for (trav = first_of(graph); trav->next; trav = trav->next)
+                ;
+
+        return volgen_link_bricks (graph, volinfo,
+                                   xl_type,
+                                   xl_namefmt,
+                                   child_count,
+                                   sub_count,
+                                   trav);
+}
+
 /**
   * This is the build graph function for user-serviceable snapshots. 
* Generates snapview-client @@ -2948,7 +2984,7 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph, else name_fmt = "%s-dht"; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, voltype, name_fmt, child_count, @@ -2985,7 +3021,7 @@ volgen_graph_build_ec_clusters (volgen_graph_t *graph, xlator_t *ec = NULL; char option[32] = {0}; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, disperse_args[0], disperse_args[1], volinfo->brick_count, @@ -3015,12 +3051,19 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, { char *replicate_args[] = {"cluster/replicate", "%s-replicate-%d"}; + char *tier_args[] = {"cluster/tier", + "%s-tier-%d"}; char *stripe_args[] = {"cluster/stripe", "%s-stripe-%d"}; + char *disperse_args[] = {"cluster/disperse", + "%s-disperse-%d"}; + char option[32] = ""; int rclusters = 0; int clusters = 0; int dist_count = 0; int ret = -1; + xlator_t *ec = NULL; + xlator_t *client = NULL; if (!volinfo->dist_leaf_count) goto out; @@ -3031,7 +3074,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, /* All other cases, it will have one or the other cluster type */ switch (volinfo->type) { case GF_CLUSTER_TYPE_REPLICATE: - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, replicate_args[0], replicate_args[1], volinfo->brick_count, @@ -3040,7 +3083,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, goto out; break; case GF_CLUSTER_TYPE_STRIPE: - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, stripe_args[0], stripe_args[1], volinfo->brick_count, @@ -3048,11 +3091,18 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (clusters < 0) goto out; break; + case GF_CLUSTER_TYPE_TIER: + ret = volgen_link_bricks_from_list_head (graph, 
volinfo,
+                                                         tier_args[0],
+                                                         tier_args[1],
+                                                         volinfo->brick_count,
+                                                         volinfo->replica_count);
+                break;
         case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
                 /* Replicate after the clients, then stripe */
                 if (volinfo->replica_count == 0)
                         goto out;
-                clusters = volgen_graph_build_clusters (graph, volinfo,
+                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
                                                         replicate_args[0],
                                                         replicate_args[1],
                                                         volinfo->brick_count,
@@ -3062,7 +3112,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
 
                 rclusters = volinfo->brick_count / volinfo->replica_count;
                 GF_ASSERT (rclusters == clusters);
-                clusters = volgen_graph_build_clusters (graph, volinfo,
+                clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
                                                         stripe_args[0],
                                                         stripe_args[1],
                                                         rclusters,
@@ -3162,7 +3212,7 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
                         "log-buf-size option");
 
         ret = volgen_graph_set_options_generic (graph, set_dict, "client",
-                                        &log_flush_timeout_option_handler);
+                                                &log_flush_timeout_option_handler);
         if (ret)
                 gf_log (this->name, GF_LOG_WARNING, "Failed to change "
                         "log-flush-timeout option");
@@ -3170,6 +3220,123 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
 }
 
+/*
+ * Build the cluster layer of a tiered volume's client graph.
+ *
+ * volinfo is temporarily rewritten to describe the cold tier and then the
+ * hot tier so the existing builders can be reused; the saved fields are
+ * put back at "out".  first_of (graph) is sampled after each pass and the
+ * two xlators are linked under one "cluster/tier" xlator named "tier-dht".
+ *
+ * is_quotad is accepted but not consulted: both passes use _gf_false.
+ *
+ * Returns the status of the last step that ran.  Each step is checked, so
+ * a failure is not overwritten by a later call that happens to succeed.
+ */
+static int
+volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         gf_boolean_t is_quotad)
+{
+        int                      ret                = -1;
+        xlator_t                *xl                 = NULL;
+        xlator_t                *hxl                = NULL;
+        xlator_t                *cxl                = NULL;
+        char                    *rule               = NULL;
+        int                      st_brick_count     = 0;
+        int                      st_replica_count   = 0;
+        int                      st_disperse_count  = 0;
+        int                      st_dist_leaf_count = 0;
+        int                      st_type            = 0;
+        char                     st_volname[GD_VOLUME_NAME_MAX];
+        int                      dist_count         = 0;
+
+        st_brick_count = volinfo->brick_count;
+        st_replica_count = volinfo->replica_count;
+        st_disperse_count = volinfo->disperse_count;
+        st_type = volinfo->type;
+        st_dist_leaf_count = volinfo->dist_leaf_count;
+        strcpy(st_volname, volinfo->volname);
+
+        /* Pass 1: describe and build the cold tier. */
+        volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count;
+        volinfo->brick_count = volinfo->tier_info.cold_brick_count;
+        volinfo->replica_count = volinfo->tier_info.cold_replica_count;
+        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+        volinfo->type = volinfo->tier_info.cold_type;
+        /* NOTE(review): unbounded write; confirm volinfo->volname can hold
+         * the "-cold"/"-hot" suffix on top of the longest volume name. */
+        sprintf (volinfo->volname, "%s-cold", st_volname);
+
+        ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+        if (ret)
+                goto out;
+        cxl = first_of(graph);
+
+        /* Pass 2: describe and build the hot tier. */
+        volinfo->type = GF_CLUSTER_TYPE_TIER;
+        volinfo->brick_count = volinfo->tier_info.hot_brick_count;
+        volinfo->replica_count = volinfo->tier_info.hot_replica_count;
+        volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
+        volinfo->disperse_count = 0;
+
+        sprintf (volinfo->volname, "%s-hot", st_volname);
+
+        if (volinfo->dist_leaf_count == 1) {
+                dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+                ret = volgen_link_bricks_from_list_head (graph, volinfo,
+                                                         "cluster/distribute",
+                                                         "%s-dht",
+                                                         dist_count,
+                                                         dist_count);
+        } else {
+                ret = volume_volgen_graph_build_clusters (graph,
+                                                          volinfo,
+                                                          _gf_false);
+        }
+        /* The link helper returns a cluster count on success, so only a
+         * negative value is treated as failure here. */
+        if (ret < 0)
+                goto out;
+
+        hxl = first_of(graph);
+
+        xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s",
+                                      "tier-dht", 0);
+        if (!xl) {
+                ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): rule is allocated by gf_asprintf () and never
+         * released here; free it at "out" unless xlator_set_option ()
+         * keeps the pointer -- confirm against its implementation. */
+        ret = gf_asprintf(&rule, "%s-hot-dht", st_volname);
+        if (ret < 0) {
+                ret = -1;
+                goto out;
+        }
+        xlator_set_option(xl, "rule", rule);
+        xlator_set_option(xl, "xattr-name", "trusted.tier-gfid");
+
+        ret = volgen_xlator_link (xl, cxl);
+        if (ret)
+                goto out;
+        ret = volgen_xlator_link (xl, hxl);
+
+        st_type = GF_CLUSTER_TYPE_TIER;
+
+ out:
+        volinfo->brick_count = st_brick_count;
+        volinfo->replica_count = st_replica_count;
+        volinfo->disperse_count = st_disperse_count;
+        volinfo->type = st_type;
+        volinfo->dist_leaf_count = st_dist_leaf_count;
+        strcpy(volinfo->volname, st_volname);
+
+        return ret;
+}
+
 static int
 client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
                       dict_t *set_dict, void *param)
 {
@@ -3188,11 +3355,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
         GF_ASSERT (conf);
 
         volname = volinfo->volname;
-        ret = volgen_graph_build_clients (graph, volinfo, 
set_dict, param); + ret = volgen_graph_build_clients (graph, volinfo, set_dict, + param); if (ret) goto out; - ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + ret = volume_volgen_graph_build_clusters_tier (graph, volinfo, _gf_false); + else + ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (ret == -1) goto out; @@ -3730,7 +3867,7 @@ volgen_graph_build_replicate_clusters (volgen_graph_t *graph, char *replicate_args[] = {"cluster/replicate", "%s-replicate-%d"}; - return volgen_graph_build_clusters (graph, volinfo, "cluster/replicate", + return volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/replicate", "%s-replicate-%d", volinfo->brick_count, volinfo->replica_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index ae866b7ccfc..ada814bb25d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1690,6 +1690,32 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "features/trash", .op_version = GD_OP_VERSION_3_7_0, }, + + /* tier translator - global tunables */ + { .key = "cluster.write-freq-thresold", + .voltype = "cluster/tier", + .option = "write-freq-thresold", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-freq-thresold", + .voltype = "cluster/tier", + .option = "read-freq-thresold", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.tier-promote-frequency", + .voltype = "cluster/tier", + .option = "tier-promote-frequency", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.tier-demote-frequency", + .voltype = "cluster/tier", + .option = "tier-demote-frequency", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, { .key = "features.ctr-enabled", .voltype = 
"features/changetimerecorder", .value = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ad280eda053..bac1598598b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -302,9 +302,6 @@ typedef struct tier_info_ { int hot_type; int hot_brick_count; int hot_replica_count; - int hot_disperse_count; - /*Commented for now Dan's DHT Tier patch will have it*/ - /*tier_group_t *root;*/ } gd_tier_info_t; struct glusterd_volinfo_ { @@ -814,6 +811,12 @@ int glusterd_handle_add_brick (rpcsvc_request_t *req); int +glusterd_handle_attach_tier (rpcsvc_request_t *req); + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req); + +int glusterd_handle_replace_brick (rpcsvc_request_t *req); int |