diff options
author | Dan Lambright <dlambrig@redhat.com> | 2015-02-25 16:11:23 -0500 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-19 06:32:28 -0700 |
commit | 6f71bc02df5bd177c2f5dbf4e54b2af1525ab979 (patch) | |
tree | a676a70da909dedebc21dca408fafc9dee9d5810 /xlators/mgmt/glusterd | |
parent | 99586305f66d6b5e81542139d84fbf111ace2554 (diff) |
glusterd: CLI commands to create and manage tiered volumes.
A tiered volume is a normal volume with some number of new bricks
representing "hot" storage. The "hot" bricks can be attached or
detached dynamically to a normal volume. When this happens, a new graph
is constructed. The root of the new graph is an instance of the tier
translator. One subvolume of the tier translator leads to the old volume,
and another leads to the new hot bricks.
volume attach-tier <VOLNAME> [replica <COUNT>] <NEW-BRICK> ... [force]
volume detach-tier <VOLNAME> [replica <COUNT>] <BRICK>
... <start|stop|status|commit|force>
gluster volume rebalance <volume> tier start
gluster volume rebalance <volume> tier stop
gluster volume rebalance <volume> tier status
The "tier start" CLI command starts a server side daemon. The daemon
initiates file level migration based on caching policies. The daemon's
status can be monitored and stopped.
Note that development of the "tier status" command is incomplete; it will
be added in a subsequent patch.
When the "hot" storage is detached, the tier translator is removed
from the graph and the tiered volume reverts to its original state as
described in the volume's info file.
For more background and design see the feature page [1].
[1]
http://www.gluster.org/community/documentation/index.php/Features/data-classification
Change-Id: Ic8042ce37327b850b9e199236e5be3dae95d2472
BUG: 1194753
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/9753
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 135 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 15 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 83 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 8 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 181 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 26 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 9 |
10 files changed, 438 insertions, 32 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index fd4618bb78c..fa5e533f135 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_nodes = 0; switch (volinfo->type) { + case GF_CLUSTER_TYPE_TIER: + ret = 1; + goto out; + case GF_CLUSTER_TYPE_NONE: case GF_CLUSTER_TYPE_STRIPE: case GF_CLUSTER_TYPE_DISPERSE: @@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req) int32_t replica_count = 0; int32_t stripe_count = 0; int type = 0; - this = THIS; GF_ASSERT(this); @@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req) total_bricks = volinfo->brick_count + brick_count; + if (dict_get (dict, "attach-tier")) { + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + snprintf (err_str, sizeof (err_str), + "Volume %s is already a tier.", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + goto brick_val; + } + if (!stripe_count && !replica_count) { if (volinfo->type == GF_CLUSTER_TYPE_NONE) goto brick_val; @@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols) GF_FREE (subvols); } +static int +glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo) +{ + char key[256] = {0,}; + char value[256] = {0,}; + int brick_num = 0; + int hot_brick_num = 0; + glusterd_brickinfo_t *brickinfo; + int ret = 0; + + /* cold tier bricks at tail of list so use reverse iteration */ + cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks, + brick_list) { + brick_num++; + if (brick_num > volinfo->tier_info.cold_brick_count) { + hot_brick_num++; + sprintf (key, "brick%d", hot_brick_num); + snprintf (value, 256, "%s:%s", + brickinfo->hostname, + brickinfo->path); + + ret = dict_set_str (dict, key, strdup(value)); + if (ret) + break; + } + } + + ret = dict_set_int32(dict, "count", hot_brick_num); + 
if (ret) + return -1; + + return hot_brick_num; +} + int __glusterd_handle_remove_brick (rpcsvc_request_t *req) { @@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) /* Do not allow remove-brick if the bricks given is less than the replica count or stripe count */ - if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) { + if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->type != GF_CLUSTER_TYPE_TIER)) { if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) { snprintf (err_str, sizeof (err_str), "Remove brick " @@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } + strcpy (brick_list, " "); if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && @@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + count = glusterd_set_detach_bricks(dict, volinfo); + while ( i <= count) { snprintf (key, sizeof (key), "brick%d", i); ret = dict_get_str (dict, key, &brick); @@ -836,6 +889,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo); + if (ret) { snprintf (err_str, sizeof (err_str), "Incorrect brick " "%s for volume %s", brick, volname); @@ -883,7 +937,8 @@ out: } - GF_FREE (brick_list); + if (brick_list) + GF_FREE (brick_list); subvol_matcher_destroy (subvols); free (cli_req.dict.dict_val); //its malloced by xdr @@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, ret = glusterd_resolve_brick (brickinfo); if (ret) goto out; - if (stripe_count || replica_count) { + + /* hot tier bricks are added to head of brick list */ + if (dict_get (dict, "attach-tier")) { + cds_list_add (&brickinfo->brick_list, &volinfo->bricks); + } else if (stripe_count || replica_count) { add_brick_at_right_order (brickinfo, volinfo, (i - 1), stripe_count, replica_count); } else { @@ -1674,6 +1733,7 @@ 
glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) break; + case GF_OP_CMD_DETACH: case GF_OP_CMD_COMMIT_FORCE: break; } @@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo, return ret; } +static int +glusterd_op_perform_attach_tier (dict_t *dict, + glusterd_volinfo_t *volinfo, + int count, + char *bricks) +{ + int ret = 0; + int replica_count = 0; + + /* + * Store the new (cold) tier's structure until the graph is generated. + * If there is a failure before the graph is generated the + * structure will revert to its original state. + */ + volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count; + volinfo->tier_info.cold_type = volinfo->type; + volinfo->tier_info.cold_brick_count = volinfo->brick_count; + volinfo->tier_info.cold_replica_count = volinfo->replica_count; + volinfo->tier_info.cold_disperse_count = volinfo->disperse_count; + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) + volinfo->tier_info.hot_replica_count = replica_count; + else + volinfo->tier_info.hot_replica_count = 1; + volinfo->tier_info.hot_brick_count = count; + + return ret; +} int glusterd_op_add_brick (dict_t *dict, char **op_errstr) @@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) xlator_t *this = NULL; char *bricks = NULL; int32_t count = 0; + int32_t replica_count = 0; this = THIS; GF_ASSERT (this); @@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) goto out; } + if (dict_get(dict, "attach-tier")) { + gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier"); + glusterd_op_perform_attach_tier (dict, volinfo, count, bricks); + } + ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to add bricks"); @@ -1829,6 +1924,14 @@ out: return ret; } +static void +glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo) +{ + volinfo->type = volinfo->tier_info.cold_type; + volinfo->replica_count = 
volinfo->tier_info.cold_replica_count; + volinfo->disperse_count = volinfo->tier_info.cold_disperse_count; +} + int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { @@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) force = 1; break; + case GF_OP_CMD_DETACH: + glusterd_op_perform_detach_tier (volinfo); + /* fall through */ + case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { @@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) volinfo->sub_count = replica_count; volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); - if (replica_count == 1) { + /* + * volinfo->type and sub_count have already been set for + * volumes undergoing a detach operation, they should not + * be modified here. + */ + if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) { if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { volinfo->type = GF_CLUSTER_TYPE_NONE; /* backward compatibility */ @@ -2224,3 +2336,16 @@ out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + +int +glusterd_handle_attach_tier (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_add_brick); +} + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_remove_brick); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 77fa96400ba..a41b36b9715 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -4817,6 +4817,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, 
glusterd_handle_add_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA}, [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA}, diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 75756518f28..c5fcb7698e5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -448,6 +448,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin char name[1024] = {0,}; gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; this = THIS; GF_ASSERT (this); @@ -514,7 +515,11 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin ret = dict_get_str (dict, "volname", &volname); if (ret) goto out; - snprintf (name, 1024, "%s-dht",volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + snprintf (name, 1024, "tier-dht"); + else + snprintf (name, 1024, "%s-dht", volname); brick_req->name = gf_strdup (name); break; @@ -5159,6 +5164,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, while ( i <= count) { snprintf (key, 256, "brick%d", i); + ret = dict_get_str (dict, key, &brick); if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick"); @@ -5167,8 +5173,10 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, 
&brickinfo); + if (ret) goto out; + if (glusterd_is_brick_started (brickinfo)) { pending_node = GF_CALLOC (1, sizeof (*pending_node), gf_gld_mt_pending_node_t); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index ba67df436ff..0d66571300f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -278,6 +278,13 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, "--xlator-option", "*replicate*.readdir-failover=off", "--xlator-option", "*dht.readdir-optimize=on", NULL); + + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, + "*tier-dht.xattr-name=trusted.tier-gfid"); + } + runner_add_arg (&runner, "--xlator-option"); runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); runner_add_arg (&runner, "--xlator-option"); @@ -487,6 +494,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req) goto out; if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || (cmd == GF_DEFRAG_CMD_STOP)) { ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, dict, msg, sizeof (msg)); @@ -556,6 +564,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_TIER: /* Check if the connected clients are all of version * glusterfs-3.6 and higher. 
This is needed to prevent some data * loss issues that could occur when older clients are connected @@ -690,7 +699,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) /* Set task-id, if available, in op_ctx dict for operations other than * start */ - if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (cmd == GF_DEFRAG_CMD_STATUS || + cmd == GF_DEFRAG_CMD_STOP || + cmd == GF_DEFRAG_CMD_STATUS_TIER) { if (!uuid_is_null (volinfo->rebal.rebalance_id)) { ctx = glusterd_op_get_ctx (); if (!ctx) { @@ -720,6 +731,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + case GF_DEFRAG_CMD_START_TIER: /* Reset defrag status to 'NOT STARTED' whenever a * remove-brick/rebalance command is issued to remove * stale information from previous run. @@ -791,6 +803,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) break; case GF_DEFRAG_CMD_STATUS: + case GF_DEFRAG_CMD_STATUS_TIER: break; default: break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 5b2b14503ae..5696229572d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -812,6 +812,63 @@ out: " for volume %s", volinfo->volname); return ret; } + +int32_t +glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo) +{ + int32_t ret = -1; + char buf[PATH_MAX] = ""; + + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { + ret = 0; + goto out; + } + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_brick_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_COUNT, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", + volinfo->tier_info.cold_replica_count); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof 
(buf), "%d", volinfo->tier_info.cold_disperse_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_brick_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_replica_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_type); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_TYPE, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_type); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_TYPE, buf); + if (ret) + goto out; + + out: + return ret; +} + int32_t glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) { @@ -917,6 +974,8 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) goto out; } + ret = glusterd_volume_write_tier_details (fd, volinfo); + ret = glusterd_volume_write_snap_details (fd, volinfo); out: @@ -2725,6 +2784,27 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT, + strlen (key))) { + volinfo->tier_info.cold_brick_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT, + strlen (key))) { + volinfo->tier_info.cold_replica_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT, + strlen (key))) { + volinfo->tier_info.cold_disperse_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT, + strlen (key))) { + volinfo->tier_info.cold_brick_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT, + 
strlen (key))) { + volinfo->tier_info.cold_replica_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE, + strlen (key))) { + volinfo->tier_info.hot_type = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE, + strlen (key))) { + volinfo->tier_info.cold_type = atoi (value); } else { if (is_key_glusterd_hooks_friendly (key)) { @@ -2809,6 +2889,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo->redundancy_count > 0); break; + case GF_CLUSTER_TYPE_TIER: + break; + default: GF_ASSERT (0); break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index afa96be77cf..45ed86a4163 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -64,6 +64,14 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" #define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" +#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type" +#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count" +#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count" +#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count" +#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type" +#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count" +#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count" + #define GLUSTERD_STORE_KEY_SNAP_NAME "name" #define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" #define GLUSTERD_STORE_KEY_SNAP_DESC "desc" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 727a19d24d1..27357955fe8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -560,6 +560,7 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, new_volinfo->sub_count = volinfo->sub_count; new_volinfo->transport_type = volinfo->transport_type; new_volinfo->brick_count = volinfo->brick_count; + 
new_volinfo->tier_info = volinfo->tier_info; dict_copy (volinfo->dict, new_volinfo->dict); dict_copy (volinfo->gsync_slaves, new_volinfo->gsync_slaves); diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 114e57485fc..79da432bafe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1472,7 +1472,6 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, out: return ret; } - static int brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -2712,24 +2711,22 @@ out: } static int -volgen_graph_build_clusters (volgen_graph_t *graph, - glusterd_volinfo_t *volinfo, char *xl_type, - char *xl_namefmt, size_t child_count, - size_t sub_count) +volgen_link_bricks (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count, + xlator_t *trav) { int i = 0; int j = 0; - xlator_t *txl = NULL; xlator_t *xl = NULL; - xlator_t *trav = NULL; char *volname = NULL; int ret = -1; if (child_count == 0) goto out; volname = volinfo->volname; - txl = first_of (graph); - for (trav = txl; --child_count; trav = trav->next); + for (;; trav = trav->prev) { if ((i % sub_count) == 0) { xl = volgen_graph_add_nolink (graph, xl_type, @@ -2745,10 +2742,9 @@ volgen_graph_build_clusters (volgen_graph_t *graph, if (ret) goto out; - if (trav == txl) - break; - i++; + if (i == child_count) + break; } ret = j; @@ -2756,6 +2752,46 @@ out: return ret; } +static int +volgen_link_bricks_from_list_tail (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count) +{ + xlator_t *trav = NULL; + size_t cnt = child_count; + + for (trav = first_of(graph); --cnt; trav = trav->next) + ; + + return volgen_link_bricks (graph, volinfo, + xl_type, + xl_namefmt, + child_count, + 
sub_count, + trav); +} + +static int +volgen_link_bricks_from_list_head (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count) +{ + xlator_t *trav = NULL; + + for (trav = first_of(graph); trav->next; trav = trav->next) + ; + + return volgen_link_bricks (graph, volinfo, + xl_type, + xl_namefmt, + child_count, + sub_count, + trav); +} + /** * This is the build graph function for user-serviceable snapshots. * Generates snapview-client @@ -2948,7 +2984,7 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph, else name_fmt = "%s-dht"; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, voltype, name_fmt, child_count, @@ -2985,7 +3021,7 @@ volgen_graph_build_ec_clusters (volgen_graph_t *graph, xlator_t *ec = NULL; char option[32] = {0}; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, disperse_args[0], disperse_args[1], volinfo->brick_count, @@ -3015,12 +3051,19 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, { char *replicate_args[] = {"cluster/replicate", "%s-replicate-%d"}; + char *tier_args[] = {"cluster/tier", + "%s-tier-%d"}; char *stripe_args[] = {"cluster/stripe", "%s-stripe-%d"}; + char *disperse_args[] = {"cluster/disperse", + "%s-disperse-%d"}; + char option[32] = ""; int rclusters = 0; int clusters = 0; int dist_count = 0; int ret = -1; + xlator_t *ec = NULL; + xlator_t *client = NULL; if (!volinfo->dist_leaf_count) goto out; @@ -3031,7 +3074,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, /* All other cases, it will have one or the other cluster type */ switch (volinfo->type) { case GF_CLUSTER_TYPE_REPLICATE: - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, replicate_args[0], replicate_args[1], volinfo->brick_count, @@ -3040,7 +3083,7 @@ 
volume_volgen_graph_build_clusters (volgen_graph_t *graph, goto out; break; case GF_CLUSTER_TYPE_STRIPE: - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, stripe_args[0], stripe_args[1], volinfo->brick_count, @@ -3048,11 +3091,18 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (clusters < 0) goto out; break; + case GF_CLUSTER_TYPE_TIER: + ret = volgen_link_bricks_from_list_head (graph, volinfo, + tier_args[0], + tier_args[1], + volinfo->brick_count, + volinfo->replica_count); + break; case GF_CLUSTER_TYPE_STRIPE_REPLICATE: /* Replicate after the clients, then stripe */ if (volinfo->replica_count == 0) goto out; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, replicate_args[0], replicate_args[1], volinfo->brick_count, @@ -3062,7 +3112,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, rclusters = volinfo->brick_count / volinfo->replica_count; GF_ASSERT (rclusters == clusters); - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, stripe_args[0], stripe_args[1], rclusters, @@ -3162,7 +3212,7 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph, "log-buf-size option"); ret = volgen_graph_set_options_generic (graph, set_dict, "client", - &log_flush_timeout_option_handler); + &log_flush_timeout_option_handler); if (ret) gf_log (this->name, GF_LOG_WARNING, "Failed to change " "log-flush-timeout option"); @@ -3170,6 +3220,88 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph, } static int +volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + gf_boolean_t is_quotad) +{ + int ret = -1; + xlator_t *root; + xlator_t *xl, *hxl, *cxl; + glusterd_brickinfo_t *brick = NULL; + char *rule; + int st_brick_count = 0; + int st_replica_count = 0; + int st_disperse_count = 
0; + int st_dist_leaf_count = 0; + int st_type = 0; + char st_volname[GD_VOLUME_NAME_MAX]; + int dist_count = 0; + + st_brick_count = volinfo->brick_count; + st_replica_count = volinfo->replica_count; + st_disperse_count = volinfo->disperse_count; + st_type = volinfo->type; + st_dist_leaf_count = volinfo->dist_leaf_count; + strcpy(st_volname, volinfo->volname); + + volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count; + volinfo->brick_count = volinfo->tier_info.cold_brick_count; + volinfo->replica_count = volinfo->tier_info.cold_replica_count; + volinfo->disperse_count = volinfo->tier_info.cold_disperse_count; + volinfo->type = volinfo->tier_info.cold_type; + sprintf (volinfo->volname, "%s-cold", st_volname); + + ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (ret) + goto out; + cxl = first_of(graph); + + volinfo->type = GF_CLUSTER_TYPE_TIER; + volinfo->brick_count = volinfo->tier_info.hot_brick_count; + volinfo->replica_count = volinfo->tier_info.hot_replica_count; + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); + volinfo->disperse_count = 0; + + sprintf (volinfo->volname, "%s-hot", st_volname); + + if (volinfo->dist_leaf_count == 1) { + dist_count = volinfo->brick_count / volinfo->dist_leaf_count; + ret = volgen_link_bricks_from_list_head (graph, volinfo, + "cluster/distribute", + "%s-dht", + dist_count, + dist_count); + } else { + ret = volume_volgen_graph_build_clusters (graph, + volinfo, + _gf_false); + } + + hxl = first_of(graph); + + xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s", + "tier-dht", 0); + gf_asprintf(&rule, "%s-hot-dht", st_volname); + xlator_set_option(xl, "rule", rule); + xlator_set_option(xl, "xattr-name", "trusted.tier-gfid"); + + ret = volgen_xlator_link (xl, cxl); + ret = volgen_xlator_link (xl, hxl); + + st_type = GF_CLUSTER_TYPE_TIER; + + out: + volinfo->brick_count = st_brick_count; + volinfo->replica_count = st_replica_count; + volinfo->disperse_count = 
st_disperse_count; + volinfo->type = st_type; + volinfo->dist_leaf_count = st_dist_leaf_count; + strcpy(volinfo->volname, st_volname); + + return ret; +} + +static int client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, void *param) { @@ -3188,11 +3320,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, GF_ASSERT (conf); volname = volinfo->volname; - ret = volgen_graph_build_clients (graph, volinfo, set_dict, param); + ret = volgen_graph_build_clients (graph, volinfo, set_dict, + param); if (ret) goto out; - ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + ret = volume_volgen_graph_build_clusters_tier (graph, volinfo, _gf_false); + else + ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (ret == -1) goto out; @@ -3730,7 +3867,7 @@ volgen_graph_build_replicate_clusters (volgen_graph_t *graph, char *replicate_args[] = {"cluster/replicate", "%s-replicate-%d"}; - return volgen_graph_build_clusters (graph, volinfo, "cluster/replicate", + return volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/replicate", "%s-replicate-%d", volinfo->brick_count, volinfo->replica_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index ae866b7ccfc..ada814bb25d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1690,6 +1690,32 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "features/trash", .op_version = GD_OP_VERSION_3_7_0, }, + + /* tier translator - global tunables */ + { .key = "cluster.write-freq-thresold", + .voltype = "cluster/tier", + .option = "write-freq-thresold", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-freq-thresold", + .voltype = "cluster/tier", + .option = "read-freq-thresold", + .op_version = 
GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.tier-promote-frequency", + .voltype = "cluster/tier", + .option = "tier-promote-frequency", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.tier-demote-frequency", + .voltype = "cluster/tier", + .option = "tier-demote-frequency", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, { .key = "features.ctr-enabled", .voltype = "features/changetimerecorder", .value = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ad280eda053..bac1598598b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -302,9 +302,6 @@ typedef struct tier_info_ { int hot_type; int hot_brick_count; int hot_replica_count; - int hot_disperse_count; - /*Commented for now Dan's DHT Tier patch will have it*/ - /*tier_group_t *root;*/ } gd_tier_info_t; struct glusterd_volinfo_ { @@ -814,6 +811,12 @@ int glusterd_handle_add_brick (rpcsvc_request_t *req); int +glusterd_handle_attach_tier (rpcsvc_request_t *req); + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req); + +int glusterd_handle_replace_brick (rpcsvc_request_t *req); int |