diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 135 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 15 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 83 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 8 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 181 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 26 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 9 |
10 files changed, 438 insertions, 32 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index fd4618bb78c..fa5e533f135 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_nodes = 0; switch (volinfo->type) { + case GF_CLUSTER_TYPE_TIER: + ret = 1; + goto out; + case GF_CLUSTER_TYPE_NONE: case GF_CLUSTER_TYPE_STRIPE: case GF_CLUSTER_TYPE_DISPERSE: @@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req) int32_t replica_count = 0; int32_t stripe_count = 0; int type = 0; - this = THIS; GF_ASSERT(this); @@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req) total_bricks = volinfo->brick_count + brick_count; + if (dict_get (dict, "attach-tier")) { + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + snprintf (err_str, sizeof (err_str), + "Volume %s is already a tier.", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + goto brick_val; + } + if (!stripe_count && !replica_count) { if (volinfo->type == GF_CLUSTER_TYPE_NONE) goto brick_val; @@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols) GF_FREE (subvols); } +static int +glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo) +{ + char key[256] = {0,}; + char value[256] = {0,}; + int brick_num = 0; + int hot_brick_num = 0; + glusterd_brickinfo_t *brickinfo; + int ret = 0; + + /* cold tier bricks at tail of list so use reverse iteration */ + cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks, + brick_list) { + brick_num++; + if (brick_num > volinfo->tier_info.cold_brick_count) { + hot_brick_num++; + sprintf (key, "brick%d", hot_brick_num); + snprintf (value, 256, "%s:%s", + brickinfo->hostname, + brickinfo->path); + + ret = dict_set_str (dict, key, strdup(value)); + if (ret) + break; + } + } + + ret = dict_set_int32(dict, "count", hot_brick_num); + if (ret) + return -1; + + return hot_brick_num; +} + int __glusterd_handle_remove_brick (rpcsvc_request_t *req) { @@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) /* Do not allow remove-brick if the bricks given is less than the replica count or stripe count */ - if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) { + if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->type != GF_CLUSTER_TYPE_TIER)) { if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) { snprintf (err_str, sizeof (err_str), "Remove brick " @@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } + strcpy (brick_list, " "); if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && @@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) goto out; } + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + count = glusterd_set_detach_bricks(dict, volinfo); + while ( i <= count) { snprintf (key, sizeof (key), "brick%d", i); ret = dict_get_str (dict, key, &brick); @@ -836,6 +889,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo); + if (ret) { snprintf (err_str, sizeof (err_str), "Incorrect brick " "%s for volume %s", brick, volname); @@ -883,7 +937,8 @@ out: } - GF_FREE (brick_list); + if (brick_list) + GF_FREE (brick_list); subvol_matcher_destroy (subvols); free (cli_req.dict.dict_val); //its malloced by xdr @@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, ret = glusterd_resolve_brick (brickinfo); if (ret) goto out; - if (stripe_count || replica_count) { + + /* hot tier bricks are added to head of brick list */ + if (dict_get (dict, "attach-tier")) { + cds_list_add (&brickinfo->brick_list, &volinfo->bricks); + } else if (stripe_count || replica_count) { add_brick_at_right_order (brickinfo, volinfo, (i - 1), stripe_count, replica_count); } else { @@ -1674,6 +1733,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) break; + case GF_OP_CMD_DETACH: case GF_OP_CMD_COMMIT_FORCE: break; } @@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo, return ret; } +static int +glusterd_op_perform_attach_tier (dict_t *dict, + glusterd_volinfo_t *volinfo, + int count, + char *bricks) +{ + int ret = 0; + int replica_count = 0; + + /* + * Store the new (cold) tier's structure until the graph is generated. + * If there is a failure before the graph is generated the + * structure will revert to its original state. + */ + volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count; + volinfo->tier_info.cold_type = volinfo->type; + volinfo->tier_info.cold_brick_count = volinfo->brick_count; + volinfo->tier_info.cold_replica_count = volinfo->replica_count; + volinfo->tier_info.cold_disperse_count = volinfo->disperse_count; + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) + volinfo->tier_info.hot_replica_count = replica_count; + else + volinfo->tier_info.hot_replica_count = 1; + volinfo->tier_info.hot_brick_count = count; + + return ret; +} int glusterd_op_add_brick (dict_t *dict, char **op_errstr) @@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) xlator_t *this = NULL; char *bricks = NULL; int32_t count = 0; + int32_t replica_count = 0; this = THIS; GF_ASSERT (this); @@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr) goto out; } + if (dict_get(dict, "attach-tier")) { + gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier"); + glusterd_op_perform_attach_tier (dict, volinfo, count, bricks); + } + ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to add bricks"); @@ -1829,6 +1924,14 @@ out: return ret; } +static void +glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo) +{ + volinfo->type = volinfo->tier_info.cold_type; + volinfo->replica_count = volinfo->tier_info.cold_replica_count; + volinfo->disperse_count = volinfo->tier_info.cold_disperse_count; +} + int glusterd_op_remove_brick (dict_t *dict, char **op_errstr) { @@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) force = 1; break; + case GF_OP_CMD_DETACH: + glusterd_op_perform_detach_tier (volinfo); + /* fall through */ + case GF_OP_CMD_COMMIT_FORCE: if (volinfo->decommission_in_progress) { @@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) volinfo->sub_count = replica_count; volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); - if (replica_count == 1) { + /* + * volinfo->type and sub_count have already been set for + * volumes undergoing a detach operation, they should not + * be modified here. + */ + if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) { if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { volinfo->type = GF_CLUSTER_TYPE_NONE; /* backward compatibility */ @@ -2224,3 +2336,16 @@ out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } + +int +glusterd_handle_attach_tier (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_add_brick); +} + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_remove_brick); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 77fa96400ba..a41b36b9715 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -4817,6 +4817,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = { [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA}, [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA}, [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA}, diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 75756518f28..c5fcb7698e5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -448,6 +448,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin char name[1024] = {0,}; gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID; xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; this = THIS; GF_ASSERT (this); @@ -514,7 +515,11 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin ret = dict_get_str (dict, "volname", &volname); if (ret) goto out; - snprintf (name, 1024, "%s-dht",volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + snprintf (name, 1024, "tier-dht"); + else + snprintf (name, 1024, "%s-dht", volname); brick_req->name = gf_strdup (name); break; @@ -5159,6 +5164,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, while ( i <= count) { snprintf (key, 256, "brick%d", i); + ret = dict_get_str (dict, key, &brick); if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick"); @@ -5167,8 +5173,10 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, &brickinfo); + if (ret) goto out; + if (glusterd_is_brick_started (brickinfo)) { pending_node = GF_CALLOC (1, sizeof (*pending_node), gf_gld_mt_pending_node_t); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index ba67df436ff..0d66571300f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -278,6 +278,13 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, "--xlator-option", "*replicate*.readdir-failover=off", "--xlator-option", "*dht.readdir-optimize=on", NULL); + + if (volinfo->type == GF_CLUSTER_TYPE_TIER) { + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, + "*tier-dht.xattr-name=trusted.tier-gfid"); + } + runner_add_arg (&runner, "--xlator-option"); runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); runner_add_arg (&runner, "--xlator-option"); @@ -487,6 +494,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req) goto out; if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STATUS_TIER) || (cmd == GF_DEFRAG_CMD_STOP)) { ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, dict, msg, sizeof (msg)); @@ -556,6 +564,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) switch (cmd) { case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_TIER: /* Check if the connected clients are all of version * glusterfs-3.6 and higher. This is needed to prevent some data * loss issues that could occur when older clients are connected @@ -690,7 +699,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) /* Set task-id, if available, in op_ctx dict for operations other than * start */ - if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (cmd == GF_DEFRAG_CMD_STATUS || + cmd == GF_DEFRAG_CMD_STOP || + cmd == GF_DEFRAG_CMD_STATUS_TIER) { if (!uuid_is_null (volinfo->rebal.rebalance_id)) { ctx = glusterd_op_get_ctx (); if (!ctx) { @@ -720,6 +731,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: + case GF_DEFRAG_CMD_START_TIER: /* Reset defrag status to 'NOT STARTED' whenever a * remove-brick/rebalance command is issued to remove * stale information from previous run. @@ -791,6 +803,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) break; case GF_DEFRAG_CMD_STATUS: + case GF_DEFRAG_CMD_STATUS_TIER: break; default: break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 5b2b14503ae..5696229572d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -812,6 +812,63 @@ out: " for volume %s", volinfo->volname); return ret; } + +int32_t +glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo) +{ + int32_t ret = -1; + char buf[PATH_MAX] = ""; + + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { + ret = 0; + goto out; + } + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_brick_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_COUNT, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", + volinfo->tier_info.cold_replica_count); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_disperse_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_brick_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_replica_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT, + buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.hot_type); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_HOT_TYPE, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->tier_info.cold_type); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_COLD_TYPE, buf); + if (ret) + goto out; + + out: + return ret; +} + int32_t glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) { @@ -917,6 +974,8 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) goto out; } + ret = glusterd_volume_write_tier_details (fd, volinfo); + ret = glusterd_volume_write_snap_details (fd, volinfo); out: @@ -2725,6 +2784,27 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT, + strlen (key))) { + volinfo->tier_info.cold_brick_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT, + strlen (key))) { + volinfo->tier_info.cold_replica_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT, + strlen (key))) { + volinfo->tier_info.cold_disperse_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT, + strlen (key))) { + volinfo->tier_info.cold_brick_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT, + strlen (key))) { + volinfo->tier_info.cold_replica_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE, + strlen (key))) { + volinfo->tier_info.hot_type = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE, + strlen (key))) { + volinfo->tier_info.cold_type = atoi (value); } else { if (is_key_glusterd_hooks_friendly (key)) { @@ -2809,6 +2889,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo->redundancy_count > 0); break; + case GF_CLUSTER_TYPE_TIER: + break; + default: GF_ASSERT (0); break; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index afa96be77cf..45ed86a4163 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -64,6 +64,14 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" #define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" +#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type" +#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count" +#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count" +#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count" +#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type" +#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count" +#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count" + #define GLUSTERD_STORE_KEY_SNAP_NAME "name" #define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" #define GLUSTERD_STORE_KEY_SNAP_DESC "desc" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 727a19d24d1..27357955fe8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -560,6 +560,7 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, new_volinfo->sub_count = volinfo->sub_count; new_volinfo->transport_type = volinfo->transport_type; new_volinfo->brick_count = volinfo->brick_count; + new_volinfo->tier_info = volinfo->tier_info; dict_copy (volinfo->dict, new_volinfo->dict); dict_copy (volinfo->gsync_slaves, new_volinfo->gsync_slaves); diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 114e57485fc..79da432bafe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1472,7 +1472,6 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, out: return ret; } - static int brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -2712,24 +2711,22 @@ out: } static int -volgen_graph_build_clusters (volgen_graph_t *graph, - glusterd_volinfo_t *volinfo, char *xl_type, - char *xl_namefmt, size_t child_count, - size_t sub_count) +volgen_link_bricks (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count, + xlator_t *trav) { int i = 0; int j = 0; - xlator_t *txl = NULL; xlator_t *xl = NULL; - xlator_t *trav = NULL; char *volname = NULL; int ret = -1; if (child_count == 0) goto out; volname = volinfo->volname; - txl = first_of (graph); - for (trav = txl; --child_count; trav = trav->next); + for (;; trav = trav->prev) { if ((i % sub_count) == 0) { xl = volgen_graph_add_nolink (graph, xl_type, @@ -2745,10 +2742,9 @@ volgen_graph_build_clusters (volgen_graph_t *graph, if (ret) goto out; - if (trav == txl) - break; - i++; + if (i == child_count) + break; } ret = j; @@ -2756,6 +2752,46 @@ out: return ret; } +static int +volgen_link_bricks_from_list_tail (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count) +{ + xlator_t *trav = NULL; + size_t cnt = child_count; + + for (trav = first_of(graph); --cnt; trav = trav->next) + ; + + return volgen_link_bricks (graph, volinfo, + xl_type, + xl_namefmt, + child_count, + sub_count, + trav); +} + +static int +volgen_link_bricks_from_list_head (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count) +{ + xlator_t *trav = NULL; + + for (trav = first_of(graph); trav->next; trav = trav->next) + ; + + return volgen_link_bricks (graph, volinfo, + xl_type, + xl_namefmt, + child_count, + sub_count, + trav); +} + /** * This is the build graph function for user-serviceable snapshots. * Generates snapview-client @@ -2948,7 +2984,7 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph, else name_fmt = "%s-dht"; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, voltype, name_fmt, child_count, @@ -2985,7 +3021,7 @@ volgen_graph_build_ec_clusters (volgen_graph_t *graph, xlator_t *ec = NULL; char option[32] = {0}; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, disperse_args[0], disperse_args[1], volinfo->brick_count, @@ -3015,12 +3051,19 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, { char *replicate_args[] = {"cluster/replicate", "%s-replicate-%d"}; + char *tier_args[] = {"cluster/tier", + "%s-tier-%d"}; char *stripe_args[] = {"cluster/stripe", "%s-stripe-%d"}; + char *disperse_args[] = {"cluster/disperse", + "%s-disperse-%d"}; + char option[32] = ""; int rclusters = 0; int clusters = 0; int dist_count = 0; int ret = -1; + xlator_t *ec = NULL; + xlator_t *client = NULL; if (!volinfo->dist_leaf_count) goto out; @@ -3031,7 +3074,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, /* All other cases, it will have one or the other cluster type */ switch (volinfo->type) { case GF_CLUSTER_TYPE_REPLICATE: - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, replicate_args[0], replicate_args[1], volinfo->brick_count, @@ -3040,7 +3083,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, goto out; break; case GF_CLUSTER_TYPE_STRIPE: - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, stripe_args[0], stripe_args[1], volinfo->brick_count, @@ -3048,11 +3091,18 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (clusters < 0) goto out; break; + case GF_CLUSTER_TYPE_TIER: + ret = volgen_link_bricks_from_list_head (graph, volinfo, + tier_args[0], + tier_args[1], + volinfo->brick_count, + volinfo->replica_count); + break; case GF_CLUSTER_TYPE_STRIPE_REPLICATE: /* Replicate after the clients, then stripe */ if (volinfo->replica_count == 0) goto out; - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, replicate_args[0], replicate_args[1], volinfo->brick_count, @@ -3062,7 +3112,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, rclusters = volinfo->brick_count / volinfo->replica_count; GF_ASSERT (rclusters == clusters); - clusters = volgen_graph_build_clusters (graph, volinfo, + clusters = volgen_link_bricks_from_list_tail (graph, volinfo, stripe_args[0], stripe_args[1], rclusters, @@ -3162,7 +3212,7 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph, "log-buf-size option"); ret = volgen_graph_set_options_generic (graph, set_dict, "client", - &log_flush_timeout_option_handler); + &log_flush_timeout_option_handler); if (ret) gf_log (this->name, GF_LOG_WARNING, "Failed to change " "log-flush-timeout option"); @@ -3170,6 +3220,88 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph, } static int +volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + gf_boolean_t is_quotad) +{ + int ret = -1; + xlator_t *root; + xlator_t *xl, *hxl, *cxl; + glusterd_brickinfo_t *brick = NULL; + char *rule; + int st_brick_count = 0; + int st_replica_count = 0; + int st_disperse_count = 0; + int st_dist_leaf_count = 0; + int st_type = 0; + char st_volname[GD_VOLUME_NAME_MAX]; + int dist_count = 0; + + st_brick_count = volinfo->brick_count; + st_replica_count = volinfo->replica_count; + st_disperse_count = volinfo->disperse_count; + st_type = volinfo->type; + st_dist_leaf_count = volinfo->dist_leaf_count; + strcpy(st_volname, volinfo->volname); + + volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count; + volinfo->brick_count = volinfo->tier_info.cold_brick_count; + volinfo->replica_count = volinfo->tier_info.cold_replica_count; + volinfo->disperse_count = volinfo->tier_info.cold_disperse_count; + volinfo->type = volinfo->tier_info.cold_type; + sprintf (volinfo->volname, "%s-cold", st_volname); + + ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (ret) + goto out; + cxl = first_of(graph); + + volinfo->type = GF_CLUSTER_TYPE_TIER; + volinfo->brick_count = volinfo->tier_info.hot_brick_count; + volinfo->replica_count = volinfo->tier_info.hot_replica_count; + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo); + volinfo->disperse_count = 0; + + sprintf (volinfo->volname, "%s-hot", st_volname); + + if (volinfo->dist_leaf_count == 1) { + dist_count = volinfo->brick_count / volinfo->dist_leaf_count; + ret = volgen_link_bricks_from_list_head (graph, volinfo, + "cluster/distribute", + "%s-dht", + dist_count, + dist_count); + } else { + ret = volume_volgen_graph_build_clusters (graph, + volinfo, + _gf_false); + } + + hxl = first_of(graph); + + xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s", + "tier-dht", 0); + gf_asprintf(&rule, "%s-hot-dht", st_volname); + xlator_set_option(xl, "rule", rule); + xlator_set_option(xl, "xattr-name", "trusted.tier-gfid"); + + ret = volgen_xlator_link (xl, cxl); + ret = volgen_xlator_link (xl, hxl); + + st_type = GF_CLUSTER_TYPE_TIER; + + out: + volinfo->brick_count = st_brick_count; + volinfo->replica_count = st_replica_count; + volinfo->disperse_count = st_disperse_count; + volinfo->type = st_type; + volinfo->dist_leaf_count = st_dist_leaf_count; + strcpy(volinfo->volname, st_volname); + + return ret; +} + +static int client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, void *param) { @@ -3188,11 +3320,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, GF_ASSERT (conf); volname = volinfo->volname; - ret = volgen_graph_build_clients (graph, volinfo, set_dict, param); + ret = volgen_graph_build_clients (graph, volinfo, set_dict, + param); if (ret) goto out; - ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (volinfo->type == GF_CLUSTER_TYPE_TIER) + ret = volume_volgen_graph_build_clusters_tier (graph, volinfo, _gf_false); + else + ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false); + if (ret == -1) goto out; @@ -3730,7 +3867,7 @@ volgen_graph_build_replicate_clusters (volgen_graph_t *graph, char *replicate_args[] = {"cluster/replicate", "%s-replicate-%d"}; - return volgen_graph_build_clusters (graph, volinfo, "cluster/replicate", + return volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/replicate", "%s-replicate-%d", volinfo->brick_count, volinfo->replica_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index ae866b7ccfc..ada814bb25d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1690,6 +1690,32 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "features/trash", .op_version = GD_OP_VERSION_3_7_0, }, + + /* tier translator - global tunables */ + { .key = "cluster.write-freq-thresold", + .voltype = "cluster/tier", + .option = "write-freq-thresold", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-freq-thresold", + .voltype = "cluster/tier", + .option = "read-freq-thresold", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.tier-promote-frequency", + .voltype = "cluster/tier", + .option = "tier-promote-frequency", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.tier-demote-frequency", + .voltype = "cluster/tier", + .option = "tier-demote-frequency", + .op_version = GD_OP_VERSION_3_7_0, + .flags = OPT_FLAG_CLIENT_OPT + }, { .key = "features.ctr-enabled", .voltype = "features/changetimerecorder", .value = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ad280eda053..bac1598598b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -302,9 +302,6 @@ typedef struct tier_info_ { int hot_type; int hot_brick_count; int hot_replica_count; - int hot_disperse_count; - /*Commented for now Dan's DHT Tier patch will have it*/ - /*tier_group_t *root;*/ } gd_tier_info_t; struct glusterd_volinfo_ { @@ -814,6 +811,12 @@ int glusterd_handle_add_brick (rpcsvc_request_t *req); int +glusterd_handle_attach_tier (rpcsvc_request_t *req); + +int +glusterd_handle_detach_tier (rpcsvc_request_t *req); + +int glusterd_handle_replace_brick (rpcsvc_request_t *req); int |