summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/mgmt/glusterd')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c135
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c10
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c15
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c83
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h8
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c181
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c26
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h9
10 files changed, 438 insertions, 32 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index fd4618bb78c..fa5e533f135 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -288,6 +288,10 @@ gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
int replica_nodes = 0;
switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_TIER:
+ ret = 1;
+ goto out;
+
case GF_CLUSTER_TYPE_NONE:
case GF_CLUSTER_TYPE_STRIPE:
case GF_CLUSTER_TYPE_DISPERSE:
@@ -367,7 +371,6 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
int32_t replica_count = 0;
int32_t stripe_count = 0;
int type = 0;
-
this = THIS;
GF_ASSERT(this);
@@ -454,6 +457,17 @@ __glusterd_handle_add_brick (rpcsvc_request_t *req)
total_bricks = volinfo->brick_count + brick_count;
+ if (dict_get (dict, "attach-tier")) {
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ snprintf (err_str, sizeof (err_str),
+ "Volume %s is already a tier.", volname);
+ gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ goto brick_val;
+ }
+
if (!stripe_count && !replica_count) {
if (volinfo->type == GF_CLUSTER_TYPE_NONE)
goto brick_val;
@@ -639,6 +653,40 @@ subvol_matcher_destroy (int *subvols)
GF_FREE (subvols);
}
+/*
+ * Fill @dict with the hot tier bricks of @volinfo.
+ *
+ * The brick list is walked in reverse. The first
+ * tier_info.cold_brick_count entries met that way are skipped; every later
+ * entry is stored as "brick<N>" = "<hostname>:<path>", with N counting up
+ * from 1. The number of stored bricks is then written under "count".
+ *
+ * Returns the number of hot bricks stored, or -1 if a copy could not be
+ * allocated or any dict update failed.
+ */
+static int
+glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+        char                  key[256]      = {0,};
+        char                  value[256]    = {0,};
+        char                 *brick         = NULL;
+        int                   brick_num     = 0;
+        int                   hot_brick_num = 0;
+        glusterd_brickinfo_t *brickinfo     = NULL;
+        int                   ret           = 0;
+
+        /* cold tier bricks at tail of list so use reverse iteration */
+        cds_list_for_each_entry_reverse (brickinfo, &volinfo->bricks,
+                                         brick_list) {
+                brick_num++;
+                if (brick_num <= volinfo->tier_info.cold_brick_count)
+                        continue;
+
+                hot_brick_num++;
+                snprintf (key, sizeof (key), "brick%d", hot_brick_num);
+                snprintf (value, sizeof (value), "%s:%s",
+                          brickinfo->hostname,
+                          brickinfo->path);
+
+                /* value[] is reused on every pass, so store a private copy */
+                brick = strdup (value);
+                if (!brick)
+                        return -1;
+
+                /*
+                 * NOTE(review): nothing here releases the copy; whether the
+                 * dict takes ownership of it is not visible in this file.
+                 */
+                ret = dict_set_str (dict, key, brick);
+                if (ret)
+                        return -1; /* do not report a partially filled list */
+        }
+
+        ret = dict_set_int32(dict, "count", hot_brick_num);
+        if (ret)
+                return -1;
+
+        return hot_brick_num;
+}
+
int
__glusterd_handle_remove_brick (rpcsvc_request_t *req)
{
@@ -794,7 +842,8 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
/* Do not allow remove-brick if the bricks given is less than
the replica count or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->type != GF_CLUSTER_TYPE_TIER)) {
if (volinfo->dist_leaf_count &&
(count % volinfo->dist_leaf_count)) {
snprintf (err_str, sizeof (err_str), "Remove brick "
@@ -813,6 +862,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+
strcpy (brick_list, " ");
if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
@@ -822,6 +872,9 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
goto out;
}
+        if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+                /* for a tier volume the brick list comes from the volume */
+                count = glusterd_set_detach_bricks(dict, volinfo);
+                if (count < 0) {
+                        /* -1 is an error code, not a brick count */
+                        snprintf (err_str, sizeof (err_str), "Unable to "
+                                  "get the hot tier bricks of volume %s",
+                                  volname);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
while ( i <= count) {
snprintf (key, sizeof (key), "brick%d", i);
ret = dict_get_str (dict, key, &brick);
@@ -836,6 +889,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req)
ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
&brickinfo);
+
if (ret) {
snprintf (err_str, sizeof (err_str), "Incorrect brick "
"%s for volume %s", brick, volname);
@@ -883,7 +937,8 @@ out:
}
- GF_FREE (brick_list);
+ if (brick_list)
+ GF_FREE (brick_list);
subvol_matcher_destroy (subvols);
free (cli_req.dict.dict_val); //its malloced by xdr
@@ -1081,7 +1136,11 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
ret = glusterd_resolve_brick (brickinfo);
if (ret)
goto out;
- if (stripe_count || replica_count) {
+
+ /* hot tier bricks are added to head of brick list */
+ if (dict_get (dict, "attach-tier")) {
+ cds_list_add (&brickinfo->brick_list, &volinfo->bricks);
+ } else if (stripe_count || replica_count) {
add_brick_at_right_order (brickinfo, volinfo, (i - 1),
stripe_count, replica_count);
} else {
@@ -1674,6 +1733,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
break;
+ case GF_OP_CMD_DETACH:
case GF_OP_CMD_COMMIT_FORCE:
break;
}
@@ -1767,6 +1827,35 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
return ret;
}
+/*
+ * Record the tier layout in volinfo->tier_info ahead of an attach-tier.
+ *
+ * The volume's current type and counts are saved as the cold tier. The hot
+ * tier gets @count bricks and the "replica-count" value from @dict, or a
+ * replica count of 1 when that key cannot be read. @bricks is not used.
+ *
+ * Always returns 0: an absent "replica-count" is handled by the default
+ * above and is not reported as a failure.
+ */
+static int
+glusterd_op_perform_attach_tier (dict_t *dict,
+                                 glusterd_volinfo_t *volinfo,
+                                 int count,
+                                 char *bricks)
+{
+        int     replica_count = 0;
+
+        /*
+         * Store the new (cold) tier's structure until the graph is generated.
+         * If there is a failure before the graph is generated the
+         * structure will revert to its original state.
+         */
+        volinfo->tier_info.cold_dist_leaf_count = volinfo->dist_leaf_count;
+        volinfo->tier_info.cold_type = volinfo->type;
+        volinfo->tier_info.cold_brick_count = volinfo->brick_count;
+        volinfo->tier_info.cold_replica_count = volinfo->replica_count;
+        volinfo->tier_info.cold_disperse_count = volinfo->disperse_count;
+
+        /* the hot tier is unreplicated unless the request says otherwise */
+        if (dict_get_int32 (dict, "replica-count", &replica_count))
+                replica_count = 1;
+        volinfo->tier_info.hot_replica_count = replica_count;
+        volinfo->tier_info.hot_brick_count = count;
+
+        return 0;
+}
int
glusterd_op_add_brick (dict_t *dict, char **op_errstr)
@@ -1778,6 +1867,7 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
xlator_t *this = NULL;
char *bricks = NULL;
int32_t count = 0;
+ int32_t replica_count = 0;
this = THIS;
GF_ASSERT (this);
@@ -1812,6 +1902,11 @@ glusterd_op_add_brick (dict_t *dict, char **op_errstr)
goto out;
}
+ if (dict_get(dict, "attach-tier")) {
+ gf_log (THIS->name, GF_LOG_DEBUG, "Adding tier");
+ glusterd_op_perform_attach_tier (dict, volinfo, count, bricks);
+ }
+
ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);
if (ret) {
gf_log ("", GF_LOG_ERROR, "Unable to add bricks");
@@ -1829,6 +1924,14 @@ out:
return ret;
}
+/*
+ * Detach-tier: copy the cold tier's saved disperse count, replica count and
+ * type from volinfo->tier_info back onto the volume itself.
+ */
+static void
+glusterd_op_perform_detach_tier (glusterd_volinfo_t *volinfo)
+{
+        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+        volinfo->replica_count  = volinfo->tier_info.cold_replica_count;
+        volinfo->type           = volinfo->tier_info.cold_type;
+}
+
+
int
glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
{
@@ -1959,6 +2062,10 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
force = 1;
break;
+ case GF_OP_CMD_DETACH:
+ glusterd_op_perform_detach_tier (volinfo);
+ /* fall through */
+
case GF_OP_CMD_COMMIT_FORCE:
if (volinfo->decommission_in_progress) {
@@ -2051,7 +2158,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
volinfo->sub_count = replica_count;
volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo);
- if (replica_count == 1) {
+ /*
+ * volinfo->type and sub_count have already been set for
+ * volumes undergoing a detach operation, they should not
+ * be modified here.
+ */
+ if ((replica_count == 1) && (cmd != GF_OP_CMD_DETACH)) {
if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
volinfo->type = GF_CLUSTER_TYPE_NONE;
/* backward compatibility */
@@ -2224,3 +2336,16 @@ out:
gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+
+/*
+ * Handler for attach-tier requests: hands @req to
+ * glusterd_big_locked_handler() with __glusterd_handle_add_brick as the
+ * worker, and returns whatever that call returns.
+ */
+int
+glusterd_handle_attach_tier (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req, __glusterd_handle_add_brick);
+
+        return ret;
+}
+
+/*
+ * Handler for detach-tier requests: hands @req to
+ * glusterd_big_locked_handler() with __glusterd_handle_remove_brick as the
+ * worker, and returns whatever that call returns.
+ */
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req)
+{
+        int     ret = 0;
+
+        ret = glusterd_big_locked_handler (req,
+                                           __glusterd_handle_remove_brick);
+
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 77fa96400ba..a41b36b9715 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -4817,6 +4817,8 @@ rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA},
[GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA},
[GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_ATTACH_TIER] = { "ATTACH_TIER", GLUSTER_CLI_ATTACH_TIER, glusterd_handle_attach_tier, NULL, 0, DRC_NA},
+ [GLUSTER_CLI_DETACH_TIER] = { "DETACH_TIER", GLUSTER_CLI_DETACH_TIER, glusterd_handle_detach_tier, NULL, 0, DRC_NA},
[GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA},
[GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA},
[GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA},
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 75756518f28..c5fcb7698e5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -448,6 +448,7 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
char name[1024] = {0,};
gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
this = THIS;
GF_ASSERT (this);
@@ -514,7 +515,11 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin
ret = dict_get_str (dict, "volname", &volname);
if (ret)
goto out;
- snprintf (name, 1024, "%s-dht",volname);
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret)
+                        goto out; /* volinfo is not usable when the lookup fails */
+                /* tier volumes use the fixed name "tier-dht" */
+                if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+                        snprintf (name, 1024, "tier-dht");
+                else
+                        snprintf (name, 1024, "%s-dht", volname);
brick_req->name = gf_strdup (name);
break;
@@ -5159,6 +5164,7 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
while ( i <= count) {
snprintf (key, 256, "brick%d", i);
+
ret = dict_get_str (dict, key, &brick);
if (ret) {
gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick");
@@ -5167,8 +5173,10 @@ glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr,
ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
&brickinfo);
+
if (ret)
goto out;
+
if (glusterd_is_brick_started (brickinfo)) {
pending_node = GF_CALLOC (1, sizeof (*pending_node),
gf_gld_mt_pending_node_t);
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index ba67df436ff..0d66571300f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -278,6 +278,13 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
"--xlator-option", "*replicate*.readdir-failover=off",
"--xlator-option", "*dht.readdir-optimize=on",
NULL);
+
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER) {
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner,
+ "*tier-dht.xattr-name=trusted.tier-gfid");
+ }
+
runner_add_arg (&runner, "--xlator-option");
runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
runner_add_arg (&runner, "--xlator-option");
@@ -487,6 +494,7 @@ __glusterd_handle_defrag_volume (rpcsvc_request_t *req)
goto out;
if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_STATUS_TIER) ||
(cmd == GF_DEFRAG_CMD_STOP)) {
ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
dict, msg, sizeof (msg));
@@ -556,6 +564,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_TIER:
/* Check if the connected clients are all of version
* glusterfs-3.6 and higher. This is needed to prevent some data
* loss issues that could occur when older clients are connected
@@ -690,7 +699,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
/* Set task-id, if available, in op_ctx dict for operations other than
* start
*/
- if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+ if (cmd == GF_DEFRAG_CMD_STATUS ||
+ cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_STATUS_TIER) {
if (!uuid_is_null (volinfo->rebal.rebalance_id)) {
ctx = glusterd_op_get_ctx ();
if (!ctx) {
@@ -720,6 +731,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
case GF_DEFRAG_CMD_START_FORCE:
+ case GF_DEFRAG_CMD_START_TIER:
/* Reset defrag status to 'NOT STARTED' whenever a
* remove-brick/rebalance command is issued to remove
* stale information from previous run.
@@ -791,6 +803,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
break;
case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STATUS_TIER:
break;
default:
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 5b2b14503ae..5696229572d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -812,6 +812,63 @@ out:
" for volume %s", volinfo->volname);
return ret;
}
+
+/*
+ * Save the tier layout of @volinfo through gf_store_save_value() on @fd.
+ *
+ * Nothing is written unless the volume type is GF_CLUSTER_TYPE_TIER. For a
+ * tier volume each tier_info value is formatted as a decimal string and
+ * saved under its GLUSTERD_STORE_KEY_* key, in the order of the table
+ * below; the first failing save ends the sequence.
+ *
+ * Returns 0 when nothing had to be written or every save succeeded,
+ * otherwise the result of the failing gf_store_save_value() call.
+ */
+int32_t
+glusterd_volume_write_tier_details (int fd, glusterd_volinfo_t *volinfo)
+{
+        char            buf[PATH_MAX] = "";
+        int32_t         ret           = 0;
+        size_t          idx           = 0;
+        const struct {
+                char   *key;
+                int     value;
+        } fields[] = {
+                { GLUSTERD_STORE_KEY_COLD_COUNT,
+                  volinfo->tier_info.cold_brick_count },
+                { GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                  volinfo->tier_info.cold_replica_count },
+                { GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                  volinfo->tier_info.cold_disperse_count },
+                { GLUSTERD_STORE_KEY_HOT_COUNT,
+                  volinfo->tier_info.hot_brick_count },
+                { GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                  volinfo->tier_info.hot_replica_count },
+                { GLUSTERD_STORE_KEY_HOT_TYPE,
+                  volinfo->tier_info.hot_type },
+                { GLUSTERD_STORE_KEY_COLD_TYPE,
+                  volinfo->tier_info.cold_type },
+        };
+
+        if (volinfo->type != GF_CLUSTER_TYPE_TIER)
+                return 0;
+
+        for (idx = 0; idx < sizeof (fields) / sizeof (fields[0]); idx++) {
+                snprintf (buf, sizeof (buf), "%d", fields[idx].value);
+                ret = gf_store_save_value (fd, fields[idx].key, buf);
+                if (ret)
+                        break;
+        }
+
+        return ret;
+}
+
int32_t
glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
{
@@ -917,6 +974,8 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
goto out;
}
+        ret = glusterd_volume_write_tier_details (fd, volinfo);
+        if (ret)
+                goto out; /* otherwise the next assignment hides the failure */
+
ret = glusterd_volume_write_snap_details (fd, volinfo);
out:
@@ -2725,6 +2784,27 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {
strncpy (volinfo->parent_volname, value,
sizeof(volinfo->parent_volname) - 1);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_disperse_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_COUNT,
+                                     strlen (key))) {
+                        /* hot_* keys restore the hot_* fields they were saved from */
+                        volinfo->tier_info.hot_brick_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT,
+                                     strlen (key))) {
+                        volinfo->tier_info.hot_replica_count = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_HOT_TYPE,
+                                     strlen (key))) {
+                        volinfo->tier_info.hot_type = atoi (value);
+                } else if (!strncmp (key, GLUSTERD_STORE_KEY_COLD_TYPE,
+                                     strlen (key))) {
+                        volinfo->tier_info.cold_type = atoi (value);
} else {
if (is_key_glusterd_hooks_friendly (key)) {
@@ -2809,6 +2889,9 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo)
GF_ASSERT (volinfo->redundancy_count > 0);
break;
+ case GF_CLUSTER_TYPE_TIER:
+ break;
+
default:
GF_ASSERT (0);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index afa96be77cf..45ed86a4163 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -64,6 +64,14 @@ typedef enum glusterd_store_ver_ac_{
#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version"
#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
+#define GLUSTERD_STORE_KEY_COLD_TYPE "cold_type"
+#define GLUSTERD_STORE_KEY_COLD_COUNT "cold_count"
+#define GLUSTERD_STORE_KEY_COLD_REPLICA_COUNT "cold_replica_count"
+#define GLUSTERD_STORE_KEY_COLD_DISPERSE_COUNT "cold_disperse_count"
+#define GLUSTERD_STORE_KEY_HOT_TYPE "hot_type"
+#define GLUSTERD_STORE_KEY_HOT_COUNT "hot_count"
+#define GLUSTERD_STORE_KEY_HOT_REPLICA_COUNT "hot_replica_count"
+
#define GLUSTERD_STORE_KEY_SNAP_NAME "name"
#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id"
#define GLUSTERD_STORE_KEY_SNAP_DESC "desc"
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 727a19d24d1..27357955fe8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -560,6 +560,7 @@ glusterd_volinfo_dup (glusterd_volinfo_t *volinfo,
new_volinfo->sub_count = volinfo->sub_count;
new_volinfo->transport_type = volinfo->transport_type;
new_volinfo->brick_count = volinfo->brick_count;
+ new_volinfo->tier_info = volinfo->tier_info;
dict_copy (volinfo->dict, new_volinfo->dict);
dict_copy (volinfo->gsync_slaves, new_volinfo->gsync_slaves);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 114e57485fc..79da432bafe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1472,7 +1472,6 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
out:
return ret;
}
-
static int
brick_graph_add_trash (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -2712,24 +2711,22 @@ out:
}
static int
-volgen_graph_build_clusters (volgen_graph_t *graph,
- glusterd_volinfo_t *volinfo, char *xl_type,
- char *xl_namefmt, size_t child_count,
- size_t sub_count)
+volgen_link_bricks (volgen_graph_t *graph,
+ glusterd_volinfo_t *volinfo, char *xl_type,
+ char *xl_namefmt, size_t child_count,
+ size_t sub_count,
+ xlator_t *trav)
{
int i = 0;
int j = 0;
- xlator_t *txl = NULL;
xlator_t *xl = NULL;
- xlator_t *trav = NULL;
char *volname = NULL;
int ret = -1;
if (child_count == 0)
goto out;
volname = volinfo->volname;
- txl = first_of (graph);
- for (trav = txl; --child_count; trav = trav->next);
+
for (;; trav = trav->prev) {
if ((i % sub_count) == 0) {
xl = volgen_graph_add_nolink (graph, xl_type,
@@ -2745,10 +2742,9 @@ volgen_graph_build_clusters (volgen_graph_t *graph,
if (ret)
goto out;
- if (trav == txl)
- break;
-
i++;
+ if (i == child_count)
+ break;
}
ret = j;
@@ -2756,6 +2752,46 @@ out:
return ret;
}
+/*
+ * Link bricks starting @child_count - 1 nodes after first_of(graph).
+ *
+ * The start node is found by following ->next from first_of(graph);
+ * volgen_link_bricks() then does the linking from that node, stepping
+ * through ->prev.
+ *
+ * Returns the volgen_link_bricks() result, or -1 when @child_count is 0 or
+ * the graph holds fewer than @child_count nodes.
+ */
+static int
+volgen_link_bricks_from_list_tail (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo,
+                                   char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        xlator_t *trav = NULL;
+        size_t    cnt  = 0;
+
+        /*
+         * volgen_link_bricks() fails with -1 for an empty set as well; do it
+         * here first, because --cnt on zero wraps around and the walk below
+         * would run off the end of the list.
+         */
+        if (child_count == 0)
+                return -1;
+
+        trav = first_of(graph);
+        for (cnt = child_count; trav && --cnt; trav = trav->next)
+                ;
+
+        /* graph shorter than child_count: there is no node to start from */
+        if (!trav)
+                return -1;
+
+        return volgen_link_bricks (graph, volinfo,
+                                   xl_type,
+                                   xl_namefmt,
+                                   child_count,
+                                   sub_count,
+                                   trav);
+}
+
+/*
+ * Link bricks starting from the last node reachable through ->next from
+ * first_of(graph); volgen_link_bricks() does the linking from that node,
+ * stepping through ->prev.
+ *
+ * Returns the volgen_link_bricks() result unchanged.
+ */
+static int
+volgen_link_bricks_from_list_head (volgen_graph_t *graph,
+                                   glusterd_volinfo_t *volinfo, char *xl_type,
+                                   char *xl_namefmt, size_t child_count,
+                                   size_t sub_count)
+{
+        xlator_t *last = first_of(graph);
+
+        while (last->next)
+                last = last->next;
+
+        return volgen_link_bricks (graph, volinfo, xl_type, xl_namefmt,
+                                   child_count, sub_count, last);
+}
+
/**
* This is the build graph function for user-serviceable snapshots.
* Generates snapview-client
@@ -2948,7 +2984,7 @@ volgen_graph_build_dht_cluster (volgen_graph_t *graph,
else
name_fmt = "%s-dht";
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
voltype,
name_fmt,
child_count,
@@ -2985,7 +3021,7 @@ volgen_graph_build_ec_clusters (volgen_graph_t *graph,
xlator_t *ec = NULL;
char option[32] = {0};
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
disperse_args[0],
disperse_args[1],
volinfo->brick_count,
@@ -3015,12 +3051,19 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
{
char *replicate_args[] = {"cluster/replicate",
"%s-replicate-%d"};
+ char *tier_args[] = {"cluster/tier",
+ "%s-tier-%d"};
char *stripe_args[] = {"cluster/stripe",
"%s-stripe-%d"};
+ char *disperse_args[] = {"cluster/disperse",
+ "%s-disperse-%d"};
+ char option[32] = "";
int rclusters = 0;
int clusters = 0;
int dist_count = 0;
int ret = -1;
+ xlator_t *ec = NULL;
+ xlator_t *client = NULL;
if (!volinfo->dist_leaf_count)
goto out;
@@ -3031,7 +3074,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
/* All other cases, it will have one or the other cluster type */
switch (volinfo->type) {
case GF_CLUSTER_TYPE_REPLICATE:
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
replicate_args[0],
replicate_args[1],
volinfo->brick_count,
@@ -3040,7 +3083,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
goto out;
break;
case GF_CLUSTER_TYPE_STRIPE:
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
stripe_args[0],
stripe_args[1],
volinfo->brick_count,
@@ -3048,11 +3091,18 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
if (clusters < 0)
goto out;
break;
+ case GF_CLUSTER_TYPE_TIER:
+ ret = volgen_link_bricks_from_list_head (graph, volinfo,
+ tier_args[0],
+ tier_args[1],
+ volinfo->brick_count,
+ volinfo->replica_count);
+ break;
case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
/* Replicate after the clients, then stripe */
if (volinfo->replica_count == 0)
goto out;
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
replicate_args[0],
replicate_args[1],
volinfo->brick_count,
@@ -3062,7 +3112,7 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph,
rclusters = volinfo->brick_count / volinfo->replica_count;
GF_ASSERT (rclusters == clusters);
- clusters = volgen_graph_build_clusters (graph, volinfo,
+ clusters = volgen_link_bricks_from_list_tail (graph, volinfo,
stripe_args[0],
stripe_args[1],
rclusters,
@@ -3162,7 +3212,7 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
"log-buf-size option");
ret = volgen_graph_set_options_generic (graph, set_dict, "client",
- &log_flush_timeout_option_handler);
+ &log_flush_timeout_option_handler);
if (ret)
gf_log (this->name, GF_LOG_WARNING, "Failed to change "
"log-flush-timeout option");
@@ -3170,6 +3220,88 @@ graph_set_generic_options (xlator_t *this, volgen_graph_t *graph,
}
static int
+volume_volgen_graph_build_clusters_tier (volgen_graph_t *graph,
+                                         glusterd_volinfo_t *volinfo,
+                                         gf_boolean_t is_quotad)
+{
+        /*
+         * Build the cluster layer of a tier volume.
+         *
+         * volinfo is temporarily rewritten to describe first the cold tier
+         * (volume name "<name>-cold"), then the hot tier ("<name>-hot"), and
+         * the cluster xlators for each are generated in that order. A
+         * "cluster/tier" xlator named "tier-dht" is then added and linked to
+         * the top of both sub-graphs. The saved volinfo fields are put back
+         * on every exit path. @is_quotad is not used.
+         *
+         * Returns 0 on success and a non-zero value on failure.
+         */
+        int                   ret                = -1;
+        xlator_t             *xl                 = NULL;
+        xlator_t             *hxl                = NULL;
+        xlator_t             *cxl                = NULL;
+        char                 *rule               = NULL;
+        int                   st_brick_count     = 0;
+        int                   st_replica_count   = 0;
+        int                   st_disperse_count  = 0;
+        int                   st_dist_leaf_count = 0;
+        int                   st_type            = 0;
+        char                  st_volname[GD_VOLUME_NAME_MAX];
+        int                   dist_count         = 0;
+
+        /* remember everything that gets overwritten below */
+        st_brick_count = volinfo->brick_count;
+        st_replica_count = volinfo->replica_count;
+        st_disperse_count = volinfo->disperse_count;
+        st_type = volinfo->type;
+        st_dist_leaf_count = volinfo->dist_leaf_count;
+        strcpy(st_volname, volinfo->volname);
+
+        /* cold tier */
+        volinfo->dist_leaf_count = volinfo->tier_info.cold_dist_leaf_count;
+        volinfo->brick_count = volinfo->tier_info.cold_brick_count;
+        volinfo->replica_count = volinfo->tier_info.cold_replica_count;
+        volinfo->disperse_count = volinfo->tier_info.cold_disperse_count;
+        volinfo->type = volinfo->tier_info.cold_type;
+        /*
+         * NOTE(review): the "-cold"/"-hot" suffixes make the name longer than
+         * the original; confirm volinfo->volname has room for them.
+         */
+        sprintf (volinfo->volname, "%s-cold", st_volname);
+
+        ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+        if (ret)
+                goto out;
+        cxl = first_of(graph);
+
+        /* hot tier */
+        volinfo->type = GF_CLUSTER_TYPE_TIER;
+        volinfo->brick_count = volinfo->tier_info.hot_brick_count;
+        volinfo->replica_count = volinfo->tier_info.hot_replica_count;
+        volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
+        volinfo->disperse_count = 0;
+
+        sprintf (volinfo->volname, "%s-hot", st_volname);
+
+        if (volinfo->dist_leaf_count == 1) {
+                dist_count = volinfo->brick_count / volinfo->dist_leaf_count;
+                ret = volgen_link_bricks_from_list_head (graph, volinfo,
+                                                         "cluster/distribute",
+                                                         "%s-dht",
+                                                         dist_count,
+                                                         dist_count);
+        } else {
+                ret = volume_volgen_graph_build_clusters (graph,
+                                                          volinfo,
+                                                          _gf_false);
+        }
+        /* the link helper returns a cluster count on success, so only a
+         * negative value means the hot tier could not be built */
+        if (ret < 0)
+                goto out;
+
+        hxl = first_of(graph);
+
+        xl = volgen_graph_add_nolink (graph, "cluster/tier", "%s",
+                                      "tier-dht", 0);
+        if (!xl) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = gf_asprintf(&rule, "%s-hot-dht", st_volname);
+        if (ret < 0 || !rule) {
+                ret = -1;
+                goto out;
+        }
+        /*
+         * NOTE(review): rule is not released here; whether
+         * xlator_set_option() keeps the pointer or copies it is not visible
+         * in this file.
+         */
+        ret = xlator_set_option(xl, "rule", rule);
+        if (ret)
+                goto out;
+        ret = xlator_set_option(xl, "xattr-name", "trusted.tier-gfid");
+        if (ret)
+                goto out;
+
+        ret = volgen_xlator_link (xl, cxl);
+        if (ret)
+                goto out;
+        ret = volgen_xlator_link (xl, hxl);
+
+        st_type = GF_CLUSTER_TYPE_TIER;
+
+ out:
+        volinfo->brick_count = st_brick_count;
+        volinfo->replica_count = st_replica_count;
+        volinfo->disperse_count = st_disperse_count;
+        volinfo->type = st_type;
+        volinfo->dist_leaf_count = st_dist_leaf_count;
+        strcpy(volinfo->volname, st_volname);
+
+        return ret;
+}
+
+static int
client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, void *param)
{
@@ -3188,11 +3320,16 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
GF_ASSERT (conf);
volname = volinfo->volname;
- ret = volgen_graph_build_clients (graph, volinfo, set_dict, param);
+ ret = volgen_graph_build_clients (graph, volinfo, set_dict,
+ param);
if (ret)
goto out;
- ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+ if (volinfo->type == GF_CLUSTER_TYPE_TIER)
+ ret = volume_volgen_graph_build_clusters_tier (graph, volinfo, _gf_false);
+ else
+ ret = volume_volgen_graph_build_clusters (graph, volinfo, _gf_false);
+
if (ret == -1)
goto out;
@@ -3730,7 +3867,7 @@ volgen_graph_build_replicate_clusters (volgen_graph_t *graph,
char *replicate_args[] = {"cluster/replicate",
"%s-replicate-%d"};
- return volgen_graph_build_clusters (graph, volinfo, "cluster/replicate",
+ return volgen_link_bricks_from_list_tail (graph, volinfo, "cluster/replicate",
"%s-replicate-%d",
volinfo->brick_count,
volinfo->replica_count);
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index ae866b7ccfc..ada814bb25d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1690,6 +1690,32 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "features/trash",
.op_version = GD_OP_VERSION_3_7_0,
},
+
+ /* tier translator - global tunables */
+ { .key = "cluster.write-freq-thresold",
+ .voltype = "cluster/tier",
+ .option = "write-freq-thresold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.read-freq-thresold",
+ .voltype = "cluster/tier",
+ .option = "read-freq-thresold",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.tier-promote-frequency",
+ .voltype = "cluster/tier",
+ .option = "tier-promote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
+ { .key = "cluster.tier-demote-frequency",
+ .voltype = "cluster/tier",
+ .option = "tier-demote-frequency",
+ .op_version = GD_OP_VERSION_3_7_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = "features.ctr-enabled",
.voltype = "features/changetimerecorder",
.value = "off",
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index ad280eda053..bac1598598b 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -302,9 +302,6 @@ typedef struct tier_info_ {
int hot_type;
int hot_brick_count;
int hot_replica_count;
- int hot_disperse_count;
- /*Commented for now Dan's DHT Tier patch will have it*/
- /*tier_group_t *root;*/
} gd_tier_info_t;
struct glusterd_volinfo_ {
@@ -814,6 +811,12 @@ int
glusterd_handle_add_brick (rpcsvc_request_t *req);
int
+glusterd_handle_attach_tier (rpcsvc_request_t *req);
+
+int
+glusterd_handle_detach_tier (rpcsvc_request_t *req);
+
+int
glusterd_handle_replace_brick (rpcsvc_request_t *req);
int