diff options
author | Dan Lambright <dlambrig@redhat.com> | 2015-09-18 00:49:06 -0400 |
---|---|---|
committer | Dan Lambright <dlambrig@redhat.com> | 2015-10-10 06:00:04 -0700 |
commit | 672baab88fb7f32e844cd4be22e0924e4e0e83fc (patch) | |
tree | 8a05387b3508cc8f15ec448ab2e7b2baac897413 /xlators/mgmt | |
parent | a4f982be9b21323038704069a56fb2448369d6a0 (diff) |
cluster/tier: add watermarks and policy driver
This fix introduces infrastructure to support different
policies for promotion and demotion.
Currently the tier feature automatically promotes and demotes
files periodically based on access. This is good for testing
but too stringent for most real workloads. It makes it
difficult to fully utilize a hot tier: data will be demoted
before it is touched, since it's unlikely a 100GB hot SSD will have
all its data touched in a given window of time.
A new parameter "mode" allows the user to pick promotion/demotion
policies.
The "test mode" will be used for *.t and other general testing.
This is the current mechanism.
The "cache mode" introduces watermarks. The watermarks
represent levels of data residing on the hot tier.
"cache mode" policy:
The % the hot tier is full is called P.
Do not promote or demote more than D MB or F files.
A random number [0-100] is called R.
Rules for migration:
if (P < watermark_low) don't demote, always promote.
if (P >= watermark_low) && (P < watermark_hi) demote if R < P; promote if R > P.
if (P > watermark_hi) always demote, don't promote.
gluster volume set {vol} cluster.watermark-hi %
gluster volume set {vol} cluster.watermark-low %
gluster volume set {vol} cluster.tier-max-mb {D}
gluster volume set {vol} cluster.tier-max-files {F}
gluster volume set {vol} cluster.tier-mode {test|cache}
Change-Id: I157f19667ec95aa1d53406041c1e3b073be127c2
BUG: 1257911
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: http://review.gluster.org/12039
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'xlators/mgmt')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 135 |
1 files changed, 122 insertions, 13 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index c62f2d79c1f..8fdee165c68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -19,6 +19,10 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, int ret = 0; xlator_t *this = NULL; int origin_val = -1; + char *current_wm_hi = NULL; + char *current_wm_low = NULL; + uint64_t wm_hi = 0; + uint64_t wm_low = 0; this = THIS; GF_ASSERT (this); @@ -34,12 +38,20 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, goto out; } + if (strstr (key, "cluster.tier-mode")) { + if (strcmp(value, "test") && + strcmp(value, "cache")) { + ret = -1; + goto out; + } + goto out; + } + /* - * All the volume set options for tier are expecting a positive + * Rest of the volume set options for tier are expecting a positive * Integer. Change the function accordingly if this constraint is * changed. */ - ret = gf_string2int (value, &origin_val); if (ret) { snprintf (errstr, sizeof (errstr), "%s is not a compatible " @@ -51,13 +63,55 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ret = -1; goto out; } + if (strstr (key, "watermark-hi") || + strstr (key, "watermark-low")) { + if ((origin_val < 1) || (origin_val > 99)) { + snprintf (errstr, sizeof (errstr), "%s is not a compatible" + "value. 
%s expects a percentage from 1-99.", + value, key); + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr); + *op_errstr = gf_strdup (errstr); + ret = -1; + goto out; + } + + if (strstr (key, "watermark-hi")) { + wm_hi = origin_val; + } else { + glusterd_volinfo_get (volinfo, + "cluster.watermark-hi", + &current_wm_hi); + gf_string2bytesize_uint64 (current_wm_hi, + &wm_hi); + } - if (strstr ("cluster.tier-promote-frequency", key) || - strstr ("cluster.tier-demote-frequency", key)) { + if (strstr (key, "watermark-low")) { + wm_low = origin_val; + } else { + glusterd_volinfo_get (volinfo, + "cluster.watermark-low", + &current_wm_low); + gf_string2bytesize_uint64 (current_wm_low, + &wm_low); + } + if (wm_low > wm_hi) { + snprintf (errstr, sizeof (errstr), "lower watermark" + " cannot exceed upper watermark."); + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr); + *op_errstr = gf_strdup (errstr); + ret = -1; + goto out; + } + } else if (strstr (key, "tier-promote-frequency") || + strstr (key, "tier-max-mb") || + strstr (key, "tier-max-files") || + strstr (key, "tier-demote-frequency")) { if (origin_val < 1) { snprintf (errstr, sizeof (errstr), "%s is not a " - "compatible value. %s expects a positive " - "integer value.", + " compatible value. %s expects a positive " + "integer value greater than 0.", value, key); gf_msg (this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr); @@ -65,10 +119,12 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ret = -1; goto out; } + } else { + /* check write-freq-threshold and read-freq-threshold. */ if (origin_val < 0) { snprintf (errstr, sizeof (errstr), "%s is not a " - "compatible value. %s expects a non-negative" + "compatible value. 
%s expects a positive" " integer value.", value, key); gf_msg (this->name, GF_LOG_ERROR, EINVAL, @@ -1906,6 +1962,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { /* tier translator - global tunables */ { .key = "cluster.write-freq-threshold", .voltype = "cluster/tier", + .value = "0", .option = "write-freq-threshold", .op_version = GD_OP_VERSION_3_7_0, .flags = OPT_FLAG_CLIENT_OPT, @@ -1917,6 +1974,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { }, { .key = "cluster.read-freq-threshold", .voltype = "cluster/tier", + .value = "0", .option = "read-freq-threshold", .op_version = GD_OP_VERSION_3_7_0, .flags = OPT_FLAG_CLIENT_OPT, @@ -1928,23 +1986,74 @@ struct volopt_map_entry glusterd_volopt_map[] = { }, { .key = "cluster.tier-promote-frequency", .voltype = "cluster/tier", + .value = "120", .option = "tier-promote-frequency", .op_version = GD_OP_VERSION_3_7_0, .flags = OPT_FLAG_CLIENT_OPT, .validate_fn = validate_tier, - .description = "Defines how often the promotion should be triggered " - "i.e. periodicity of promotion cycles. The value is in " - "secs." }, { .key = "cluster.tier-demote-frequency", .voltype = "cluster/tier", + .value = "120", .option = "tier-demote-frequency", .op_version = GD_OP_VERSION_3_7_0, .flags = OPT_FLAG_CLIENT_OPT, .validate_fn = validate_tier, - .description = "Defines how often the demotion should be triggered " - "i.e. periodicity of demotion cycles. The value is in " - "secs." + }, + { .key = "cluster.watermark-hi", + .voltype = "cluster/tier", + .value = "90", + .option = "watermark-hi", + .op_version = GD_OP_VERSION_3_7_6, + .flags = OPT_FLAG_CLIENT_OPT, + .validate_fn = validate_tier, + .description = "Upper % watermark for promotion. If hot tier fills" + " above this percentage, no promotion will happen and demotion will " + "happen with high probability." 
+ }, + { .key = "cluster.watermark-low", + .voltype = "cluster/tier", + .value = "75", + .option = "watermark-low", + .op_version = GD_OP_VERSION_3_7_6, + .flags = OPT_FLAG_CLIENT_OPT, + .validate_fn = validate_tier, + .description = "Lower % watermark. If hot tier is less " + "full than this, promotion will happen and demotion will not happen. " + "If greater than this, promotion/demotion will happen at a probability " + "relative to how full the hot tier is." + }, + { .key = "cluster.tier-mode", + .voltype = "cluster/tier", + .option = "tier-mode", + .value = "test", + .op_version = GD_OP_VERSION_3_7_6, + .flags = OPT_FLAG_CLIENT_OPT, + .validate_fn = validate_tier, + .description = "Either 'test' or 'cache'. Test mode periodically" + " demotes or promotes files automatically based on access." + " Cache mode does so based on whether the cache is full or not," + " as specified with watermarks." + }, + { .key = "cluster.tier-max-mb", + .voltype = "cluster/tier", + .option = "tier-max-mb", + .value = "1000", + .op_version = GD_OP_VERSION_3_7_6, + .flags = OPT_FLAG_CLIENT_OPT, + .validate_fn = validate_tier, + .description = "The maximum number of MB that may be migrated" + " in any direction in a given cycle." + }, + { .key = "cluster.tier-max-files", + .voltype = "cluster/tier", + .option = "tier-max-files", + .value = "5000", + .op_version = GD_OP_VERSION_3_7_6, + .flags = OPT_FLAG_CLIENT_OPT, + .validate_fn = validate_tier, + .description = "The maximum number of files that may be migrated" + " in any direction in a given cycle." }, { .key = "features.ctr-enabled", .voltype = "features/changetimerecorder", |