diff options
-rw-r--r-- | tests/basic/tier/frequency-counters.t | 78 | ||||
-rw-r--r-- | tests/tier.rc | 1 | ||||
-rw-r--r-- | xlators/features/changetimerecorder/src/changetimerecorder.c | 16 | ||||
-rw-r--r-- | xlators/features/changetimerecorder/src/ctr-helper.c | 12 | ||||
-rw-r--r-- | xlators/features/changetimerecorder/src/ctr-helper.h | 8 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 325 |
7 files changed, 392 insertions, 52 deletions
diff --git a/tests/basic/tier/frequency-counters.t b/tests/basic/tier/frequency-counters.t new file mode 100644 index 00000000000..707b8b4bbc6 --- /dev/null +++ b/tests/basic/tier/frequency-counters.t @@ -0,0 +1,78 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../tier.rc + + +NUM_BRICKS=3 +DEMOTE_FREQ=10 +PROMOTE_FREQ=10 +NUM_FILES=5 +TEST_DIR=test +# Creates a tiered volume with pure distribute hot and cold tiers +# Both hot and cold tiers will have an equal number of bricks. + +function create_dist_vol () { + mkdir $B0/cold + mkdir $B0/hot + TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} + TEST $CLI volume set $V0 performance.quick-read off + TEST $CLI volume set $V0 performance.io-cache off + TEST $CLI volume start $V0 +} + +function create_dist_tier_vol () { + TEST $CLI volume attach-tier $V0 $H0:$B0/hot/${V0}{0..$1} + TEST $CLI volume set $V0 cluster.tier-mode test + TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ + TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ + TEST $CLI volume set $V0 features.record-counters on + TEST $CLI volume set $V0 cluster.read-freq-threshold 2 + TEST $CLI volume set $V0 cluster.write-freq-threshold 2 +} + +cleanup; + + +TEST glusterd + +#Create and start a tiered volume +create_dist_vol $NUM_BRICKS + +$CLI volume set $V0 diagnostics.client-log-level DEBUG +# Mount FUSE +TEST glusterfs -s $H0 --volfile-id $V0 $M0 + +# create some files +mkdir $M0/$TEST_DIR +cd $M0/${TEST_DIR} + +touch file{1..2} + +# attach tier +create_dist_tier_vol $NUM_BRICKS + +sleep_until_mid_cycle $PROMOTE_FREQ + +# check if promotion on single hit, should fail +echo "hi" >> file2 +drop_cache $M0 +sleep $PROMOTE_FREQ +EXPECT "0" check_counters 0 0 + +# check if promotion on double hit, should suceed +sleep_until_mid_cycle $PROMOTE_FREQ +echo "hi" >> file2 +drop_cache $M0 +echo "hi" >> file2 +drop_cache $M0 + +EXPECT_WITHIN $PROMOTE_FREQ "0" check_counters 1 0 + +TEST ! $CLI volume set $V0 features.record-counters off + +cd / + +cleanup + diff --git a/tests/tier.rc b/tests/tier.rc index 4fd24de0659..32ee00cf4fe 100644 --- a/tests/tier.rc +++ b/tests/tier.rc @@ -109,6 +109,7 @@ function sleep_first_cycle { function sleep_until_mid_cycle { startTime=$(date +%s) mod=$(( ( $startTime % $1 ) + 1 )) + mod=$(( $1 - $mod )) mod=$(( $mod + $1 / 2 )) sleep $mod } diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c index 2c05d07dcb7..85a04944077 100644 --- a/xlators/features/changetimerecorder/src/changetimerecorder.c +++ b/xlators/features/changetimerecorder/src/changetimerecorder.c @@ -1982,12 +1982,12 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("ctr_link_consistency", priv->ctr_link_consistency, options, bool, out); - GF_OPTION_RECONF ("ctr_inode_heal_expire_period", - priv->ctr_inode_heal_expire_period, + GF_OPTION_RECONF ("ctr_lookupheal_inode_timeout", + priv->ctr_lookupheal_inode_timeout, options, uint64, out); - GF_OPTION_RECONF ("ctr_hardlink_heal_expire_period", - priv->ctr_hardlink_heal_expire_period, + GF_OPTION_RECONF ("ctr_lookupheal_link_timeout", + priv->ctr_lookupheal_link_timeout, options, uint64, out); GF_OPTION_RECONF ("record-exit", priv->ctr_record_unwind, options, @@ -2041,9 +2041,9 @@ init (xlator_t *this) priv->gfdb_sync_type = GFDB_DB_SYNC; priv->enabled = _gf_true; priv->_db_conn = NULL; - priv->ctr_hardlink_heal_expire_period = + priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD; - priv->ctr_inode_heal_expire_period = + priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD; /*Extract ctr xlator options*/ @@ -2219,11 +2219,11 @@ struct volume_options options[] = { .value = {"on", "off"}, .default_value = "off" }, - { .key = {"ctr_hardlink_heal_expire_period"}, + { .key = {"ctr_lookupheal_link_timeout"}, .type = GF_OPTION_TYPE_INT, .default_value = "300" }, - { .key = {"ctr_inode_heal_expire_period"}, + { .key = {"ctr_lookupheal_inode_timeout"}, .type = GF_OPTION_TYPE_INT, .default_value = "300" }, diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c index ba48a70f583..44abf7d5142 100644 --- a/xlators/features/changetimerecorder/src/ctr-helper.c +++ b/xlators/features/changetimerecorder/src/ctr-helper.c @@ -288,14 +288,14 @@ int extract_ctr_options (xlator_t *this, gf_ctr_private_t *_priv) { GF_OPTION_INIT ("ctr_link_consistency", _priv->ctr_link_consistency, bool, out); - /*Extract ctr_inode_heal_expire_period */ - GF_OPTION_INIT ("ctr_inode_heal_expire_period", - _priv->ctr_inode_heal_expire_period, + /*Extract ctr_lookupheal_inode_timeout */ + GF_OPTION_INIT ("ctr_lookupheal_inode_timeout", + _priv->ctr_lookupheal_inode_timeout, uint64, out); - /*Extract ctr_hardlink_heal_expire_period*/ - GF_OPTION_INIT ("ctr_hardlink_heal_expire_period", - _priv->ctr_hardlink_heal_expire_period, + /*Extract ctr_lookupheal_link_timeout*/ + GF_OPTION_INIT ("ctr_lookupheal_link_timeout", + _priv->ctr_lookupheal_link_timeout, uint64, out); /*Extract flag for hot tier brick*/ diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h index 025965898d2..2b2dc3e17c7 100644 --- a/xlators/features/changetimerecorder/src/ctr-helper.h +++ b/xlators/features/changetimerecorder/src/ctr-helper.h @@ -50,8 +50,8 @@ typedef struct gf_ctr_private { gfdb_db_type_t gfdb_db_type; gfdb_sync_type_t gfdb_sync_type; gfdb_conn_node_t *_db_conn; - uint64_t ctr_hardlink_heal_expire_period; - uint64_t ctr_inode_heal_expire_period; + uint64_t ctr_lookupheal_link_timeout; + uint64_t ctr_lookupheal_inode_timeout; } gf_ctr_private_t; @@ -685,7 +685,7 @@ __is_inode_expired (ctr_xlator_ctx_t *ctr_xlator_ctx, time_diff = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period; - ret = (time_diff >= _priv->ctr_inode_heal_expire_period) ? + ret = (time_diff >= _priv->ctr_lookupheal_inode_timeout) ? _gf_true : _gf_false; return ret; } @@ -705,7 +705,7 @@ __is_hardlink_expired (ctr_hard_link_t *ctr_hard_link, time_diff = current_time->tv_sec - ctr_hard_link->hardlink_heal_period; - ret = ret || (time_diff >= _priv->ctr_hardlink_heal_expire_period) ? + ret = ret || (time_diff >= _priv->ctr_lookupheal_link_timeout) ? _gf_true : _gf_false; return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index ddd4aaef768..b3dd47d09f4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1727,11 +1727,11 @@ brick_graph_add_changetimerecorder (volgen_graph_t *graph, if (ret) goto out; - ret = xlator_set_option (xl, "ctr_hardlink_heal_expire_period", "300"); + ret = xlator_set_option (xl, "ctr_lookupheal_link_timeout", "300"); if (ret) goto out; - ret = xlator_set_option (xl, "ctr_inode_heal_expire_period", "300"); + ret = xlator_set_option (xl, "ctr_lookupheal_inode_timeout", "300"); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index b085e4dbbe3..cbda00250e1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -12,6 +12,278 @@ #include "glusterd-utils.h" static int +get_tier_freq_threshold (glusterd_volinfo_t *volinfo, char *threshold_key) { + int threshold = 0; + char *str_thresold = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + glusterd_volinfo_get (volinfo, threshold_key, &str_thresold); + if (str_thresold) { + ret = gf_string2int (str_thresold, &threshold); + if (ret == -1) { + threshold = ret; + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, "Failed to convert " + "string to integer"); + } + } + + return threshold; +} + +/* + * Validation function for record-counters + * if write-freq-threshold and read-freq-threshold both have non-zero values + * record-counters cannot be set to off + * if record-counters is set to on + * check if both the frequency thresholds are zero, then pop + * a note, but volume set is not failed. + * */ +static int +validate_tier_counters (glusterd_volinfo_t *volinfo, + dict_t *dict, + char *key, + char *value, + char **op_errstr) { + + char errstr[2048] = ""; + int ret = -1; + xlator_t *this = NULL; + gf_boolean_t origin_val = -1; + int current_wt = 0; + int current_rt = 0; + + this = THIS; + GF_ASSERT (this); + + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { + snprintf (errstr, sizeof (errstr), "Volume %s is not a tier " + "volume. Option %s is only valid for tier volume.", + volinfo->volname, key); + goto out; + } + + ret = gf_string2boolean (value, &origin_val); + if (ret) { + snprintf (errstr, sizeof (errstr), "%s is not a compatible " + "value. %s expects an boolean value", value, key); + goto out; + } + + current_rt = get_tier_freq_threshold (volinfo, + "cluster.read-freq-threshold"); + if (current_rt == -1) { + snprintf (errstr, sizeof (errstr), " Failed to retrive value of" + "cluster.read-freq-threshold"); + goto out; + } + current_wt = get_tier_freq_threshold (volinfo, + "cluster.write-freq-threshold"); + if (current_wt == -1) { + snprintf (errstr, sizeof (errstr), " Failed to retrive value of" + "cluster.write-freq-threshold"); + goto out; + } + /* If record-counters is set to off */ + if (!origin_val) { + + /* Both the thresholds should be zero to set + * record-counters to off*/ + if (current_rt || current_wt) { + snprintf (errstr, sizeof (errstr), + "Cannot set features.record-counters to \"%s\"" + " as cluster.write-freq-threshold is %d" + " and cluster.read-freq-threshold is %d. Please" + " set both cluster.write-freq-threshold and " + " cluster.read-freq-threshold to 0, to set " + " features.record-counters to \"%s\".", + value, current_wt, current_rt, value); + ret = -1; + goto out; + } + } + /* TODO give a warning message to the user. errstr without re = -1 will + * not result in a warning on cli for now. + else { + if (!current_rt && !current_wt) { + snprintf (errstr, sizeof (errstr), + " Note : cluster.write-freq-threshold is %d" + " and cluster.read-freq-threshold is %d. Please" + " set both cluster.write-freq-threshold and " + " cluster.read-freq-threshold to" + " appropriate positive values.", + current_wt, current_rt); + } + }*/ + + ret = 0; +out: + + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr); + *op_errstr = gf_strdup (errstr); + } + + return ret; + +} + +/* Validation for tiering frequency thresholds + * If any of the frequency thresholds are set to a non-zero value, + * switch record-counters on, if not already on + * If both the frequency thresholds are set to zero, + * switch record-counters off, if not already off + * */ +static int +validate_tier_thresholds (glusterd_volinfo_t *volinfo, + dict_t *dict, + char *key, + char *value, + char **op_errstr) +{ + char errstr[2048] = ""; + int ret = -1; + xlator_t *this = NULL; + int origin_val = -1; + gf_boolean_t current_rc = _gf_false; + char *str_current_rc = NULL; + int current_wt = 0; + int current_rt = 0; + char *str_current_wt = NULL; + char *str_current_rt = NULL; + gf_boolean_t is_set_rc = _gf_false; + char *proposed_rc = NULL; + gf_boolean_t is_set_wrt_thsd = _gf_false; + + + this = THIS; + GF_ASSERT (this); + + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { + snprintf (errstr, sizeof (errstr), "Volume %s is not a tier " + "volume. Option %s is only valid for tier volume.", + volinfo->volname, key); + goto out; + } + + + ret = gf_string2int (value, &origin_val); + if (ret) { + snprintf (errstr, sizeof (errstr), "%s is not a compatible " + "value. %s expects an integer value.", value, key); + goto out; + } + + if (origin_val < 0) { + snprintf (errstr, sizeof (errstr), "%s is not a " + "compatible value. %s expects a positive" + "integer value.", value, key); + goto out; + } + + /* Get the record-counters value */ + ret = glusterd_volinfo_get_boolean (volinfo, + "features.record-counters"); + if (ret == -1) { + snprintf (errstr, sizeof (errstr), "Failed to retrive value of" + "features.record-counters from volume info"); + goto out; + } + current_rc = ret; + + /* if any of the thresholds are set to a non-zero value + * switch record-counters on, if not already on*/ + if (origin_val > 0) { + if (!current_rc) { + is_set_rc = _gf_true; + current_rc = _gf_true; + } + } else { + /* if the set is for write-freq-threshold */ + if (strstr (key, "write-freq-threshold")) { + current_rt = get_tier_freq_threshold (volinfo, + "cluster.read-freq-threshold"); + if (current_rt == -1) { + snprintf (errstr, sizeof (errstr), + " Failed to retrive value of" + "cluster.read-freq-threshold"); + goto out; + } + current_wt = origin_val; + } + /* else it should be read-freq-threshold */ + else { + current_wt = get_tier_freq_threshold (volinfo, + "cluster.write-freq-threshold"); + if (current_wt == -1) { + snprintf (errstr, sizeof (errstr), + " Failed to retrive value of" + "cluster.write-freq-threshold"); + goto out; + } + current_rt = origin_val; + } + + /* Since both the thresholds are zero, set record-counters + * to off, if not already off */ + if (current_rt == 0 && current_wt == 0) { + if (current_rc) { + is_set_rc = _gf_true; + current_rc = _gf_false; + } + } + } + + /* if record-counter has to be set to proposed value */ + if (is_set_rc) { + if (current_rc) { + ret = gf_asprintf (&proposed_rc, "on"); + } else { + ret = gf_asprintf (&proposed_rc, "off"); + } + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, + "Failed to allocate memory to dict_value"); + goto error; + } + ret = dict_set_str (volinfo->dict, "features.record-counters", + proposed_rc); +error: + if (ret) { + snprintf (errstr, sizeof (errstr), + "Failed to set features.record-counters" + "to \"%s\" automatically." + "Please try to set features.record-counters " + "\"%s\" manually. The options " + "cluster.write-freq-threshold and " + "cluster.read-freq-threshold can only " + "be set to a non zero value, if " + "features.record-counters is " + "set to \"on\".", proposed_rc, proposed_rc); + goto out; + } + } + ret = 0; +out: + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr); + *op_errstr = gf_strdup (errstr); + if (proposed_rc) + GF_FREE (proposed_rc); + } + return ret; +} + + + +static int validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, char *value, char **op_errstr) { @@ -45,9 +317,7 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, goto out; } goto out; - } - - else if (strstr (key, "tier-pause")) { + } else if (strstr (key, "tier-pause")) { if (strcmp(value, "off") && strcmp(value, "on")) { ret = -1; @@ -72,6 +342,7 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, ret = -1; goto out; } + if (strstr (key, "watermark-hi") || strstr (key, "watermark-low")) { if ((origin_val < 1) || (origin_val > 99)) { @@ -130,21 +401,7 @@ validate_tier (glusterd_volinfo_t *volinfo, dict_t *dict, char *key, goto out; } - } else { - /* check write-freq-threshold and read-freq-threshold. */ - if (origin_val < 0) { - snprintf (errstr, sizeof (errstr), "%s is not a " - "compatible value. %s expects a positive" - " integer value.", - value, key); - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - GD_MSG_INCOMPATIBLE_VALUE, "%s", errstr); - *op_errstr = gf_strdup (errstr); - ret = -1; - goto out; - } } - out: gf_msg_debug (this->name, 0, "Returning %d", ret); @@ -2001,7 +2258,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "write-freq-threshold", .op_version = GD_OP_VERSION_3_7_0, .flags = OPT_FLAG_CLIENT_OPT, - .validate_fn = validate_tier, + .validate_fn = validate_tier_thresholds, .description = "Defines the number of writes, in a promotion/demotion" " cycle, that would mark a file HOT for promotion. Any" " file that has write hits less than this value will " @@ -2013,7 +2270,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "read-freq-threshold", .op_version = GD_OP_VERSION_3_7_0, .flags = OPT_FLAG_CLIENT_OPT, - .validate_fn = validate_tier, + .validate_fn = validate_tier_thresholds, .description = "Defines the number of reads, in a promotion/demotion " "cycle, that would mark a file HOT for promotion. Any " "file that has read hits less than this value will be " @@ -2109,7 +2366,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { .value = "off", .option = "record-counters", .op_version = GD_OP_VERSION_3_7_0, - .description = "Its a Change Time Recorder Xlator option to enable recording write " + .validate_fn = validate_tier_counters, + .description = "Its a Change Time Recorder Xlator option to " + "enable recording write " "and read heat counters. The default is disabled. " "If enabled, \"cluster.write-freq-threshold\" and " "\"cluster.read-freq-threshold\" defined the number " @@ -2121,31 +2380,32 @@ struct volopt_map_entry glusterd_volopt_map[] = { .value = "off", .option = "ctr-record-metadata-heat", .op_version = GD_OP_VERSION_3_7_0, - /* Purposefully commenting the description so that this option remains - * hidden from the users as this is more of a developer option as of - * now. - * .description = "Its a Change Time Recorder Xlator option to " - * "enable recording write heat on metadata of the file. " + .type = NO_DOC, + .description = "Its a Change Time Recorder Xlator option to " + "enable recording write heat on metadata of the file. " "The default is disabled. " "Metadata is inode atttributes like atime, mtime," " permissions etc and " "extended attributes of a file ." - * */ }, { .key = "features.ctr_link_consistency", .voltype = "features/changetimerecorder", .value = "off", .option = "ctr_link_consistency", .op_version = GD_OP_VERSION_3_7_0, + .type = NO_DOC, .description = "Enable a crash consistent way of recording hardlink " - "updates by Change Time Recorder Xlator. When recording in a crash " - "consistent way the data operations will experience more latency." + "updates by Change Time Recorder Xlator. " + "When recording in a crash " + "consistent way the data operations will " + "experience more latency." }, - { .key = "features.ctr_hardlink_heal_expire_period", + { .key = "features.ctr_lookupheal_link_timeout", .voltype = "features/changetimerecorder", .value = "300", - .option = "ctr_hardlink_heal_expire_period", + .option = "ctr_lookupheal_link_timeout", .op_version = GD_OP_VERSION_3_7_2, + .type = NO_DOC, .description = "Defines the expiry period of in-memory " "hardlink of an inode," "used by lookup heal in Change Time Recorder." @@ -2154,11 +2414,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { "hardlink is done and the " "in-memory hardlink period is reset" }, - { .key = "features.ctr_inode_heal_expire_period", + { .key = "features.ctr_lookupheal_inode_timeout", .voltype = "features/changetimerecorder", .value = "300", - .option = "ctr_inode_heal_expire_period", + .option = "ctr_lookupheal_inode_timeout", .op_version = GD_OP_VERSION_3_7_2, + .type = NO_DOC, .description = "Defines the expiry period of in-memory inode," "used by lookup heal in Change Time Recorder. " "Once the expiry period" |