diff options
-rw-r--r-- | libglusterfs/src/glusterfs.h | 2 | ||||
-rw-r--r-- | tests/basic/tier/legacy-many.t | 122 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 130 | ||||
-rw-r--r-- | xlators/features/changetimerecorder/src/changetimerecorder.c | 1 | ||||
-rw-r--r-- | xlators/features/changetimerecorder/src/ctr-helper.h | 8 |
5 files changed, 257 insertions, 6 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index f23f19cbaa0..2a556485824 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -244,6 +244,8 @@
 #define CTR_RESPONSE_LINK_COUNT_XDATA "ctr_response_link_count"
 #define CTR_REQUEST_LINK_COUNT_XDATA "ctr_request_link_count"
 
+#define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
+
 #define GF_LOG_LRU_BUFSIZE_DEFAULT 5
 #define GF_LOG_LRU_BUFSIZE_MIN 0
 #define GF_LOG_LRU_BUFSIZE_MAX 20
diff --git a/tests/basic/tier/legacy-many.t b/tests/basic/tier/legacy-many.t
new file mode 100644
index 00000000000..17275494aba
--- /dev/null
+++ b/tests/basic/tier/legacy-many.t
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+LAST_BRICK=3
+CACHE_BRICK_FIRST=4
+CACHE_BRICK_LAST=5
+DEMOTE_TIMEOUT=12
+PROMOTE_TIMEOUT=5
+MIGRATION_TIMEOUT=10
+DEMOTE_FREQ=60
+PROMOTE_FREQ=4
+TEST_DIR="test_files"
+NUM_FILES=20
+
+
+# Print the md5sum of file $1 without the file path (errors are discarded)
+function fingerprint {
+    md5sum "$1" 2> /dev/null | grep --only-matching -m 1 '^[0-9a-f]*'
+}
+
+# Create NUM_FILES 1MB files of random data. Store their md5 signatures.
+function create_many_files {
+    mkdir ${TEST_DIR}
+    for i in `seq 1 $NUM_FILES`; do
+        dd if=/dev/urandom of=./${TEST_DIR}/i$i bs=1048576 count=1;
+        id[i]=$(fingerprint "./${TEST_DIR}/i$i");
+    done
+}
+
+function confirm_tier_removed {
+    $CLI system getspec $V0 | grep $1
+    if [ $? == 0 ]; then
+        echo "1"
+    else
+        echo "0"
+    fi
+}
+
+function confirm_vol_stopped {
+    $CLI volume stop $1
+    if [ $? == 0 ]; then
+        echo "0"
+    else
+        echo "1"
+    fi
+}
+
+function check_counters {
+    index=0
+    ret=0
+    rm -f /tmp/tc*.txt
+    echo "0" > /tmp/tc2.txt
+
+    $CLI volume rebalance $V0 tier status | grep localhost > /tmp/tc.txt
+
+    promote=`cat /tmp/tc.txt |awk '{print $2}'`
+    demote=`cat /tmp/tc.txt |awk '{print $3}'`
+    if [ "${promote}" != "${1}" ]; then
+        echo "1" > /tmp/tc2.txt
+
+    elif [ "${demote}" != "${2}" ]; then
+        echo "2" > /tmp/tc2.txt
+    fi
+
+    # temporarily disable non-Linux tests.
+    case $OSTYPE in
+        NetBSD | FreeBSD | Darwin)
+            echo "0" > /tmp/tc2.txt
+            ;;
+    esac
+    cat /tmp/tc2.txt
+}
+
+function read_all {
+    for file in *
+    do
+        cat "$file"
+    done
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+# Create distributed replica volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..$LAST_BRICK}
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 features.ctr-enabled on
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create a number of "legacy" files before attaching tier
+cd $M0
+TEST create_many_files
+wait
+
+# Attach tier
+TEST $CLI volume attach-tier $V0 replica 2 $H0:$B0/${V0}$CACHE_BRICK_FIRST $H0:$B0/${V0}$CACHE_BRICK_LAST
+TEST $CLI volume rebalance $V0 tier status
+
+TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
+TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
+TEST $CLI volume set $V0 cluster.read-freq-threshold 0
+TEST $CLI volume set $V0 cluster.write-freq-threshold 0
+
+# Read "legacy" files
+drop_cache $M0
+cd ${TEST_DIR}
+TEST read_all
+
+# Test to make sure all the legacy files were promoted and none demoted
+sleep $DEMOTE_TIMEOUT
+EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters $NUM_FILES 0
+
+cd;
+cleanup
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 69c64816909..9c45cd73bfd 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ 
b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2568,6 +2568,118 @@ gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag,
         return 0;
 }
 
+
+
+/* Do a named lookup on a file inode during attach tier so that a hardlink
+ * lookup heal, i.e. a gfid to parent gfid lookup heal, happens on
+ * pre-existing data. This is required so that the ctr database has the
+ * hardlinks of all the existing files in the volume. The CTR xlator on the
+ * brick/server side does a db update/insert of the hardlink on a named lookup.
+ * Currently the named lookup is done synchronously with the fix-layout that
+ * is triggered by attach tier, which adds time to the fix-layout. A more
+ * performant approach is to record the hardlinks in a compressed datastore
+ * and do the named lookups asynchronously later, giving the ctr db eventual
+ * consistency. Returns 0 on success and a non-zero value on any failure.
+ * */
+int
+gf_fix_layout_tier_attach_lookup (xlator_t *this,
+                                 loc_t *parent_loc,
+                                 gf_dirent_t *file_dentry)
+{
+        int                      ret            = -1;
+        dict_t                  *lookup_xdata   = NULL;
+        dht_conf_t              *conf           = NULL;
+        loc_t                    file_loc       = {0,};
+        struct iatt              iatt           = {0,};
+
+        GF_VALIDATE_OR_GOTO ("tier", this, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, parent_loc, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, file_dentry, out);
+
+        GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+
+        if (!parent_loc->inode) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "%s/%s parent is NULL", parent_loc->path,
+                        file_dentry->d_name);
+                goto out;
+        }
+
+
+        conf = this->private;
+
+        loc_wipe (&file_loc);
+
+        if (gf_uuid_is_null (file_dentry->d_stat.ia_gfid)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "%s/%s gfid not present", parent_loc->path,
+                        file_dentry->d_name);
+                goto out;
+        }
+
+        gf_uuid_copy (file_loc.gfid, file_dentry->d_stat.ia_gfid);
+
+        if (gf_uuid_is_null (parent_loc->gfid)) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "%s/%s"
+                        " parent gfid not present", parent_loc->path,
+                        file_dentry->d_name);
+                goto out;
+        }
+
+        gf_uuid_copy (file_loc.pargfid, parent_loc->gfid);
+
+        ret = dht_build_child_loc (this, &file_loc, parent_loc,
+                                   file_dentry->d_name);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Child loc build failed");
+                ret = -1;
+                goto out;
+        }
+
+        lookup_xdata = dict_new ();
+        if (!lookup_xdata) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed creating lookup dict for %s",
+                        file_dentry->d_name);
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_int32 (lookup_xdata, CTR_ATTACH_TIER_LOOKUP, 1);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "Failed to set lookup flag");
+                goto out;
+        }
+
+        gf_uuid_copy (file_loc.parent->gfid, parent_loc->gfid);
+
+        /* Sending lookup to cold tier only */
+        ret = syncop_lookup (conf->subvolumes[0], &file_loc, &iatt,
+                             NULL, lookup_xdata, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR,
+                        "%s lookup failed", file_loc.path);
+                goto out;
+        }
+
+        ret = 0;
+
+out:
+
+        loc_wipe (&file_loc);
+
+        if (lookup_xdata)
+                dict_unref (lookup_xdata);
+
+        return ret;
+}
+
+
 int
 gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
                       dict_t *fix_layout, dict_t *migrate_data)
@@ -2583,6 +2695,8 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
         struct iatt              iatt           = {0,};
         inode_t                 *linked_inode   = NULL, *inode = NULL;
 
+
+
         ret = syncop_lookup (this, loc, &iatt, NULL, NULL, NULL);
         if (ret) {
                 gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
@@ -2644,10 +2758,22 @@ gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
                         if (!strcmp (entry->d_name, ".") ||
                             !strcmp (entry->d_name, ".."))
                                 continue;
+                        if (!IA_ISDIR (entry->d_stat.ia_type)) {
+
+                                /* If it is a fix layout during the attach
+                                 * tier operation do lookups on files
+                                 * on cold subvolume so that there is a
+                                 * CTR DB Lookup Heal triggered on existing
+                                 * data. The heal is best effort.
+                                 * */
+                                if (defrag->cmd ==
+                                        GF_DEFRAG_CMD_START_TIER) {
+                                        gf_fix_layout_tier_attach_lookup
+                                                (this, loc, entry);
+                                }
 
-                        if (!IA_ISDIR (entry->d_stat.ia_type))
                                 continue;
-
+                        }
 
                         loc_wipe (&entry_loc);
                         ret =dht_build_child_loc (this, &entry_loc, loc,
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c
index 258b56ba541..090e54ca319 100644
--- a/xlators/features/changetimerecorder/src/changetimerecorder.c
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.c
@@ -214,7 +214,6 @@ ctr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         gf_boolean_t            _is_heal_needed = _gf_false;
 
         CTR_IS_DISABLED_THEN_GOTO(this, out);
-        CTR_IF_INTERNAL_FOP_THEN_GOTO (frame, dict, out);
 
         /* if the lookup failed lookup dont do anything*/
         if (op_ret == -1) {
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h
index 244427230b4..51dec44598d 100644
--- a/xlators/features/changetimerecorder/src/ctr-helper.h
+++ b/xlators/features/changetimerecorder/src/ctr-helper.h
@@ -289,10 +289,12 @@ do {\
  * */
 #define CTR_IS_INTERNAL_FOP(frame, dict)\
         (AFR_SELF_HEAL_FOP (frame) \
-        || REBALANCE_FOP (frame) \
-        || TIER_REBALANCE_FOP (frame) \
+        || (REBALANCE_FOP (frame) && \
+            !(dict && dict_get (dict, CTR_ATTACH_TIER_LOOKUP))) \
+        || (TIER_REBALANCE_FOP (frame) && \
+            !(dict && dict_get (dict, CTR_ATTACH_TIER_LOOKUP))) \
         || (dict && \
-        dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)))
+            dict_get (dict, GLUSTERFS_INTERNAL_FOP_KEY)))
 
 /**
  * ignore internal fops for all clients except AFR self-heal daemon