diff options
-rw-r--r-- | tests/basic/tier/file_lock.c | 75 | ||||
-rwxr-xr-x | tests/basic/tier/locked_file_migration.t | 112 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 106 |
3 files changed, 282 insertions, 11 deletions
diff --git a/tests/basic/tier/file_lock.c b/tests/basic/tier/file_lock.c new file mode 100644 index 00000000000..730cca92e42 --- /dev/null +++ b/tests/basic/tier/file_lock.c @@ -0,0 +1,75 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> + +void usage (void) +{ + + printf ("Usage: testlock <filepath> [R|W]\n"); + return; +} + +/* Take a whole-file POSIX lock on argv[1] (write lock if argv[2] is "W", read lock otherwise), hold it for 10 seconds, then release it. Returns the result of the unlock fcntl; exits with 1 on bad usage, open failure or lock failure. */ +int main (int argc, char *argv[]) +{ + char *file_path = NULL; + int fd = -1; + struct flock lock = {0}; + int ret = -1; + int c = 0; + + if (argc != 3) { + usage (); + exit (1); + } + + file_path = argv[1]; + fd = open (file_path, O_RDWR); + + if (-1 == fd) { + printf ("Failed to open file %s. %m\n", file_path); + exit (1); + } + + /* TODO: Check for invalid input*/ + + if (!strcmp (argv[2], "W")) { + lock.l_type = F_WRLCK; + printf("Taking write lock\n"); + + } else { + lock.l_type = F_RDLCK; + printf("Taking read lock\n"); + } + + lock.l_whence = SEEK_SET; + lock.l_start = 0; + lock.l_len = 0; + lock.l_pid = getpid (); + + + printf ("Acquiring lock on %s\n", file_path); + ret = fcntl (fd, F_SETLK, &lock); + if (ret) { + printf ("Failed to acquire lock on %s (%m)\n", file_path); + close (fd); + exit (1); + } + + sleep(10); + + /*Unlock*/ + + printf ("Releasing lock on %s\n", file_path); + lock.l_type = F_UNLCK; + ret = fcntl (fd, F_SETLK, &lock); + if (ret) { + printf ("Failed to release lock on %s (%m)\n", file_path); + } + + close (fd); + return ret; + +} diff --git a/tests/basic/tier/locked_file_migration.t b/tests/basic/tier/locked_file_migration.t new file mode 100755 index 00000000000..c3ba1b27749 --- /dev/null +++ b/tests/basic/tier/locked_file_migration.t @@ -0,0 +1,112 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + + +NUM_BRICKS=3 +DEMOTE_FREQ=7 +PROMOTE_FREQ=30 +DEMOTE_TIMEOUT=15 + +TEST_STR="Testing write and truncate fops on tier migration" + +function is_sticky_set () { + echo $1 + if [ -k $1 ]; + then + echo "yes" + else + echo "no" + fi +} + +function check_counters { + index=0 + ret=0 + rm -f /tmp/tc*.txt + echo "0" > /tmp/tc2.txt + $CLI volume rebalance $V0 tier status | grep localhost > /tmp/tc.txt + + promote=`cat /tmp/tc.txt |awk '{print $2}'` + demote=`cat /tmp/tc.txt |awk '{print $3}'` + if [ "${promote}" != "${1}" ]; then + echo "1" > /tmp/tc2.txt + + elif [ "${demote}" != "${2}" ]; then + echo "2" > /tmp/tc2.txt + fi + + # temporarily disable non-Linux tests. + case $OSTYPE in + NetBSD | FreeBSD | Darwin) + echo "0" > /tmp/tc2.txt + ;; + esac + cat /tmp/tc2.txt +} + + +# Creates a tiered volume with pure distribute hot and cold tiers +# Both hot and cold tiers will have an equal number of bricks. + +function create_dist_tier_vol () { + mkdir $B0/cold + mkdir $B0/hot + TEST $CLI volume create $V0 $H0:$B0/cold/${V0}{0..$1} + TEST $CLI volume set $V0 performance.quick-read off + TEST $CLI volume set $V0 performance.io-cache off + TEST $CLI volume set $V0 features.ctr-enabled on + TEST $CLI volume start $V0 + TEST $CLI volume attach-tier $V0 $H0:$B0/hot/${V0}{0..$1} + TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ + TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ + +#We don't want promotes to happen in this test + TEST $CLI volume set $V0 cluster.read-freq-threshold 10 + TEST $CLI volume set $V0 cluster.write-freq-threshold 10 +} + + +cleanup; + +#Basic checks +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + + +# Create and start a tiered volume +create_dist_tier_vol $NUM_BRICKS + +# Mount FUSE +TEST glusterfs -s $H0 --volfile-id $V0 $M0 + +TEST mkdir $M0/dir1 +build_tester $(dirname $0)/file_lock.c -o file_lock +cp $(dirname $0)/file_lock $M0/file_lock + +# The files will be 
created on the hot tier +touch $M0/dir1/FILE1 +touch $M0/dir1/FILE2 + +# For FILE1, take a POSIX write lock on the entire file. +# Don't take a lock on FILE2 + +./file_lock $M0/dir1/FILE1 W & + +sleep $DEMOTE_FREQ + +# Wait for the tier process to demote the file +# Only FILE2 and file_lock should be demoted +# FILE1 should be skipped because of the lock held +# on it + +EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 0 2 + +sleep 10 + +rm $(dirname $0)/file_lock + +cleanup; + diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 9c45cd73bfd..1e98142e9ec 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -394,6 +394,7 @@ __is_file_migratable (xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) { int ret = -1; + int lock_count = 0; if (IA_ISDIR (stbuf->ia_type)) { gf_msg (this->name, GF_LOG_WARNING, 0, @@ -404,10 +405,30 @@ __is_file_migratable (xlator_t *this, loc_t *loc, goto out; } + ret = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed:" + "%s: Unable to get lock count for file", loc->path); + ret = -1; + goto out; + } + + if (lock_count) { + gf_msg (this->name, GF_LOG_WARNING, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: File has locks." 
+ " Skipping file migration", loc->path); + ret = -1; + goto out; + } + if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) { ret = 0; goto out; } + if (stbuf->ia_nlink > 1) { /* support for decomission */ if (flags == GF_DHT_MIGRATE_HARDLINK) { @@ -442,7 +463,7 @@ out: return ret; } -static inline int +static int __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf, fd_t **dst_fd, dict_t *xattr) { @@ -998,7 +1019,6 @@ out: return ret; } - static int __dht_migration_cleanup_src_file (xlator_t *this, loc_t *loc, fd_t *fd, xlator_t *from, ia_prot_t *src_ia_prot) @@ -1089,11 +1109,14 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, dht_conf_t *conf = this->private; int rcvd_enoent_from_src = 0; struct gf_flock flock = {0, }; + struct gf_flock plock = {0, }; loc_t tmp_loc = {0, }; gf_boolean_t locked = _gf_false; + gf_boolean_t p_locked = _gf_false; int lk_ret = -1; gf_defrag_info_t *defrag = NULL; gf_boolean_t clean_src = _gf_false; + gf_boolean_t clean_dst = _gf_false; defrag = conf->defrag; if (!defrag) @@ -1115,6 +1138,17 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + + /* Don't migrate files with POSIX locks */ + ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t)); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: %s: failed to " + "set "GLUSTERFS_POSIXLK_COUNT" key in dict", loc->path); + goto out; + } + flock.l_type = F_WRLCK; tmp_loc.inode = inode_ref (loc->inode); @@ -1167,6 +1201,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + /* TODO: move all xattr related operations to fd based operations */ ret = syncop_listxattr (from, loc, &xattr, NULL, NULL); if (ret < 0) { @@ -1184,6 +1219,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (ret) goto out; + clean_dst = _gf_true; + ret = 
__dht_check_free_space (to, from, loc, &stbuf, flag); if (ret) { @@ -1216,6 +1253,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE)) file_has_holes = 1; + /* All I/O happens in this function */ ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd, stbuf.ia_size, file_has_holes); @@ -1224,15 +1262,6 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, DHT_MSG_MIGRATE_FILE_FAILED, "Migrate file failed: %s: failed to migrate data", loc->path); - /* reset the destination back to 0 */ - ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - DHT_MSG_MIGRATE_FILE_FAILED, - "Migrate file failed: " - "%s: failed to reset target size back to 0 (%s)", - loc->path, strerror (-ret)); - } ret = -1; goto out; @@ -1262,6 +1291,35 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, goto out; } + /* Lock the entire source file to prevent clients from taking a + lock on it as dht_lk does not handle file migration. + + This still leaves a small window where conflicting locks can + be granted to different clients. If client1 requests a blocking + lock on the src file, it will be granted after the migrating + process releases its lock. If client2 requests a lock on the dst + data file, it will also be granted, but all FOPs will be redirected + to the dst data file. 
+ */ + + plock.l_type = F_WRLCK; + plock.l_start = 0; + plock.l_len = 0; + plock.l_whence = SEEK_SET; + + ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, -ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: " + "%s: Failed to lock on %s", + loc->path, from->name); + ret = -1; + goto out; + } + + p_locked = _gf_true; + /* source would have both sticky bit and sgid bit set, reset it to 0, and set the source permission on destination, if it was not set prior to setting rebalance-modes in source */ @@ -1298,6 +1356,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, ret = -1; } + clean_dst = _gf_false; + /* Posix acls are not set on DHT linkto files as part of the initial * initial xattrs set on the dst file, so these need * to be set on the dst file after the linkto attrs are removed. @@ -1443,6 +1503,18 @@ out: } } + /* reset the destination back to 0; the result is kept in lk_ret so the migration status already stored in ret is not overwritten by this cleanup */ + if (clean_dst) { + lk_ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL); + if (lk_ret) { + gf_msg (this->name, GF_LOG_ERROR, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "Migrate file failed: " + "%s: failed to reset target size back to 0", + loc->path); + } + } + if (locked) { flock.l_type = F_UNLCK; @@ -1456,6 +1528,18 @@ out: } } + if (p_locked) { + plock.l_type = F_UNLCK; + lk_ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL); + + if (lk_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, -lk_ret, + DHT_MSG_MIGRATE_FILE_FAILED, + "%s: failed to unlock file on %s", + loc->path, from->name); + } + } + if (dict) dict_unref (dict); |