summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- tests/basic/tier/file_lock.c 75
-rwxr-xr-x tests/basic/tier/locked_file_migration.t 112
-rw-r--r-- xlators/cluster/dht/src/dht-rebalance.c 106
3 files changed, 282 insertions, 11 deletions
diff --git a/tests/basic/tier/file_lock.c b/tests/basic/tier/file_lock.c
new file mode 100644
index 00000000000..730cca92e42
--- /dev/null
+++ b/tests/basic/tier/file_lock.c
@@ -0,0 +1,75 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+
+/* Print the command-line synopsis to stdout.  The program name matches the
+ * binary that locked_file_migration.t builds from this file ("file_lock"). */
+void usage (void)
+{
+        printf ("Usage: file_lock <filepath> [R|W]\n");
+}
+
+
+/*
+ * Take a whole-file POSIX advisory lock on argv[1], hold it for 10 seconds
+ * and then release it.  argv[2] selects the lock type: "W" for a write lock,
+ * "R" for a read lock; any other value is rejected.
+ *
+ * Returns 0 on success.  Exits/returns 1 on bad usage or when the open, lock
+ * or unlock call fails.
+ */
+int main (int argc, char *argv[])
+{
+        char         *file_path = NULL;
+        int           fd        = -1;
+        struct flock  lock      = {0};
+        int           ret       = -1;
+
+        if (argc != 3) {
+                usage ();
+                exit (1);
+        }
+
+        /* Validate the lock type before any resource is acquired. */
+        if (!strcmp (argv[2], "W")) {
+                lock.l_type = F_WRLCK;
+        } else if (!strcmp (argv[2], "R")) {
+                lock.l_type = F_RDLCK;
+        } else {
+                usage ();
+                exit (1);
+        }
+
+        file_path = argv[1];
+        fd = open (file_path, O_RDWR);
+        if (-1 == fd) {
+                /* strerror(errno) instead of the glibc-only "%m". */
+                printf ("Failed to open file %s. %s\n", file_path,
+                        strerror (errno));
+                exit (1);
+        }
+
+        if (lock.l_type == F_WRLCK)
+                printf ("Taking write lock\n");
+        else
+                printf ("Taking read lock\n");
+
+        /* Offset 0 from the start with l_len == 0: lock through end of file. */
+        lock.l_whence = SEEK_SET;
+        lock.l_start = 0;
+        lock.l_len = 0;
+        lock.l_pid = getpid ();
+
+        printf ("Acquiring lock on %s\n", file_path);
+        /* F_SETLK does not block: a conflicting lock makes this fail at once. */
+        ret = fcntl (fd, F_SETLK, &lock);
+        if (ret) {
+                printf ("Failed to acquire lock on %s (%s)\n", file_path,
+                        strerror (errno));
+                close (fd);
+                exit (1);
+        }
+
+        /* Keep the lock held for 10 seconds before releasing it. */
+        sleep (10);
+
+        printf ("Releasing lock on %s\n", file_path);
+        lock.l_type = F_UNLCK;
+        ret = fcntl (fd, F_SETLK, &lock);
+        if (ret) {
+                printf ("Failed to release lock on %s (%s)\n", file_path,
+                        strerror (errno));
+        }
+
+        close (fd);
+
+        /* Report every failure with the same exit status as the paths above. */
+        return ret ? 1 : 0;
+}
diff --git a/tests/basic/tier/locked_file_migration.t b/tests/basic/tier/locked_file_migration.t
new file mode 100755
index 00000000000..c3ba1b27749
--- /dev/null
+++ b/tests/basic/tier/locked_file_migration.t
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+# Passed as the only argument to create_dist_tier_vol.
+NUM_BRICKS=3
+# Set as cluster.tier-demote-frequency; also used as a sleep duration (seconds)
+# before the demotion counters are polled.
+DEMOTE_FREQ=7
+# Set as cluster.tier-promote-frequency.
+PROMOTE_FREQ=30
+# First argument to EXPECT_WITHIN for the counter check -- presumably a timeout
+# in seconds; confirm against include.rc.
+DEMOTE_TIMEOUT=15
+
+# NOTE(review): not referenced anywhere in the visible part of this script, and
+# the text describes write/truncate fops rather than locked-file migration --
+# confirm whether it is still needed.
+TEST_STR="Testing write and truncate fops on tier migration"
+
+# Print the given path, then "yes" if the path has its sticky bit set and
+# "no" otherwise.
+#   $1 - path to examine
+function is_sticky_set () {
+        echo "$1"
+        # Quote the operand: unquoted, an empty argument collapses the test to
+        # `[ -k ]`, which is true because "-k" is a non-empty string.
+        if [ -k "$1" ]; then
+                echo "yes"
+        else
+                echo "no"
+        fi
+}
+
+# Compare the promote/demote counters on the "localhost" line of
+# "$CLI volume rebalance $V0 tier status" with the expected values.
+#   $1 - expected promote count (second field of the status line)
+#   $2 - expected demote count (third field of the status line)
+# Prints "0" when both match, "1" when the promote count differs, and "2"
+# when the promote count matches but the demote count differs.
+function check_counters {
+        local status_line=""
+        local promote=""
+        local demote=""
+        local result=0
+
+        # Keep everything in local variables instead of fixed /tmp/tc*.txt
+        # scratch files, which were shared with anything else using that name.
+        status_line=$($CLI volume rebalance $V0 tier status | grep localhost)
+        promote=$(echo "$status_line" | awk '{print $2}')
+        demote=$(echo "$status_line" | awk '{print $3}')
+
+        if [ "${promote}" != "${1}" ]; then
+                result=1
+        elif [ "${demote}" != "${2}" ]; then
+                result=2
+        fi
+
+        # temporarily disable non-Linux tests.
+        case $OSTYPE in
+        NetBSD | FreeBSD | Darwin)
+                result=0
+                ;;
+        esac
+
+        echo "$result"
+}
+
+
+# Creates a tiered volume with pure distribute hot and cold tiers.
+# Both tiers get the same number of bricks, numbered 0 through $1.
+#   $1 - index of the last brick in each tier
+function create_dist_tier_vol () {
+        local last_brick=$1
+        local cold_bricks=""
+        local hot_bricks=""
+        local i
+
+        # Brace expansion is performed before parameter expansion, so
+        # "{0..$1}" is never turned into a sequence and would yield a single
+        # brick with a literal "{0..N}" in its name.  Build the lists by hand.
+        for ((i = 0; i <= last_brick; i++)); do
+                cold_bricks="$cold_bricks $H0:$B0/cold/${V0}${i}"
+                hot_bricks="$hot_bricks $H0:$B0/hot/${V0}${i}"
+        done
+
+        mkdir $B0/cold
+        mkdir $B0/hot
+        TEST $CLI volume create $V0 $cold_bricks
+        TEST $CLI volume set $V0 performance.quick-read off
+        TEST $CLI volume set $V0 performance.io-cache off
+        TEST $CLI volume set $V0 features.ctr-enabled on
+        TEST $CLI volume start $V0
+        TEST $CLI volume attach-tier $V0 $hot_bricks
+        TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
+        TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
+
+        # We don't want promotes to happen in this test
+        TEST $CLI volume set $V0 cluster.read-freq-threshold 10
+        TEST $CLI volume set $V0 cluster.write-freq-threshold 10
+}
+
+
+# Test body: create a tiered volume, hold a POSIX write lock on one file and
+# check that the demotion counters match a run in which that file was skipped.
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+
+# Create and start a tiered volume
+create_dist_tier_vol $NUM_BRICKS
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+# Build the lock helper and copy the binary onto the volume; the copy is one of
+# the two files expected to be demoted below.
+# NOTE(review): the binary is referenced both as $(dirname $0)/file_lock and as
+# ./file_lock; these are the same file only when the test is run from its own
+# directory -- confirm against build_tester in include.rc.
+build_tester $(dirname $0)/file_lock.c -o file_lock
+cp $(dirname $0)/file_lock $M0/file_lock
+
+# The files will be created on the hot tier
+touch $M0/dir1/FILE1
+touch $M0/dir1/FILE2
+
+# For FILE1, take a POSIX write lock on the entire file.
+# Don't take a lock on FILE2
+
+# Runs in the background; file_lock.c holds the lock for 10 seconds.
+./file_lock $M0/dir1/FILE1 W &
+
+sleep $DEMOTE_FREQ
+
+# Wait for the tier process to demote the file
+# Only FILE2 and file_lock should be demoted
+# FILE1 should be skipped because of the lock held
+# on it
+
+# check_counters arguments: expected promote count 0, expected demote count 2.
+EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 0 2
+
+# NOTE(review): presumably gives the background file_lock process time to
+# release its lock and exit before cleanup -- confirm.
+sleep 10
+
+rm $(dirname $0)/file_lock
+
+cleanup;
+
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 9c45cd73bfd..1e98142e9ec 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -394,6 +394,7 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
gf_defrag_info_t *defrag)
{
int ret = -1;
+ int lock_count = 0;
if (IA_ISDIR (stbuf->ia_type)) {
gf_msg (this->name, GF_LOG_WARNING, 0,
@@ -404,10 +405,30 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
goto out;
}
+ ret = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Unable to get lock count for file", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ if (lock_count) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: File has locks."
+ " Skipping file migration", loc->path);
+ ret = -1;
+ goto out;
+ }
+
if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
ret = 0;
goto out;
}
+
if (stbuf->ia_nlink > 1) {
/* support for decomission */
if (flags == GF_DHT_MIGRATE_HARDLINK) {
@@ -442,7 +463,7 @@ out:
return ret;
}
-static inline int
+static int
__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
fd_t **dst_fd, dict_t *xattr)
{
@@ -998,7 +1019,6 @@ out:
return ret;
}
-
static int
__dht_migration_cleanup_src_file (xlator_t *this, loc_t *loc, fd_t *fd,
xlator_t *from, ia_prot_t *src_ia_prot)
@@ -1089,11 +1109,14 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
dht_conf_t *conf = this->private;
int rcvd_enoent_from_src = 0;
struct gf_flock flock = {0, };
+ struct gf_flock plock = {0, };
loc_t tmp_loc = {0, };
gf_boolean_t locked = _gf_false;
+ gf_boolean_t p_locked = _gf_false;
int lk_ret = -1;
gf_defrag_info_t *defrag = NULL;
gf_boolean_t clean_src = _gf_false;
+ gf_boolean_t clean_dst = _gf_false;
defrag = conf->defrag;
if (!defrag)
@@ -1115,6 +1138,17 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
+ /* Don't migrate files with POSIX locks */
+ ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to "
+ "set "GLUSTERFS_POSIXLK_COUNT" key in dict", loc->path);
+ goto out;
+ }
+
flock.l_type = F_WRLCK;
tmp_loc.inode = inode_ref (loc->inode);
@@ -1167,6 +1201,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
/* TODO: move all xattr related operations to fd based operations */
ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
if (ret < 0) {
@@ -1184,6 +1219,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret)
goto out;
+ clean_dst = _gf_true;
+
ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
if (ret) {
@@ -1216,6 +1253,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
file_has_holes = 1;
+
/* All I/O happens in this function */
ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes);
@@ -1224,15 +1262,6 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s: failed to migrate data",
loc->path);
- /* reset the destination back to 0 */
- ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed: "
- "%s: failed to reset target size back to 0 (%s)",
- loc->path, strerror (-ret));
- }
ret = -1;
goto out;
@@ -1262,6 +1291,35 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Lock the entire source file to prevent clients from taking a
+ lock on it as dht_lk does not handle file migration.
+
+ This still leaves a small window where conflicting locks can
+ be granted to different clients. If client1 requests a blocking
+ lock on the src file, it will be granted after the migrating
+ process releases its lock. If client2 requests a lock on the dst
+ data file, it will also be granted, but all FOPs will be redirected
+ to the dst data file.
+ */
+
+ plock.l_type = F_WRLCK;
+ plock.l_start = 0;
+ plock.l_len = 0;
+ plock.l_whence = SEEK_SET;
+
+ ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Failed to lock on %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ p_locked = _gf_true;
+
/* source would have both sticky bit and sgid bit set, reset it to 0,
and set the source permission on destination, if it was not set
prior to setting rebalance-modes in source */
@@ -1298,6 +1356,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = -1;
}
+ clean_dst = _gf_false;
+
/* Posix acls are not set on DHT linkto files as part of the initial
* initial xattrs set on the dst file, so these need
* to be set on the dst file after the linkto attrs are removed.
@@ -1443,6 +1503,18 @@ out:
}
}
+ /* reset the destination back to 0 */
+ if (clean_dst) {
+ ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: "
+ "%s: failed to reset target size back to 0",
+ loc->path);
+ }
+ }
+
if (locked) {
flock.l_type = F_UNLCK;
@@ -1456,6 +1528,18 @@ out:
}
}
+ if (p_locked) {
+ plock.l_type = F_UNLCK;
+ lk_ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+
+ if (lk_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s",
+ loc->path, from->name);
+ }
+ }
+
if (dict)
dict_unref (dict);