summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorN Balachandran <nbalacha@redhat.com>2015-10-13 15:02:00 +0530
committerDan Lambright <dlambrig@redhat.com>2015-10-15 06:08:37 -0700
commitbd71446b25aefe066ca18a28d73d777774ab7f87 (patch)
treeb83f7444288e78c49e3548a834a916623e11aed6
parent816ca94f5dd49f34f395caf501de3c71f0ba113d (diff)
cluster/dht : Do not migrate files with POSIX locks held
dht_migrate_file does not migrate file locks to the dst file. Any locks held on the source file are lost once the migration is complete. This issue is magnified in the case of a tier volume as file migrations occur more frequently and repeatedly than in a DHT rebalance. The fix makes 2 changes: 1. Before starting the actual migration process, check if there are any locks held on the file. If yes, do not migrate the file. 2. The rebalance process tries to take a lock on the entire file just before moving into Phase 2 of the file migration. If the lock acquisition fails, the file migration does not proceed. If the lock is granted, the file migration proceeds. This still leaves a small window where conflicting locks can be granted to different clients. If client1 requests a lock on the src file just after it is converted to a linkto file and client2 requests a lock on the dst data file, they will both be granted, but all FOPs will be redirected to the dst data file. This issue will be taken up in a subsequent patch. Change-Id: I8c895fc3cced50dd2894259d40a827c7b43d58ac BUG: 1271148 Signed-off-by: N Balachandran <nbalacha@redhat.com> Reviewed-on: http://review.gluster.org/12347 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Dan Lambright <dlambrig@redhat.com>
-rw-r--r--tests/basic/tier/file_lock.c75
-rwxr-xr-xtests/basic/tier/locked_file_migration.t112
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c105
3 files changed, 282 insertions, 10 deletions
diff --git a/tests/basic/tier/file_lock.c b/tests/basic/tier/file_lock.c
new file mode 100644
index 00000000000..730cca92e42
--- /dev/null
+++ b/tests/basic/tier/file_lock.c
@@ -0,0 +1,75 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+
+/*
+ * Print the command-line synopsis to stdout.
+ *
+ * Both arguments are mandatory (main rejects any other argument count):
+ * the path of the file to lock and the lock type, R (read) or W (write).
+ */
+void usage (void)
+{
+        printf ("Usage: file_lock <filepath> <R|W>\n");
+}
+
+
+/*
+ * Take a POSIX advisory lock covering the whole of the file named by
+ * argv[1], hold it for 10 seconds, then release it.
+ *
+ * argv[1] - path of an existing file; it is opened O_RDWR.
+ * argv[2] - lock type: "W" for a write lock, "R" for a read lock.
+ *
+ * Exits with status 1 on bad usage (wrong argument count or an unknown
+ * lock type), on failure to open the file, or on failure to acquire the
+ * lock. Otherwise returns the result of the final unlock fcntl() call
+ * (0 on success).
+ */
+int main (int argc, char *argv[])
+{
+        char *file_path = NULL;
+        int fd = -1;
+        struct flock lock = {0};
+        int ret = -1;
+
+        if (argc != 3) {
+                usage ();
+                exit (1);
+        }
+
+        /* Reject an unknown lock type up front instead of silently
+           falling back to a read lock. */
+        if (!strcmp (argv[2], "W")) {
+                lock.l_type = F_WRLCK;
+        } else if (!strcmp (argv[2], "R")) {
+                lock.l_type = F_RDLCK;
+        } else {
+                usage ();
+                exit (1);
+        }
+
+        file_path = argv[1];
+        fd = open (file_path, O_RDWR);
+        if (-1 == fd) {
+                /* strerror (errno) rather than the glibc-only "%m" */
+                printf ("Failed to open file %s. %s\n", file_path,
+                        strerror (errno));
+                exit (1);
+        }
+
+        if (lock.l_type == F_WRLCK)
+                printf ("Taking write lock\n");
+        else
+                printf ("Taking read lock\n");
+
+        /* l_start == 0 from SEEK_SET with l_len == 0 covers the file
+           from its first byte to end-of-file, however large it grows. */
+        lock.l_whence = SEEK_SET;
+        lock.l_start = 0;
+        lock.l_len = 0;
+
+        printf ("Acquiring lock on %s\n", file_path);
+        /* F_SETLK is non-blocking: it fails immediately if a conflicting
+           lock is already held. */
+        ret = fcntl (fd, F_SETLK, &lock);
+        if (ret) {
+                printf ("Failed to acquire lock on %s (%s)\n", file_path,
+                        strerror (errno));
+                close (fd);
+                exit (1);
+        }
+
+        /* Hold the lock long enough for the caller to observe it. */
+        sleep (10);
+
+        /* Unlock */
+        printf ("Releasing lock on %s\n", file_path);
+        lock.l_type = F_UNLCK;
+        ret = fcntl (fd, F_SETLK, &lock);
+        if (ret) {
+                printf ("Failed to release lock on %s (%s)\n", file_path,
+                        strerror (errno));
+        }
+
+        close (fd);
+        return ret;
+}
diff --git a/tests/basic/tier/locked_file_migration.t b/tests/basic/tier/locked_file_migration.t
new file mode 100755
index 00000000000..c3ba1b27749
--- /dev/null
+++ b/tests/basic/tier/locked_file_migration.t
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+NUM_BRICKS=3
+DEMOTE_FREQ=7
+PROMOTE_FREQ=30
+DEMOTE_TIMEOUT=15
+
+TEST_STR="Testing write and truncate fops on tier migration"
+
+# Print the given path, then "yes" if that path has its sticky bit set and
+# "no" otherwise.
+#   $1 - path to examine
+# The argument is quoted so that an empty or whitespace-containing path is
+# tested as a single operand; unquoted, an empty $1 turns the test into
+# "[ -k ]", which is always true.
+function is_sticky_set () {
+        echo "$1"
+        if [ -k "$1" ]; then
+                echo "yes"
+        else
+                echo "no"
+        fi
+}
+
+function check_counters {
+ index=0
+ ret=0
+ rm -f /tmp/tc*.txt
+ echo "0" > /tmp/tc2.txt
+ $CLI volume rebalance $V0 tier status | grep localhost > /tmp/tc.txt
+
+ promote=`cat /tmp/tc.txt |awk '{print $2}'`
+ demote=`cat /tmp/tc.txt |awk '{print $3}'`
+ if [ "${promote}" != "${1}" ]; then
+ echo "1" > /tmp/tc2.txt
+
+ elif [ "${demote}" != "${2}" ]; then
+ echo "2" > /tmp/tc2.txt
+ fi
+
+ # temporarily disable non-Linux tests.
+ case $OSTYPE in
+ NetBSD | FreeBSD | Darwin)
+ echo "0" > /tmp/tc2.txt
+ ;;
+ esac
+ cat /tmp/tc2.txt
+}
+
+
+# Creates a tiered volume with pure distribute hot and cold tiers
+# Both hot and cold tiers will have an equal number of bricks.
+#   $1 - index of the last brick; bricks ${V0}0 .. ${V0}$1 are created in
+#        each tier (i.e. $1 + 1 bricks per tier).
+#
+# The brick lists are built in a loop because a "{0..$1}" range is never
+# expanded: bash performs brace expansion before parameter expansion, so
+# that form yields one brick literally named "${V0}{0..N}".
+
+function create_dist_tier_vol () {
+        local last_brick=$1
+        local i
+        local cold_bricks=""
+        local hot_bricks=""
+
+        for ((i = 0; i <= last_brick; i++)); do
+                cold_bricks="$cold_bricks $H0:$B0/cold/${V0}${i}"
+                hot_bricks="$hot_bricks $H0:$B0/hot/${V0}${i}"
+        done
+
+        mkdir $B0/cold
+        mkdir $B0/hot
+        # The brick lists are left unquoted on purpose so that they split
+        # into one argument per brick.
+        TEST $CLI volume create $V0 $cold_bricks
+        TEST $CLI volume set $V0 performance.quick-read off
+        TEST $CLI volume set $V0 performance.io-cache off
+        TEST $CLI volume set $V0 features.ctr-enabled on
+        TEST $CLI volume start $V0
+        TEST $CLI volume attach-tier $V0 $hot_bricks
+        TEST $CLI volume set $V0 cluster.tier-demote-frequency $DEMOTE_FREQ
+        TEST $CLI volume set $V0 cluster.tier-promote-frequency $PROMOTE_FREQ
+
+#We don't want promotes to happen in this test
+        TEST $CLI volume set $V0 cluster.read-freq-threshold 10
+        TEST $CLI volume set $V0 cluster.write-freq-threshold 10
+}
+
+
+# Test body: create a tiered volume, put two files on it, hold a POSIX write
+# lock on one of them, and check through the tier counters that the expected
+# number of files was demoted.
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+
+# Create and start a tiered volume
+create_dist_tier_vol $NUM_BRICKS
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+# Build the lock helper and copy it onto the volume as well; the copy is one
+# of the two files expected to be demoted (see the counter check below).
+build_tester $(dirname $0)/file_lock.c -o file_lock
+cp $(dirname $0)/file_lock $M0/file_lock
+
+# The files will be created on the hot tier
+touch $M0/dir1/FILE1
+touch $M0/dir1/FILE2
+
+# For FILE1, take a POSIX write lock on the entire file.
+# Don't take a lock on FILE2
+
+# Run in the background: the helper keeps the lock for 10 seconds
+# (sleep(10) in file_lock.c) while the script carries on.
+./file_lock $M0/dir1/FILE1 W &
+
+# Sleep for the configured demote frequency before polling the counters.
+sleep $DEMOTE_FREQ
+
+# Wait for the tier process to demote the file
+# Only FILE2 and file_lock should be demoted
+# FILE1 should be skipped because of the lock held
+# on it
+
+# check_counters prints "0" when promote == 0 and demote == 2.
+EXPECT_WITHIN $DEMOTE_TIMEOUT "0" check_counters 0 2
+
+# NOTE(review): presumably gives the background file_lock helper time to
+# release its lock and exit before cleanup -- confirm.
+sleep 10
+
+# Remove the file_lock binary from this script's directory.
+rm $(dirname $0)/file_lock
+
+cleanup;
+
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index a6c14b085fa..b3c25ba9ee2 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -389,6 +389,7 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
gf_defrag_info_t *defrag)
{
int ret = -1;
+ int lock_count = 0;
if (IA_ISDIR (stbuf->ia_type)) {
gf_msg (this->name, GF_LOG_WARNING, 0,
@@ -399,10 +400,30 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
goto out;
}
+ ret = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Unable to get lock count for file", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ if (lock_count) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: File has locks."
+ " Skipping file migration", loc->path);
+ ret = -1;
+ goto out;
+ }
+
if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
ret = 0;
goto out;
}
+
if (stbuf->ia_nlink > 1) {
/* support for decomission */
if (flags == GF_DHT_MIGRATE_HARDLINK) {
@@ -437,6 +458,7 @@ out:
return ret;
}
+
static int
__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
fd_t **dst_fd, dict_t *xattr)
@@ -993,7 +1015,6 @@ out:
return ret;
}
-
static int
__dht_migration_cleanup_src_file (xlator_t *this, loc_t *loc, fd_t *fd,
xlator_t *from, ia_prot_t *src_ia_prot)
@@ -1084,11 +1105,14 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
dht_conf_t *conf = this->private;
int rcvd_enoent_from_src = 0;
struct gf_flock flock = {0, };
+ struct gf_flock plock = {0, };
loc_t tmp_loc = {0, };
gf_boolean_t locked = _gf_false;
+ gf_boolean_t p_locked = _gf_false;
int lk_ret = -1;
gf_defrag_info_t *defrag = NULL;
gf_boolean_t clean_src = _gf_false;
+ gf_boolean_t clean_dst = _gf_false;
defrag = conf->defrag;
if (!defrag)
@@ -1110,6 +1134,17 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
+ /* Don't migrate files with POSIX locks */
+ ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to "
+ "set "GLUSTERFS_POSIXLK_COUNT" key in dict", loc->path);
+ goto out;
+ }
+
flock.l_type = F_WRLCK;
tmp_loc.inode = inode_ref (loc->inode);
@@ -1162,6 +1197,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
/* TODO: move all xattr related operations to fd based operations */
ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
if (ret < 0) {
@@ -1179,6 +1215,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret)
goto out;
+ clean_dst = _gf_true;
+
ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
if (ret) {
@@ -1211,6 +1249,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
file_has_holes = 1;
+
/* All I/O happens in this function */
ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes);
@@ -1219,15 +1258,6 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s: failed to migrate data",
loc->path);
- /* reset the destination back to 0 */
- ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed: "
- "%s: failed to reset target size back to 0 (%s)",
- loc->path, strerror (-ret));
- }
ret = -1;
goto out;
@@ -1257,6 +1287,35 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Lock the entire source file to prevent clients from taking a
+ lock on it as dht_lk does not handle file migration.
+
+ This still leaves a small window where conflicting locks can
+ be granted to different clients. If client1 requests a blocking
+ lock on the src file, it will be granted after the migrating
+ process releases its lock. If client2 requests a lock on the dst
+ data file, it will also be granted, but all FOPs will be redirected
+ to the dst data file.
+ */
+
+ plock.l_type = F_WRLCK;
+ plock.l_start = 0;
+ plock.l_len = 0;
+ plock.l_whence = SEEK_SET;
+
+ ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Failed to lock on %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ p_locked = _gf_true;
+
/* source would have both sticky bit and sgid bit set, reset it to 0,
and set the source permission on destination, if it was not set
prior to setting rebalance-modes in source */
@@ -1293,6 +1352,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = -1;
}
+ clean_dst = _gf_false;
+
/* Posix acls are not set on DHT linkto files as part of the initial
* initial xattrs set on the dst file, so these need
* to be set on the dst file after the linkto attrs are removed.
@@ -1438,6 +1499,18 @@ out:
}
}
+ /* reset the destination back to 0 */
+ if (clean_dst) {
+ ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: "
+ "%s: failed to reset target size back to 0",
+ loc->path);
+ }
+ }
+
if (locked) {
flock.l_type = F_UNLCK;
@@ -1451,6 +1524,18 @@ out:
}
}
+ if (p_locked) {
+ plock.l_type = F_UNLCK;
+ lk_ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+
+ if (lk_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s",
+ loc->path, from->name);
+ }
+ }
+
if (dict)
dict_unref (dict);