summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c105
1 files changed, 95 insertions, 10 deletions
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index a6c14b085fa..b3c25ba9ee2 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -389,6 +389,7 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
gf_defrag_info_t *defrag)
{
int ret = -1;
+ int lock_count = 0;
if (IA_ISDIR (stbuf->ia_type)) {
gf_msg (this->name, GF_LOG_WARNING, 0,
@@ -399,10 +400,30 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
goto out;
}
+ ret = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Unable to get lock count for file", loc->path);
+ ret = -1;
+ goto out;
+ }
+
+ if (lock_count) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: File has locks."
+ " Skipping file migration", loc->path);
+ ret = -1;
+ goto out;
+ }
+
if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
ret = 0;
goto out;
}
+
if (stbuf->ia_nlink > 1) {
/* support for decomission */
if (flags == GF_DHT_MIGRATE_HARDLINK) {
@@ -437,6 +458,7 @@ out:
return ret;
}
+
static int
__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
fd_t **dst_fd, dict_t *xattr)
@@ -993,7 +1015,6 @@ out:
return ret;
}
-
static int
__dht_migration_cleanup_src_file (xlator_t *this, loc_t *loc, fd_t *fd,
xlator_t *from, ia_prot_t *src_ia_prot)
@@ -1084,11 +1105,14 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
dht_conf_t *conf = this->private;
int rcvd_enoent_from_src = 0;
struct gf_flock flock = {0, };
+ struct gf_flock plock = {0, };
loc_t tmp_loc = {0, };
gf_boolean_t locked = _gf_false;
+ gf_boolean_t p_locked = _gf_false;
int lk_ret = -1;
gf_defrag_info_t *defrag = NULL;
gf_boolean_t clean_src = _gf_false;
+ gf_boolean_t clean_dst = _gf_false;
defrag = conf->defrag;
if (!defrag)
@@ -1110,6 +1134,17 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
+ /* Don't migrate files with POSIX locks */
+ ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to "
+ "set "GLUSTERFS_POSIXLK_COUNT" key in dict", loc->path);
+ goto out;
+ }
+
flock.l_type = F_WRLCK;
tmp_loc.inode = inode_ref (loc->inode);
@@ -1162,6 +1197,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+
/* TODO: move all xattr related operations to fd based operations */
ret = syncop_listxattr (from, loc, &xattr, NULL, NULL);
if (ret < 0) {
@@ -1179,6 +1215,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (ret)
goto out;
+ clean_dst = _gf_true;
+
ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
if (ret) {
@@ -1211,6 +1249,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
file_has_holes = 1;
+
/* All I/O happens in this function */
ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
stbuf.ia_size, file_has_holes);
@@ -1219,15 +1258,6 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
DHT_MSG_MIGRATE_FILE_FAILED,
"Migrate file failed: %s: failed to migrate data",
loc->path);
- /* reset the destination back to 0 */
- ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "Migrate file failed: "
- "%s: failed to reset target size back to 0 (%s)",
- loc->path, strerror (-ret));
- }
ret = -1;
goto out;
@@ -1257,6 +1287,35 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
goto out;
}
+ /* Lock the entire source file to prevent clients from taking a
+ lock on it as dht_lk does not handle file migration.
+
+ This still leaves a small window where conflicting locks can
+ be granted to different clients. If client1 requests a blocking
+ lock on the src file, it will be granted after the migrating
+ process releases its lock. If client2 requests a lock on the dst
+ data file, it will also be granted, but all FOPs will be redirected
+ to the dst data file.
+ */
+
+ plock.l_type = F_WRLCK;
+ plock.l_start = 0;
+ plock.l_len = 0;
+ plock.l_whence = SEEK_SET;
+
+ ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Failed to lock on %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ p_locked = _gf_true;
+
/* source would have both sticky bit and sgid bit set, reset it to 0,
and set the source permission on destination, if it was not set
prior to setting rebalance-modes in source */
@@ -1293,6 +1352,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
ret = -1;
}
+ clean_dst = _gf_false;
+
/* Posix acls are not set on DHT linkto files as part of the initial
* initial xattrs set on the dst file, so these need
* to be set on the dst file after the linkto attrs are removed.
@@ -1438,6 +1499,18 @@ out:
}
}
+ /* reset the destination back to 0 */
+ if (clean_dst) {
+ ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: "
+ "%s: failed to reset target size back to 0",
+ loc->path);
+ }
+ }
+
if (locked) {
flock.l_type = F_UNLCK;
@@ -1451,6 +1524,18 @@ out:
}
}
+ if (p_locked) {
+ plock.l_type = F_UNLCK;
+ lk_ret = syncop_lk (from, src_fd, F_SETLK, &plock, NULL, NULL);
+
+ if (lk_ret < 0) {
+ gf_msg (this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s",
+ loc->path, from->name);
+ }
+ }
+
if (dict)
dict_unref (dict);