diff options
author | Xavier Hernandez <xhernandez@datalab.es> | 2016-06-09 16:53:19 +0200 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2016-06-15 02:04:29 -0700 |
commit | 4c08d36e7c6f189499f2340eb529b7f4ceff57f6 (patch) | |
tree | 825d74e65a7bfad50bace21df80cfb2545083814 | |
parent | e740b700975bdb1014ede99c65731095bdf81a72 (diff) |
cluster/dht: Fix unsafe iteration on inode->fd_list
When DHT traverses the inode->fd_list, it does that in an unsafe
way that can generate races with fd_unref() called from other threads.
This patch fixes this problem taking the inode->lock and adding a
reference to the fd while it's being used outside of the mutex
protected region.
A minor change in storage/posix has been done to also access the
inode->fd_list in a safe way.
Change-Id: I10d469ca6a8f76e950a8c9779ae9c8b70f88ef93
BUG: 1344340
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/14682
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 86 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 2 |
2 files changed, 71 insertions, 17 deletions
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 0a5e1128db1..590d0043507 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -1103,6 +1103,7 @@ dht_migration_complete_check_task (void *data) dht_conf_t *conf = NULL; inode_t *inode = NULL; fd_t *iter_fd = NULL; + fd_t *tmp = NULL; uint64_t tmp_miginfo = 0; dht_migrate_info_t *miginfo = NULL; int open_failed = 0; @@ -1228,27 +1229,46 @@ dht_migration_complete_check_task (void *data) goto out; } + /* perform 'open()' on all the fd's present on the inode */ + if (tmp_loc.path == NULL) { + inode_path (inode, NULL, &path); + if (path) + tmp_loc.path = path; + } + + LOCK(&inode->lock); + if (list_empty (&inode->fd_list)) - goto out; + goto unlock; /* perform open as root:root. There is window between linkfile * creation(root:root) and setattr with the correct uid/gid */ SYNCTASK_SETID(0, 0); - /* perform 'open()' on all the fd's present on the inode */ - tmp_loc.inode = inode; - inode_path (inode, NULL, &path); - if (path) - tmp_loc.path = path; - - list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + /* It's possible that we are the last user of iter_fd after each + * iteration. In this case the fd_unref() of iter_fd at the end of + * the loop will cause the destruction of the fd. So we need to + * iterate the list safely because iter_fd cannot be trusted. + */ + list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) { if (fd_is_anonymous (iter_fd)) continue; + if (dht_fd_open_on_dst (this, iter_fd, dst_node)) continue; + /* We need to release the inode->lock before calling + * syncop_open() to avoid possible deadlocks. However this + * can cause the iter_fd to be released by other threads. + * To avoid this, we take a reference before releasing the + * lock. + */ + __fd_ref(iter_fd); + + UNLOCK(&inode->lock); + /* flags for open are stripped down to allow following the * new location of the file, otherwise we can get EEXIST or * truncate the file again as rebalance is moving the data */ @@ -1270,15 +1290,23 @@ dht_migration_complete_check_task (void *data) } else { dht_fd_ctx_set (this, iter_fd, dst_node); } + + fd_unref(iter_fd); + + LOCK(&inode->lock); } SYNCTASK_SETID (frame->root->uid, frame->root->gid); if (open_failed) { ret = -1; - goto out; + goto unlock; } ret = 0; + +unlock: + UNLOCK(&inode->lock); + out: loc_wipe (&tmp_loc); @@ -1351,6 +1379,7 @@ dht_rebalance_inprogress_task (void *data) dht_conf_t *conf = NULL; inode_t *inode = NULL; fd_t *iter_fd = NULL; + fd_t *tmp = NULL; int open_failed = 0; uint64_t tmp_miginfo = 0; dht_migrate_info_t *miginfo = NULL; @@ -1461,24 +1490,44 @@ dht_rebalance_inprogress_task (void *data) } ret = 0; + if (tmp_loc.path == NULL) { + inode_path (inode, NULL, &path); + if (path) + tmp_loc.path = path; + } + + LOCK(&inode->lock); + if (list_empty (&inode->fd_list)) - goto done; + goto unlock; /* perform open as root:root. There is window between linkfile * creation(root:root) and setattr with the correct uid/gid */ SYNCTASK_SETID (0, 0); - inode_path (inode, NULL, &path); - if (path) - tmp_loc.path = path; - - list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + /* It's possible that we are the last user of iter_fd after each + * iteration. In this case the fd_unref() of iter_fd at the end of + * the loop will cause the destruction of the fd. So we need to + * iterate the list safely because iter_fd cannot be trusted. + */ + list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) { if (fd_is_anonymous (iter_fd)) continue; if (dht_fd_open_on_dst (this, iter_fd, dst_node)) continue; + + /* We need to release the inode->lock before calling + * syncop_open() to avoid possible deadlocks. However this + * can cause the iter_fd to be released by other threads. + * To avoid this, we take a reference before releasing the + * lock. + */ + __fd_ref(iter_fd); + + UNLOCK(&inode->lock); + /* flags for open are stripped down to allow following the * new location of the file, otherwise we can get EEXIST or * truncate the file again as rebalance is moving the data */ @@ -1501,16 +1550,21 @@ dht_rebalance_inprogress_task (void *data) dht_fd_ctx_set (this, iter_fd, dst_node); } + fd_unref(iter_fd); + + LOCK(&inode->lock); } SYNCTASK_SETID (frame->root->uid, frame->root->gid); +unlock: + UNLOCK(&inode->lock); + if (open_failed) { ret = -1; goto out; } -done: ret = dht_inode_ctx_set_mig_info (this, inode, src_node, dst_node); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index c6bf5174186..2320bf13449 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -4388,7 +4388,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, } if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { - if (!list_empty (&loc->inode->fd_list)) { + if (!fd_list_empty (loc->inode)) { ret = dict_set_uint32 (dict, (char *)name, 1); if (ret < 0) gf_msg (this->name, GF_LOG_WARNING, 0, |