diff options
| author | Xavier Hernandez <xhernandez@datalab.es> | 2016-06-09 16:53:19 +0200 | 
|---|---|---|
| committer | Kaushal M <kaushal@redhat.com> | 2016-06-24 03:20:05 -0700 | 
| commit | ed16cfb0455e41ee39addf6b3cdacdbe0d98308a (patch) | |
| tree | 4f83e39f18fb229f8449f5fd4e568ed755ea9ddd | |
| parent | 8dddda654a60ed66792212b413fde113f4cf951f (diff) | |
cluster/dht: Fix unsafe iteration on inode->fd_list
When DHT traverses the inode->fd_list, it does that in an unsafe
way that can generate races with fd_unref() called from other threads.
This patch fixes this problem taking the inode->lock and adding a
reference to the fd while it's being used outside of the mutex
protected region.
A minor change in storage/posix has been done to also access the
inode->fd_list in a safe way.
Backport of:
> Change-Id: I10d469ca6a8f76e950a8c9779ae9c8b70f88ef93
> BUG: 1344340
> Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
> Reviewed-on: http://review.gluster.org/14682
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Change-Id: I10d469ca6a8f76e950a8c9779ae9c8b70f88ef93
BUG: 1346751
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/14734
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 86 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 2 | 
2 files changed, 71 insertions, 17 deletions
| diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index ac70a665094..472abc1cf68 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -1108,6 +1108,7 @@ dht_migration_complete_check_task (void *data)          dht_conf_t         *conf        = NULL;          inode_t            *inode       = NULL;          fd_t               *iter_fd     = NULL; +        fd_t               *tmp         = NULL;          uint64_t            tmp_miginfo = 0;          dht_migrate_info_t *miginfo     = NULL;          int                 open_failed = 0; @@ -1230,27 +1231,46 @@ dht_migration_complete_check_task (void *data)                  goto out;          } +        /* perform 'open()' on all the fd's present on the inode */ +        if (tmp_loc.path == NULL) { +                inode_path (inode, NULL, &path); +                if (path) +                        tmp_loc.path = path; +        } + +        LOCK(&inode->lock); +          if (list_empty (&inode->fd_list)) -                goto out; +                goto unlock;          /* perform open as root:root. There is window between linkfile           * creation(root:root) and setattr with the correct uid/gid           */          SYNCTASK_SETID(0, 0); -        /* perform 'open()' on all the fd's present on the inode */ -        tmp_loc.inode = inode; -        inode_path (inode, NULL, &path); -        if (path) -                tmp_loc.path = path; - -        list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { +        /* It's possible that we are the last user of iter_fd after each +         * iteration. In this case the fd_unref() of iter_fd at the end of +         * the loop will cause the destruction of the fd. So we need to +         * iterate the list safely because iter_fd cannot be trusted. +         */ +        list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) {                  if (fd_is_anonymous (iter_fd))                          continue; +                  if (dht_fd_open_on_dst (this, iter_fd, dst_node))                          continue; +                /* We need to release the inode->lock before calling +                 * syncop_open() to avoid possible deadlocks. However this +                 * can cause the iter_fd to be released by other threads. +                 * To avoid this, we take a reference before releasing the +                 * lock. +                 */ +                __fd_ref(iter_fd); + +                UNLOCK(&inode->lock); +                  /* flags for open are stripped down to allow following the                   * new location of the file, otherwise we can get EEXIST or                   * truncate the file again as rebalance is moving the data */ @@ -1272,15 +1292,23 @@ dht_migration_complete_check_task (void *data)                  } else {                          dht_fd_ctx_set (this, iter_fd, dst_node);                  } + +                fd_unref(iter_fd); + +                LOCK(&inode->lock);          }          SYNCTASK_SETID (frame->root->uid, frame->root->gid);          if (open_failed) {                  ret = -1; -                goto out; +                goto unlock;          }          ret = 0; + +unlock: +        UNLOCK(&inode->lock); +  out:          loc_wipe (&tmp_loc); @@ -1353,6 +1381,7 @@ dht_rebalance_inprogress_task (void *data)          dht_conf_t   *conf            = NULL;          inode_t      *inode           = NULL;          fd_t         *iter_fd         = NULL; +        fd_t         *tmp             = NULL;          int           open_failed     = 0;          uint64_t      tmp_miginfo     = 0;          dht_migrate_info_t *miginfo   = NULL; @@ -1463,24 +1492,44 @@ dht_rebalance_inprogress_task (void *data)          ret = 0; +        if (tmp_loc.path == NULL) { +                inode_path (inode, NULL, &path); +                if (path) +                        tmp_loc.path = path; +        } + +        LOCK(&inode->lock); +          if (list_empty (&inode->fd_list)) -                goto done; +                goto unlock;          /* perform open as root:root. There is window between linkfile           * creation(root:root) and setattr with the correct uid/gid           */          SYNCTASK_SETID (0, 0); -        inode_path (inode, NULL, &path); -        if (path) -                tmp_loc.path = path; - -        list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { +        /* It's possible that we are the last user of iter_fd after each +         * iteration. In this case the fd_unref() of iter_fd at the end of +         * the loop will cause the destruction of the fd. So we need to +         * iterate the list safely because iter_fd cannot be trusted. +         */ +        list_for_each_entry_safe (iter_fd, tmp, &inode->fd_list, inode_list) {                  if (fd_is_anonymous (iter_fd))                          continue;                  if (dht_fd_open_on_dst (this, iter_fd, dst_node))                          continue; + +                /* We need to release the inode->lock before calling +                 * syncop_open() to avoid possible deadlocks. However this +                 * can cause the iter_fd to be released by other threads. +                 * To avoid this, we take a reference before releasing the +                 * lock. +                 */ +                __fd_ref(iter_fd); + +                UNLOCK(&inode->lock); +                  /* flags for open are stripped down to allow following the                   * new location of the file, otherwise we can get EEXIST or                   * truncate the file again as rebalance is moving the data */ @@ -1503,16 +1552,21 @@ dht_rebalance_inprogress_task (void *data)                          dht_fd_ctx_set (this, iter_fd, dst_node);                  } +                fd_unref(iter_fd); + +                LOCK(&inode->lock);          }          SYNCTASK_SETID (frame->root->uid, frame->root->gid); +unlock: +        UNLOCK(&inode->lock); +          if (open_failed) {                  ret = -1;                  goto out;          } -done:          ret = dht_inode_ctx_set_mig_info (this, inode, src_node, dst_node);          if (ret) {                  gf_msg (this->name, GF_LOG_ERROR, 0, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index c2644116eb1..7290905807b 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -4338,7 +4338,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,  	}          if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { -                if (!list_empty (&loc->inode->fd_list)) { +                if (!fd_list_empty (loc->inode)) {                          ret = dict_set_uint32 (dict, (char *)name, 1);                          if (ret < 0)                                  gf_msg (this->name, GF_LOG_WARNING, 0, | 
