diff options
author | N Balachandran <nbalacha@redhat.com> | 2017-07-10 09:38:54 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2017-07-11 05:02:52 +0000 |
commit | f7a450c17fee7e43c544473366220887f0534ed7 (patch) | |
tree | b869cadd7aaba526c588b185ac8ba91a15ff4f85 /xlators | |
parent | a62b16b72b03f2b7c25c24ea9ac5968453a92009 (diff) |
cluster/dht: Fix fd check race
There is a another race between the cached subvol
being updated in the inode_ctx and the fd being opened on
the target.
1. fop1 -> fd1 -> subvol0
2. file migrated from subvol0 to subvol1 and cached_subvol
changed to subvol1 in inode_ctx
3. fop2 -> fd1 -> subvol1 [takes new cached subvol]
4. fop2 -> checks fd ctx (fd not open on subvol1) -> opens fd1 on subvol1
5. fop1 -> checks fd ctx (fd not open on subvol0)
-> tries to open fd1 on subvol0 -> fails with "No such file on directory".
Fix:
If dht_fd_open_on_dst fails with ENOENT or ESTALE, wind to old subvol
and let the phase1/phase2 checks handle it.
Change-Id: I34f8011574a8b72e3bcfe03b0cc4f024b352f225
BUG: 1465075
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://review.gluster.org/17731
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 26 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-messages.h | 9 |
2 files changed, 34 insertions, 1 deletions
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index ed84a3b79b6..22c84a24b3d 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -287,10 +287,12 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame, glusterfs_fop_t fop = 0; dht_local_t *local = NULL; xlator_t *subvol = NULL; + xlator_t *this = NULL; fd_t *fd = NULL; int op_errno = -1; local = frame->local; + this = frame->this; fop = local->fop; subvol = local->cached_subvol; fd = local->fd; @@ -379,6 +381,11 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame, break; default: + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", + fd, uuid_utoa (fd->inode->gfid), + subvol->name); break; } @@ -439,6 +446,11 @@ handle_err: break; default: + gf_msg (this->name, GF_LOG_ERROR, 0, + DHT_MSG_UNKNOWN_FOP, + "Unknown FOP on fd (%p) on file %s @ %s", + fd, uuid_utoa (fd->inode->gfid), + subvol->name); break; } @@ -502,6 +514,20 @@ dht_check_and_open_fd_on_subvol_task (void *data) " (%p, flags=0%o) on file %s @ %s", fd, fd->flags, uuid_utoa (fd->inode->gfid), subvol->name); + /* This can happen if the cached subvol was updated in the + * inode_ctx and the fd was opened on the new cached suvol + * after this fop was wound on the old cached subvol. + * As we do not close the fd on the old subvol (a leak) + * don't treat ENOENT as an error and allow the phase1/phase2 + * checks to handle it. + */ + + if ((-ret != ENOENT) && (-ret != ESTALE)) { + local->op_errno = -ret; + ret = -1; + } else { + ret = 0; + } local->op_errno = -ret; ret = -1; diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 6c8430b4920..b6184ebe088 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -40,7 +40,7 @@ */ #define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES 124 +#define GLFS_DHT_NUM_MESSAGES 125 #define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1) /* Messages with message IDs */ @@ -1127,5 +1127,12 @@ */ #define DHT_MSG_LOC_FAILED (GLFS_DHT_BASE + 124) +/* + * @messageid 109125 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_UNKNOWN_FOP (GLFS_DHT_BASE + 125) + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* _DHT_MESSAGES_H_ */ |