summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorN Balachandran <nbalacha@redhat.com>2017-07-10 09:38:54 +0530
committerRaghavendra Talur <rtalur@redhat.com>2017-07-20 09:33:20 +0000
commit7eff3f644b67fdb7a44174f836ffe1a84b6f479e (patch)
tree384f369528898f75e6ebad609f266b4edfab0f02
parentbabde83eb1e2bb0fa072a2eb8de962c777616875 (diff)
cluster/dht: Fix fd check race
There is a another race between the cached subvol being updated in the inode_ctx and the fd being opened on the target. 1. fop1 -> fd1 -> subvol0 2. file migrated from subvol0 to subvol1 and cached_subvol changed to subvol1 in inode_ctx 3. fop2 -> fd1 -> subvol1 [takes new cached subvol] 4. fop2 -> checks fd ctx (fd not open on subvol1) -> opens fd1 on subvol1 5. fop1 -> checks fd ctx (fd not open on subvol0) -> tries to open fd1 on subvol0 -> fails with "No such file on directory". Fix: If dht_fd_open_on_dst fails with ENOENT or ESTALE, wind to old subvol and let the phase1/phase2 checks handle it. Change-Id: I34f8011574a8b72e3bcfe03b0cc4f024b352f225 BUG: 1467010 > BUG: 1465075 > Signed-off-by: N Balachandran <nbalacha@redhat.com> > Reviewed-on: https://review.gluster.org/17731 > Smoke: Gluster Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > Reviewed-by: Raghavendra G <rgowdapp@redhat.com> > Reviewed-by: Amar Tumballi <amarts@redhat.com> (cherry picked from commit f7a450c17fee7e43c544473366220887f0534ed7) Signed-off-by: N Balachandran <nbalacha@redhat.com> Reviewed-on: https://review.gluster.org/17829 Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
-rw-r--r--xlators/cluster/dht/src/dht-helper.c26
-rw-r--r--xlators/cluster/dht/src/dht-messages.h51
2 files changed, 76 insertions, 1 deletions
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 3c392fe04be..c9bf60eb591 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -293,10 +293,12 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame,
glusterfs_fop_t fop = 0;
dht_local_t *local = NULL;
xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
fd_t *fd = NULL;
int op_errno = -1;
local = frame->local;
+ this = frame->this;
fop = local->fop;
subvol = local->cached_subvol;
fd = local->fd;
@@ -385,6 +387,11 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame,
break;
default:
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_UNKNOWN_FOP,
+ "Unknown FOP on fd (%p) on file %s @ %s",
+ fd, uuid_utoa (fd->inode->gfid),
+ subvol->name);
break;
}
@@ -445,6 +452,11 @@ handle_err:
break;
default:
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_UNKNOWN_FOP,
+ "Unknown FOP on fd (%p) on file %s @ %s",
+ fd, uuid_utoa (fd->inode->gfid),
+ subvol->name);
break;
}
@@ -508,6 +520,20 @@ dht_check_and_open_fd_on_subvol_task (void *data)
" (%p, flags=0%o) on file %s @ %s",
fd, fd->flags, uuid_utoa (fd->inode->gfid),
subvol->name);
+ /* This can happen if the cached subvol was updated in the
+ * inode_ctx and the fd was opened on the new cached suvol
+ * after this fop was wound on the old cached subvol.
+ * As we do not close the fd on the old subvol (a leak)
+ * don't treat ENOENT as an error and allow the phase1/phase2
+ * checks to handle it.
+ */
+
+ if ((-ret != ENOENT) && (-ret != ESTALE)) {
+ local->op_errno = -ret;
+ ret = -1;
+ } else {
+ ret = 0;
+ }
local->op_errno = -ret;
ret = -1;
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index 30b64eb5711..b6184ebe088 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_DHT_BASE GLFS_MSGID_COMP_DHT
-#define GLFS_DHT_NUM_MESSAGES 118
+#define GLFS_DHT_NUM_MESSAGES 125
#define GLFS_MSGID_END (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
/* Messages with message IDs */
@@ -1085,5 +1085,54 @@
*/
#define DHT_MSG_DIR_LOOKUP_FAILED (GLFS_DHT_BASE + 118)
+/*
+ * @messageid 109119
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_INODELK_FAILED (GLFS_DHT_BASE + 119)
+
+/*
+ * @messageid 109120
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_LOCK_FRAME_FAILED (GLFS_DHT_BASE + 120)
+
+/*
+ * @messageid 109121
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED (GLFS_DHT_BASE + 121)
+
+/*
+ * @messageid 109122
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_ENTRYLK_ERROR (GLFS_DHT_BASE + 122)
+
+/*
+ * @messageid 109123
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_INODELK_ERROR (GLFS_DHT_BASE + 123)
+
+/*
+ * @messageid 109124
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_LOC_FAILED (GLFS_DHT_BASE + 124)
+
+/*
+ * @messageid 109125
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_UNKNOWN_FOP (GLFS_DHT_BASE + 125)
+
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
#endif /* _DHT_MESSAGES_H_ */