diff options
| author | N Balachandran <nbalacha@redhat.com> | 2017-07-10 09:38:54 +0530 | 
|---|---|---|
| committer | Raghavendra Talur <rtalur@redhat.com> | 2017-07-20 09:33:20 +0000 | 
| commit | 7eff3f644b67fdb7a44174f836ffe1a84b6f479e (patch) | |
| tree | 384f369528898f75e6ebad609f266b4edfab0f02 | |
| parent | babde83eb1e2bb0fa072a2eb8de962c777616875 (diff) | |
cluster/dht: Fix fd check race
There is a another race between the cached subvol
being updated in the inode_ctx and the fd being opened on
the target.
1. fop1 -> fd1 -> subvol0
2. file migrated from subvol0 to subvol1 and cached_subvol
   changed to subvol1 in inode_ctx
3. fop2 -> fd1 -> subvol1 [takes new cached subvol]
4. fop2 -> checks fd ctx (fd not open on subvol1) -> opens fd1 on subvol1
5. fop1 -> checks fd ctx (fd not open on subvol0)
   -> tries to open fd1 on subvol0 -> fails with "No such file on directory".
Fix:
If dht_fd_open_on_dst fails with ENOENT or ESTALE, wind to old subvol
and let the phase1/phase2 checks handle it.
Change-Id: I34f8011574a8b72e3bcfe03b0cc4f024b352f225
BUG: 1467010
> BUG: 1465075
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17731
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
(cherry picked from commit f7a450c17fee7e43c544473366220887f0534ed7)
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://review.gluster.org/17829
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
| -rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 26 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-messages.h | 51 | 
2 files changed, 76 insertions, 1 deletions
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 3c392fe04be..c9bf60eb591 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -293,10 +293,12 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame,          glusterfs_fop_t     fop     = 0;          dht_local_t        *local   = NULL;          xlator_t           *subvol  = NULL; +        xlator_t           *this  = NULL;          fd_t               *fd      = NULL;          int                 op_errno = -1;          local = frame->local; +        this = frame->this;          fop = local->fop;          subvol = local->cached_subvol;          fd = local->fd; @@ -385,6 +387,11 @@ dht_check_and_open_fd_on_subvol_complete (int ret, call_frame_t *frame,                  break;          default: +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_UNKNOWN_FOP, +                        "Unknown FOP on fd (%p) on file %s @ %s", +                        fd, uuid_utoa (fd->inode->gfid), +                        subvol->name);                  break;          } @@ -445,6 +452,11 @@ handle_err:                  break;          default: +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        DHT_MSG_UNKNOWN_FOP, +                        "Unknown FOP on fd (%p) on file %s @ %s", +                        fd, uuid_utoa (fd->inode->gfid), +                        subvol->name);                  break;          } @@ -508,6 +520,20 @@ dht_check_and_open_fd_on_subvol_task (void *data)                          " (%p, flags=0%o) on file %s @ %s",                          fd, fd->flags, uuid_utoa (fd->inode->gfid),                          subvol->name); +                /* This can happen if the cached subvol was updated in the +                 * inode_ctx and the fd was opened on the new cached suvol +                 * after this fop was wound on the old cached subvol. +                 * As we do not close the fd on the old subvol (a leak) +                 * don't treat ENOENT as an error and allow the phase1/phase2 +                 * checks to handle it. +                 */ + +                if ((-ret != ENOENT) && (-ret != ESTALE)) { +                        local->op_errno = -ret; +                        ret = -1; +                } else { +                        ret = 0; +                }                  local->op_errno = -ret;                  ret = -1; diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h index 30b64eb5711..b6184ebe088 100644 --- a/xlators/cluster/dht/src/dht-messages.h +++ b/xlators/cluster/dht/src/dht-messages.h @@ -40,7 +40,7 @@   */  #define GLFS_DHT_BASE                   GLFS_MSGID_COMP_DHT -#define GLFS_DHT_NUM_MESSAGES           118 +#define GLFS_DHT_NUM_MESSAGES           125  #define GLFS_MSGID_END          (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)  /* Messages with message IDs */ @@ -1085,5 +1085,54 @@   */  #define DHT_MSG_DIR_LOOKUP_FAILED          (GLFS_DHT_BASE + 118) +/* + * @messageid 109119 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_INODELK_FAILED          (GLFS_DHT_BASE + 119) + +/* + * @messageid 109120 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOCK_FRAME_FAILED          (GLFS_DHT_BASE + 120) + +/* + * @messageid 109121 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOCAL_LOCK_INIT_FAILED          (GLFS_DHT_BASE + 121) + +/* + * @messageid 109122 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_ENTRYLK_ERROR          (GLFS_DHT_BASE + 122) + +/* + * @messageid 109123 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_INODELK_ERROR          (GLFS_DHT_BASE + 123) + +/* + * @messageid 109124 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_LOC_FAILED             (GLFS_DHT_BASE + 124) + +/* + * @messageid 109125 + * @diagnosis + * @recommendedaction None + */ +#define DHT_MSG_UNKNOWN_FOP            (GLFS_DHT_BASE + 125) +  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"  #endif /* _DHT_MESSAGES_H_ */  | 
