diff options
| author | Ravishankar N <ravishankar@redhat.com> | 2017-02-02 16:41:45 +0530 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-02-15 07:33:24 -0500 | 
| commit | 5d8951afdc083008ad1d6f930291b36dca86c94f (patch) | |
| tree | 22545b0a1de8664afb0f7ed5d7936396af35c3c3 | |
| parent | 8de5213db8771088ae214d42bcae056e409d7b6a (diff) | |
protocol/client: Fix double free of client fdctx destroy
This patch fixes a race between the fd re-open code and the fd release code.
Both paths can end up freeing the same fd context, because their checks of
certain variables are not synchronized with each other, as explained below:
1. client process (shd in the case of this BZ) sends an opendir to its
children (client xlators) which send the fop to the bricks to get a valid fd.
2. Client xlator loses connection to the brick. fdctx->remote_fd is -1.
3. Client re-establishes connection. After handshake, it reopens the dir
and sets fdctx->remote_fd to a valid fd in client3_3_reopendir_cbk().
4. Meanwhile, shd sends an fd unref after it is done with the opendir.
This triggers a releasedir (since fd->refcount becomes 0).
5. client3_3_releasedir() sees that fdctx->remote_fd is a valid number
(i.e. not -1), sets fdctx->released=1 and calls client_fdctx_destroy().
6. As a continuation of step 3, client_reopen_done() is called by
client3_3_reopendir_cbk(), which sees that fdctx->released==1 and
again calls client_fdctx_destroy().
Depending on when step-5 does GF_FREE(fdctx), we may crash at any place in
step-6 in client3_3_reopendir_cbk() when it tries to access
fdctx->{whatever}.
> Reviewed-on: https://review.gluster.org/16521
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
(cherry picked from commit 25fc74f9d1f2b1e7bab76485a99f27abadd10b7b)
Change-Id: Ia50873d11763e084e41d2a1f4d53715438e5e947
BUG: 1422350
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://review.gluster.org/16619
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
| -rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 37 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-rpc-fops.c | 25 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client.h | 5 | 
3 files changed, 33 insertions, 34 deletions
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 6d1f14b2aa7..447366c0deb 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -349,7 +349,7 @@ clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov,          clnt_fd_lk_reacquire_failed (this, fdctx, conf); -        fdctx->reopen_done (fdctx, this); +        fdctx->reopen_done (fdctx, fdctx->remote_fd, this);          frame->local = NULL;          STACK_DESTROY (frame->root); @@ -382,7 +382,7 @@ clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx)   out:          if (ret) {                  clnt_fd_lk_reacquire_failed (this, fdctx, conf); -                fdctx->reopen_done (fdctx, this); +                fdctx->reopen_done (fdctx, fdctx->remote_fd, this);          }          return 0;  } @@ -501,7 +501,7 @@ client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov,                  }                  pthread_mutex_unlock (&conf->lock); -                fdctx->reopen_done (fdctx, this); +                fdctx->reopen_done (fdctx, fdctx->remote_fd, this);          }          ret = 0; @@ -611,7 +611,7 @@ client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx)          if (client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) {                  gf_msg_debug (this->name, 0,                                "fd lock list is empty"); -                fdctx->reopen_done (fdctx, this); +                fdctx->reopen_done (fdctx, fdctx->remote_fd, this);          } else {                  lk_ctx = fdctx->lk_ctx; @@ -627,14 +627,14 @@ out:  }  void -client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +client_default_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this)  {          gf_log_callingfn (this->name, GF_LOG_WARNING,                            "This function should never be called");  }  void -client_reopen_done (clnt_fd_ctx_t *fdctx, 
xlator_t *this) +client_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this)  {          clnt_conf_t  *conf    = NULL;          gf_boolean_t destroy  = _gf_false; @@ -643,21 +643,23 @@ client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)          pthread_mutex_lock (&conf->lock);          { +                fdctx->remote_fd = rfd;                  fdctx->reopen_attempts = 0; +                fdctx->reopen_done = client_default_reopen_done;                  if (!fdctx->released)                          list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);                  else                          destroy = _gf_true; -                fdctx->reopen_done = client_default_reopen_done;          }          pthread_mutex_unlock (&conf->lock);          if (destroy)                  client_fdctx_destroy (this, fdctx); +  }  void -client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this)  {          clnt_conf_t  *conf    = NULL;          uint64_t     fd_count = 0; @@ -670,7 +672,7 @@ client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)          }          UNLOCK (&conf->rec_lock); -        client_reopen_done (fdctx, this); +        client_reopen_done (fdctx, rfd, this);          if (fd_count == 0) {                  gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CHILD_UP_NOTIFY,                          "last fd open'd/lock-self-heal'd - notifying CHILD-UP"); @@ -733,7 +735,6 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,          pthread_mutex_lock (&conf->lock);          { -                fdctx->remote_fd = rsp.fd;                  if (!fdctx->released) {                          if (conf->lk_heal &&                              !client_fd_lk_list_empty (fdctx->lk_ctx, @@ -763,7 +764,7 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,  out:          if (!attempt_lock_recovery) -                
fdctx->reopen_done (fdctx, this); +                fdctx->reopen_done (fdctx, (rsp.op_ret) ? -1 : rsp.fd, this);          frame->local = NULL;          STACK_DESTROY (frame->root); @@ -780,14 +781,12 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,          int32_t        ret   = -1;          gfs3_open_rsp  rsp   = {0,};          clnt_local_t  *local = NULL; -        clnt_conf_t   *conf  = NULL;          clnt_fd_ctx_t *fdctx = NULL;          call_frame_t  *frame = NULL;          frame = myframe;          local = frame->local;          fdctx = local->fdctx; -        conf  = frame->this->private;          if (-1 == req->rpc_status) { @@ -823,14 +822,8 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,                  goto out;          } -        pthread_mutex_lock (&conf->lock); -        { -                fdctx->remote_fd = rsp.fd; -        } -        pthread_mutex_unlock (&conf->lock); -  out: -        fdctx->reopen_done (fdctx, frame->this); +        fdctx->reopen_done (fdctx, (rsp.op_ret) ? 
-1 : rsp.fd, frame->this);          frame->local = NULL;          STACK_DESTROY (frame->root); @@ -891,7 +884,7 @@ out:          if (local)                  client_local_wipe (local); -        fdctx->reopen_done (fdctx, this); +        fdctx->reopen_done (fdctx, fdctx->remote_fd, this);          return 0; @@ -955,7 +948,7 @@ out:          if (local)                  client_local_wipe (local); -        fdctx->reopen_done (fdctx, this); +        fdctx->reopen_done (fdctx, fdctx->remote_fd, this);          return 0; diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index c260ac211ef..37d119fc08c 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -3299,6 +3299,7 @@ client3_3_releasedir (call_frame_t *frame, xlator_t *this,          clnt_fd_ctx_t       *fdctx       = NULL;          clnt_args_t         *args        = NULL;          int64_t              remote_fd   = -1; +        gf_boolean_t         destroy     = _gf_false;          if (!this || !data)                  goto out; @@ -3317,16 +3318,19 @@ client3_3_releasedir (call_frame_t *frame, xlator_t *this,                             reopen_cbk handle releasing                          */ -                        if (remote_fd != -1) +                        if (remote_fd == -1) { +                                fdctx->released = 1; +                        } else {                                  list_del_init (&fdctx->sfd_pos); - -                        fdctx->released = 1; +                                destroy = _gf_true; +                        }                  }          }          pthread_mutex_unlock (&conf->lock); -        if (remote_fd != -1) +        if (destroy)                  client_fdctx_destroy (this, fdctx); +  out:          return 0; @@ -3341,6 +3345,7 @@ client3_3_release (call_frame_t *frame, xlator_t *this,          clnt_fd_ctx_t    *fdctx         = NULL;          
clnt_args_t      *args          = NULL;          lk_heal_state_t   lk_heal_state = GF_LK_HEAL_DONE; +        gf_boolean_t      destroy       = _gf_false;          if (!this || !data)                  goto out; @@ -3359,17 +3364,17 @@ client3_3_release (call_frame_t *frame, xlator_t *this,                             in progress. Just mark ->released = 1 and let                             reopen_cbk handle releasing                          */ - -                        if (remote_fd != -1 && -                            lk_heal_state == GF_LK_HEAL_DONE) +                        if (remote_fd == -1) { +                                fdctx->released = 1; +                        } else if (lk_heal_state == GF_LK_HEAL_DONE) {                                  list_del_init (&fdctx->sfd_pos); - -                        fdctx->released = 1; +                                destroy = _gf_true; +                        }                  }          }          pthread_mutex_unlock (&conf->lock); -        if (remote_fd != -1 && lk_heal_state == GF_LK_HEAL_DONE) +        if (destroy)                  client_fdctx_destroy (this, fdctx);  out:          return 0; diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index f7453bedc47..c025b9812b7 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -218,7 +218,7 @@ typedef struct _client_fd_ctx {          pthread_mutex_t   mutex;          lk_heal_state_t   lk_heal_state;          uuid_t            gfid; -        void (*reopen_done) (struct _client_fd_ctx*, xlator_t *); +        void (*reopen_done)(struct _client_fd_ctx*, int64_t rfd, xlator_t *);          struct list_head  lock_list;     /* List of all granted locks on this fd */          int32_t           reopen_attempts;  } clnt_fd_ctx_t; @@ -347,7 +347,8 @@ int client_mark_fd_bad (xlator_t *this);  int client_set_lk_version (xlator_t *this);  int client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx, 
gf_boolean_t use_try_lock); -void client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this); +void client_default_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, +                                 xlator_t *this);  void client_attempt_reopen (fd_t *fd, xlator_t *this);  int client_get_remote_fd (xlator_t *this, fd_t *fd, int flags,                            int64_t *remote_fd);  | 
