diff options
| author | Rajesh Joseph <rjoseph@redhat.com> | 2016-12-03 01:10:51 +0530 | 
|---|---|---|
| committer | Raghavendra Talur <rtalur@redhat.com> | 2016-12-20 09:11:57 -0800 | 
| commit | 63152c4037218ca9b89282547d6fe38399fb111a (patch) | |
| tree | d7283c8849a1bc1610f7775de915c950df70ad71 | |
| parent | a8a2d6d3bb161e86464c9bb72b8847fe3ef7699e (diff) | |
rpc: fix for race between rpc and protocol/client
It is possible that the notification thread which notifies
protocol/client layer about the disconnection is put to sleep
and meanwhile, a fuse thread or a timer thread initiates and
completes reconnection to the brick. The notification thread
is then woken up and protocol/client layer updates its flags
to indicate that network is disconnected. No reconnection is
initiated because reconnection is rpc-lib layer's responsibility
and its flags indicate that connection is connected.
Fix: Serialize connect and disconnect notify
> Credit: Raghavendra Talur <rtalur@redhat.com>
> Reviewed-on: http://review.gluster.org/15916
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
(cherry picked from commit aa22f24f5db7659387704998ae01520708869873)
Change-Id: I8ff5d1a3283b47f5c26848a42016a40bc34ffc1d
BUG: 1388323
Signed-off-by: Rajesh Joseph <rjoseph@redhat.com>
Reviewed-on: http://review.gluster.org/16026
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-by: Raghavendra Talur <rtalur@redhat.com>
| -rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.c | 98 | ||||
| -rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.h | 1 | 
2 files changed, 59 insertions, 40 deletions
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index e8a8ea2ecd9..b868f56bdb3 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -871,6 +871,41 @@ rpc_clnt_destroy (struct rpc_clnt *rpc);  #define RPC_THIS_RESTORE        (THIS = old_THIS) +static int +rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn) +{ +        struct timespec         ts          = {0, }; +        gf_boolean_t            unref_clnt  = _gf_false; + +        rpc_clnt_connection_cleanup (conn); + +        pthread_mutex_lock (&conn->lock); +        { +                if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) { +                        ts.tv_sec = 10; +                        ts.tv_nsec = 0; + +                        rpc_clnt_ref (clnt); +                        conn->reconnect = gf_timer_call_after (clnt->ctx, ts, +                                                rpc_clnt_reconnect, conn); +                        if (conn->reconnect == NULL) { +                                gf_log (conn->name, GF_LOG_WARNING, +                                                "Cannot create rpc_clnt_reconnect timer"); +                                unref_clnt = _gf_true; +                        } +                } +        } +        pthread_mutex_unlock (&conn->lock); + +        if (clnt->notifyfn) +                clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL); + +        if (unref_clnt) +                rpc_clnt_ref (clnt); + +        return 0; +} +  int  rpc_clnt_notify (rpc_transport_t *trans, void *mydata,                   rpc_transport_event_t event, void *data, ...) @@ -880,9 +915,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,          int                     ret         = -1;          rpc_request_info_t     *req_info    = NULL;          rpc_transport_pollin_t *pollin      = NULL; -        struct timespec         ts          = {0, };          void                   *clnt_mydata = NULL; -        gf_boolean_t            unref_clnt  = _gf_false;          DECLARE_OLD_THIS;          conn = mydata; @@ -898,35 +931,11 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,          switch (event) {          case RPC_TRANSPORT_DISCONNECT:          { -                rpc_clnt_connection_cleanup (conn); - -                pthread_mutex_lock (&conn->lock); +                pthread_mutex_lock (&clnt->notifylock);                  { -                        if (!conn->rpc_clnt->disabled -                            && (conn->reconnect == NULL)) { -                                ts.tv_sec = 10; -                                ts.tv_nsec = 0; - -                                rpc_clnt_ref (clnt); -                                conn->reconnect = -                                        gf_timer_call_after (clnt->ctx, ts, -                                                             rpc_clnt_reconnect, -                                                             conn); -                                if (conn->reconnect == NULL) { -                                        gf_log (conn->name, GF_LOG_WARNING, -                                                "Cannot create rpc_clnt_reconnect timer"); -                                        unref_clnt = _gf_true; -                                } -                        } +                        rpc_clnt_handle_disconnect (clnt, conn);                  } -                pthread_mutex_unlock (&conn->lock); - -                if (clnt->notifyfn) -                        ret = clnt->notifyfn (clnt, clnt->mydata, -                                              RPC_CLNT_DISCONNECT, NULL); -                if (unref_clnt) -                        rpc_clnt_ref (clnt); - +                pthread_mutex_unlock (&clnt->notifylock);                  break;          } @@ -981,17 +990,21 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,          case RPC_TRANSPORT_CONNECT:          { -                /* Every time there is a disconnection, processes -                   should try to connect to 'glusterd' (ie, default -                   port) or whichever port given as 'option remote-port' -                   in volume file. */ -                /* Below code makes sure the (re-)configured port lasts -                   for just one successful attempt */ -                conn->config.remote_port = 0; - -                if (clnt->notifyfn) -                        ret = clnt->notifyfn (clnt, clnt->mydata, -                                              RPC_CLNT_CONNECT, NULL); +                pthread_mutex_lock (&clnt->notifylock); +                { +                        /* Every time there is a disconnection, processes +                         * should try to connect to 'glusterd' (ie, default +                         * port) or whichever port given as 'option remote-port' +                         * in volume file. */ +                        /* Below code makes sure the (re-)configured port lasts +                         * for just one successful attempt */ +                        conn->config.remote_port = 0; + +                        if (clnt->notifyfn) +                                ret = clnt->notifyfn (clnt, clnt->mydata, +                                                RPC_CLNT_CONNECT, NULL); +                } +                pthread_mutex_unlock (&clnt->notifylock);                  break;          } @@ -1115,6 +1128,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,          }          pthread_mutex_init (&rpc->lock, NULL); +        pthread_mutex_init (&rpc->notifylock, NULL);          rpc->ctx = ctx;          rpc->owner = owner; @@ -1124,6 +1138,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,          rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);          if (rpc->reqpool == NULL) {                  pthread_mutex_destroy (&rpc->lock); +                pthread_mutex_destroy (&rpc->notifylock);                  GF_FREE (rpc);                  rpc = NULL;                  goto out; @@ -1133,6 +1148,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,                                                 reqpool_size);          if (rpc->saved_frames_pool == NULL) {                  pthread_mutex_destroy (&rpc->lock); +                pthread_mutex_destroy (&rpc->notifylock);                  mem_pool_destroy (rpc->reqpool);                  GF_FREE (rpc);                  rpc = NULL; @@ -1142,6 +1158,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,          ret = rpc_clnt_connection_init (rpc, ctx, options, name);          if (ret == -1) {                  pthread_mutex_destroy (&rpc->lock); +                pthread_mutex_destroy (&rpc->notifylock);                  mem_pool_destroy (rpc->reqpool);                  mem_pool_destroy (rpc->saved_frames_pool);                  GF_FREE (rpc); @@ -1737,6 +1754,7 @@ rpc_clnt_destroy (struct rpc_clnt *rpc)          saved_frames_destroy (rpc->conn.saved_frames);          pthread_mutex_destroy (&rpc->lock);          pthread_mutex_destroy (&rpc->conn.lock); +        pthread_mutex_destroy (&rpc->notifylock);          /* mem-pool should be destroyed, otherwise,             it will cause huge memory leaks */ diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h index f84b4cbf806..3a5b287cd49 100644 --- a/rpc/rpc-lib/src/rpc-clnt.h +++ b/rpc/rpc-lib/src/rpc-clnt.h @@ -172,6 +172,7 @@ struct rpc_req {  typedef struct rpc_clnt {          pthread_mutex_t        lock; +        pthread_mutex_t        notifylock;          rpc_clnt_notify_t      notifyfn;          rpc_clnt_connection_t  conn;          void                  *mydata;  | 
