diff options
| author | Xavier Hernandez <jahernan@redhat.com> | 2018-01-19 12:18:13 +0100 | 
|---|---|---|
| committer | Raghavendra G <rgowdapp@redhat.com> | 2019-05-11 14:25:53 +0000 | 
| commit | 59841f7e1ff0511b04884015441a181a56d07bea (patch) | |
| tree | 7b4f16752014cf0cfc0ba1aad1847a43325e28a9 /rpc/rpc-lib/src/rpc-clnt.c | |
| parent | da4601d536da761ce908a2461a0930857f99f171 (diff) | |
rpc: implement reconnect back-off strategy
When a connection failure happens, gluster tries to reconnect every 3
seconds. In some cases the failure is spurious, so a delay of 3 seconds
could be unnecessarily long.
This patch implements a back-off strategy: the first reconnect is attempted
after one tenth of a second. If that attempt fails, the delay is doubled on
each subsequent attempt until it reaches around 3 seconds. After that, the
reconnect is attempted about every 3 seconds, as before.
Change-Id: Icb3fbe20d618f50cbbb599dce542b4e871c22149
Updates: bz#1193929
Signed-off-by: Xavier Hernandez <xhernandez@redhat.com>
Diffstat (limited to 'rpc/rpc-lib/src/rpc-clnt.c')
| -rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.c | 33 | 
1 files changed, 17 insertions, 16 deletions
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index 8ef05378351..c1945dfb6ec 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -392,8 +392,16 @@ rpc_clnt_reconnect(void *conn_ptr)          conn->reconnect = 0;          if ((conn->connected == 0) && !clnt->disabled) { -            ts.tv_sec = 3; -            ts.tv_nsec = 0; +            if (conn->reconnect_delay.tv_sec < 3) { +                conn->reconnect_delay.tv_sec *= 2; +                int64_t ns = conn->reconnect_delay.tv_nsec * 2; +                if (ns >= 1000000000ULL) { +                    conn->reconnect_delay.tv_sec++; +                    ns -= 1000000000ULL; +                } +                conn->reconnect_delay.tv_nsec = ns; +            } +            ts = conn->reconnect_delay;              gf_log(conn->name, GF_LOG_TRACE, "attempting reconnect");              (void)rpc_transport_connect(trans, conn->config.remote_port); @@ -838,9 +846,11 @@ rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)      pthread_mutex_lock(&conn->lock);      { +        conn->reconnect_delay.tv_sec = 0; +        conn->reconnect_delay.tv_nsec = 100000000; +          if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) { -            ts.tv_sec = 3; -            ts.tv_nsec = 0; +            ts = conn->reconnect_delay;              rpc_clnt_ref(clnt);              conn->reconnect = gf_timer_call_after(clnt->ctx, ts, @@ -1160,6 +1170,8 @@ rpc_clnt_start(struct rpc_clnt *rpc)       * rpc_clnt_reconnect fire event.       
*/      rpc_clnt_ref(rpc); +    conn->reconnect_delay.tv_sec = 0; +    conn->reconnect_delay.tv_nsec = 50000000;      rpc_clnt_reconnect(conn);      return 0; @@ -1177,18 +1189,7 @@ rpc_clnt_cleanup_and_start(struct rpc_clnt *rpc)      rpc_clnt_connection_cleanup(conn); -    pthread_mutex_lock(&conn->lock); -    { -        rpc->disabled = 0; -    } -    pthread_mutex_unlock(&conn->lock); -    /* Corresponding unref will be either on successful timer cancel or last -     * rpc_clnt_reconnect fire event. -     */ -    rpc_clnt_ref(rpc); -    rpc_clnt_reconnect(conn); - -    return 0; +    return rpc_clnt_start(rpc);  }  int  | 
