summaryrefslogtreecommitdiffstats
path: root/rpc/rpc-lib
diff options
context:
space:
mode:
authorRaghavendra G <rgowdapp@redhat.com>2017-02-28 13:13:59 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2017-03-06 11:20:33 -0500
commitfab2c6d574742e6c356d6b364d720540fc354fe8 (patch)
tree3bb4507a5dda544a2883138873d74587037640e3 /rpc/rpc-lib
parent44a8e1d4947a9f701bfd7d83558263d6ec314764 (diff)
rpc/clnt: remove locks while notifying CONNECT/DISCONNECT
Locking during notify was introduced as part of commit aa22f24f5db7659387704998ae01520708869873 [1]. The fix was introduced to fix out-of-order CONNECT/DISCONNECT events from rpc-clnt to parent xlators [2]. However as part of handling DISCONNECT protocol/client does unwind saved frames (with failure) waiting for responses. This saved_frames_unwind can be a costly operation and hence ideally shouldn't be included in the critical section of notifylock, as it unnecessarily delays the reconnection to same brick. Also, its not a good practise to pass control to other xlators holding a lock as it can lead to deadlocks. So, this patch removes locking in rpc-clnt while notifying parent xlators. To fix [2], two changes are present in this patch: * notify DISCONNECT before cleaning up rpc connection (same as commit a6b63e11b7758cf1bfcb6798, patch [3]). * protocol/client uses rpc_clnt_cleanup_and_start, which cleans up rpc connection and does a start while handling a DISCONNECT event from rpc. Note that patch [3] was reverted as rpc_clnt_start called in quick_reconnect path of protocol/client didn't invoke connect on transport as the connection was not cleaned up _yet_ (as cleanup was moved post notification in rpc-clnt). This resulted in clients never attempting connect to bricks. Note that one of the neater ways to fix [2] (without using locks) is to introduce generation numbers to map CONNECT and DISCONNECTS across epochs and ignore DISCONNECT events if they don't belong to current epoch. However, this approach is a bit complex to implement and requires time. So, current patch is a hacky stop-gap fix till we come up with a more cleaner solution. [1] http://review.gluster.org/15916 [2] https://bugzilla.redhat.com/show_bug.cgi?id=1386626 [3] http://review.gluster.org/15681 >Change-Id: I62daeee8bb1430004e28558f6eb133efd4ccf418 >Signed-off-by: Raghavendra G <rgowdapp@redhat.com> >BUG: 1427012 >Reviewed-on: https://review.gluster.org/16784 >Smoke: Gluster Build System <jenkins@build.gluster.org> >Reviewed-by: Milind Changire <mchangir@redhat.com> >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> >CentOS-regression: Gluster Build System <jenkins@build.gluster.org> (cherry picked from commit 773f32caf190af4ee48818279b6e6d3c9f2ecc79) Change-Id: I62daeee8bb1430004e28558f6eb133efd4ccf418 Signed-off-by: Raghavendra G <rgowdapp@redhat.com> BUG: 1428670 Reviewed-on: https://review.gluster.org/16835 Smoke: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Diffstat (limited to 'rpc/rpc-lib')
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c94
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h4
2 files changed, 67 insertions, 31 deletions
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index d39b5236b91..3284971b3e9 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -549,6 +549,7 @@ rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn)
/*reset rpc msgs stats*/
conn->pingcnt = 0;
conn->msgcnt = 0;
+ conn->cleanup_gen++;
}
pthread_mutex_unlock (&conn->lock);
@@ -874,10 +875,29 @@ rpc_clnt_destroy (struct rpc_clnt *rpc);
static int
rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
{
- struct timespec ts = {0, };
- gf_boolean_t unref_clnt = _gf_false;
+ struct timespec ts = {0, };
+ gf_boolean_t unref_clnt = _gf_false;
+ uint64_t pre_notify_gen = 0, post_notify_gen = 0;
- rpc_clnt_connection_cleanup (conn);
+ pthread_mutex_lock (&conn->lock);
+ {
+ pre_notify_gen = conn->cleanup_gen;
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (clnt->notifyfn)
+ clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ post_notify_gen = conn->cleanup_gen;
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (pre_notify_gen == post_notify_gen) {
+ /* program didn't invoke cleanup, so rpc has to do it */
+ rpc_clnt_connection_cleanup (conn);
+ }
pthread_mutex_lock (&conn->lock);
{
@@ -897,8 +917,6 @@ rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
}
pthread_mutex_unlock (&conn->lock);
- if (clnt->notifyfn)
- clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
if (unref_clnt)
rpc_clnt_ref (clnt);
@@ -931,11 +949,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
switch (event) {
case RPC_TRANSPORT_DISCONNECT:
{
- pthread_mutex_lock (&clnt->notifylock);
- {
- rpc_clnt_handle_disconnect (clnt, conn);
- }
- pthread_mutex_unlock (&clnt->notifylock);
+ rpc_clnt_handle_disconnect (clnt, conn);
break;
}
@@ -990,21 +1004,19 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
case RPC_TRANSPORT_CONNECT:
{
- pthread_mutex_lock (&clnt->notifylock);
- {
- /* Every time there is a disconnection, processes
- * should try to connect to 'glusterd' (ie, default
- * port) or whichever port given as 'option remote-port'
- * in volume file. */
- /* Below code makes sure the (re-)configured port lasts
- * for just one successful attempt */
- conn->config.remote_port = 0;
-
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_CONNECT, NULL);
- }
- pthread_mutex_unlock (&clnt->notifylock);
+
+ /* Every time there is a disconnection, processes
+ * should try to connect to 'glusterd' (ie, default
+ * port) or whichever port given as 'option remote-port'
+ * in volume file. */
+ /* Below code makes sure the (re-)configured port lasts
+ * for just one successful attempt */
+ conn->config.remote_port = 0;
+
+ if (clnt->notifyfn)
+ ret = clnt->notifyfn (clnt, clnt->mydata,
+ RPC_CLNT_CONNECT, NULL);
+
break;
}
@@ -1128,7 +1140,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
}
pthread_mutex_init (&rpc->lock, NULL);
- pthread_mutex_init (&rpc->notifylock, NULL);
rpc->ctx = ctx;
rpc->owner = owner;
@@ -1138,7 +1149,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
if (rpc->reqpool == NULL) {
pthread_mutex_destroy (&rpc->lock);
- pthread_mutex_destroy (&rpc->notifylock);
GF_FREE (rpc);
rpc = NULL;
goto out;
@@ -1148,7 +1158,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
reqpool_size);
if (rpc->saved_frames_pool == NULL) {
pthread_mutex_destroy (&rpc->lock);
- pthread_mutex_destroy (&rpc->notifylock);
mem_pool_destroy (rpc->reqpool);
GF_FREE (rpc);
rpc = NULL;
@@ -1158,7 +1167,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
ret = rpc_clnt_connection_init (rpc, ctx, options, name);
if (ret == -1) {
pthread_mutex_destroy (&rpc->lock);
- pthread_mutex_destroy (&rpc->notifylock);
mem_pool_destroy (rpc->reqpool);
mem_pool_destroy (rpc->saved_frames_pool);
GF_FREE (rpc);
@@ -1204,6 +1212,33 @@ rpc_clnt_start (struct rpc_clnt *rpc)
int
+rpc_clnt_cleanup_and_start (struct rpc_clnt *rpc)
+{
+ struct rpc_clnt_connection *conn = NULL;
+
+ if (!rpc)
+ return -1;
+
+ conn = &rpc->conn;
+
+ rpc_clnt_connection_cleanup (conn);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ rpc->disabled = 0;
+ }
+ pthread_mutex_unlock (&conn->lock);
+ /* Corresponding unref will be either on successful timer cancel or last
+ * rpc_clnt_reconnect fire event.
+ */
+ rpc_clnt_ref (rpc);
+ rpc_clnt_reconnect (conn);
+
+ return 0;
+}
+
+
+int
rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
void *mydata)
{
@@ -1754,7 +1789,6 @@ rpc_clnt_destroy (struct rpc_clnt *rpc)
saved_frames_destroy (rpc->conn.saved_frames);
pthread_mutex_destroy (&rpc->lock);
pthread_mutex_destroy (&rpc->conn.lock);
- pthread_mutex_destroy (&rpc->notifylock);
/* mem-pool should be destroyed, otherwise,
it will cause huge memory leaks */
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index 4d66498a0aa..b731ba2dfad 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -149,6 +149,7 @@ struct rpc_clnt_connection {
int32_t ping_timeout;
uint64_t pingcnt;
uint64_t msgcnt;
+ uint64_t cleanup_gen;
};
typedef struct rpc_clnt_connection rpc_clnt_connection_t;
@@ -171,7 +172,6 @@ struct rpc_req {
typedef struct rpc_clnt {
pthread_mutex_t lock;
- pthread_mutex_t notifylock;
rpc_clnt_notify_t notifyfn;
rpc_clnt_connection_t conn;
void *mydata;
@@ -198,6 +198,8 @@ struct rpc_clnt *rpc_clnt_new (dict_t *options, xlator_t *owner,
int rpc_clnt_start (struct rpc_clnt *rpc);
+int rpc_clnt_cleanup_and_start (struct rpc_clnt *rpc);
+
int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
void *mydata);