summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMilind Changire <mchangir@redhat.com>2017-08-31 11:37:32 +0530
committerjiffin tony Thottan <jthottan@redhat.com>2017-09-07 06:49:51 +0000
commite0335c32de133aafd88b888a0c20f4eb88bb9845 (patch)
treea53957dfdc8ae2338207ce457a002ec1b516465e
parente6aaeda4c3a44f1c459adbd619ee7c6d0151b254 (diff)
rpc: destroy transport after client_t
Problem: 1. Ref counting increment on the client_t object is done in rpcsvc_request_init() which is incorrect. 2. Ref not taken when delegating to grace_time_handler() Solution: 1. Only fop requests which require processing down the graph via stack 'frames' now ref count the request in get_frame_from_request() 2. Take ref on client_t object in server_rpc_notify() but avoid dropping in RPCSVC_EVENT_TRANSPORT_DESRTROY. Drop the ref unconditionally when exiting out of grace_time_handler(). Also, avoid dropping ref on client_t in RPCSVC_EVENT_TRANSPORT_DESTROY when ref mangement as been delegated to grace_time_handler() mainline: > BUG: 1481600 > Reported-by: Raghavendra G <rgowdapp@redhat.com> > Signed-off-by: Milind Changire <mchangir@redhat.com> > Reviewed-on: https://review.gluster.org/17982 > Tested-by: Raghavendra G <rgowdapp@redhat.com> > Reviewed-by: Raghavendra G <rgowdapp@redhat.com> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> > Smoke: Gluster Build System <jenkins@build.gluster.org> (cherry picked from commit 24b95089a18a6a40e7703cb344e92025d67f3086) Change-Id: Ic16246bebc7ea4490545b26564658f4b081675e4 BUG: 1487033 Reported-by: Raghavendra G <rgowdapp@redhat.com> Signed-off-by: Milind Changire <mchangir@redhat.com> Reviewed-on: https://review.gluster.org/18156 Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Tested-by: Raghavendra G <rgowdapp@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c1
-rw-r--r--xlators/protocol/server/src/server-handshake.c17
-rw-r--r--xlators/protocol/server/src/server-helpers.c4
-rw-r--r--xlators/protocol/server/src/server.c73
4 files changed, 70 insertions, 25 deletions
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index fa44bb84e16..68e27ab9e3f 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -411,7 +411,6 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans,
req->progver = rpc_call_progver (callmsg);
req->procnum = rpc_call_progproc (callmsg);
req->trans = rpc_transport_ref (trans);
- gf_client_ref (req->trans->xl_private);
req->count = msg->count;
req->msg[0] = progmsg;
req->iobref = iobref_ref (msg->iobref);
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index fa2f61315df..731e1c184ef 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -633,9 +633,24 @@ server_setvolume (rpcsvc_request_t *req)
gf_msg_debug (this->name, 0, "Connected to %s", client->client_uid);
cancelled = server_cancel_grace_timer (this, client);
- if (cancelled)//Do gf_client_put on behalf of grace-timer-handler.
+ if (cancelled) {
+ /* If timer has been successfully cancelled then it means
+ * that the client has reconnected within grace period.
+ * Since we've bumped up the bind count with a gf_client_get()
+ * for this connect attempt, we need to drop the bind count
+ * for earlier connect, since grace timer handler couldn't
+ * drop it since the timer was cancelled.
+ */
gf_client_put (client, NULL);
+ /* We need to drop the ref count for this reconnected client
+ * since one ref was taken before delegating to the grace
+ * timer handler. Since grace timer handler was cancelled,
+ * it couldn't run and drop the ref either.
+ */
+ gf_client_unref (client);
+ }
+
serv_ctx = server_ctx_get (client, client->this);
if (serv_ctx == NULL) {
gf_msg (this->name, GF_LOG_INFO, 0,
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
index 381fb6f515e..51eb4919952 100644
--- a/xlators/protocol/server/src/server-helpers.c
+++ b/xlators/protocol/server/src/server-helpers.c
@@ -490,6 +490,10 @@ get_frame_from_request (rpcsvc_request_t *req)
}
}
+ /* Add a ref for this fop */
+ if (client)
+ gf_client_ref (client);
+
frame->root->uid = req->uid;
frame->root->gid = req->gid;
frame->root->pid = req->pid;
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index ee900712f79..e47acb28637 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -73,10 +73,8 @@ grace_time_handler (void *data)
if (cancelled) {
/*
- * client must not be destroyed in gf_client_put(),
- * so take a ref.
+ * ref has already been taken in server_rpc_notify()
*/
- gf_client_ref (client);
gf_client_put (client, &detached);
if (detached) /* reconnection did not happen :-( */
@@ -543,13 +541,12 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
* new transport will be created upon reconnect.
*/
pthread_mutex_lock (&conf->mutex);
+ client = trans->xl_private;
list_del_init (&trans->list);
- rpc_transport_unref (trans);
pthread_mutex_unlock (&conf->mutex);
- client = trans->xl_private;
if (!client)
- break;
+ goto unref_transport;
gf_msg (this->name, GF_LOG_INFO, 0,
PS_MSG_CLIENT_DISCONNECTING, "disconnecting connection"
@@ -582,18 +579,13 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
trans->myinfo.identifier,
auth_path);
}
- gf_client_unref (client);
- break;
+
+ /*
+ * gf_client_unref will be done while handling
+ * RPC_EVENT_TRANSPORT_DESTROY
+ */
+ goto unref_transport;
}
- trans->xl_private = NULL;
- server_connection_cleanup (this, client, INTERNAL_LOCKS);
- gf_event (EVENT_CLIENT_DISCONNECT, "client_uid=%s;"
- "client_identifier=%s;server_identifier=%s;"
- "brick_path=%s",
- client->client_uid,
- trans->peerinfo.identifier,
- trans->myinfo.identifier,
- auth_path);
serv_ctx = server_ctx_get (client, this);
@@ -601,7 +593,7 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
gf_msg (this->name, GF_LOG_INFO, 0,
PS_MSG_SERVER_CTX_GET_FAILED,
"server_ctx_get() failed");
- goto out;
+ goto unref_transport;
}
grace_ts.tv_sec = conf->grace_timeout;
@@ -616,6 +608,12 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
"starting a grace timer for %s",
client->client_uid);
+ /* ref to protect against client destruction
+ * in RPCSVC_EVENT_TRANSPORT_DESTROY while
+ * we are starting a grace timer
+ */
+ gf_client_ref (client);
+
serv_ctx->grace_timer =
gf_timer_call_after (this->ctx,
grace_ts,
@@ -624,13 +622,42 @@ server_rpc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
}
}
UNLOCK (&serv_ctx->fdtable_lock);
+
+ ret = dict_get_str (this->options, "auth-path", &auth_path);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ PS_MSG_DICT_GET_FAILED,
+ "failed to get auth-path");
+ auth_path = NULL;
+ }
+
+ gf_event (EVENT_CLIENT_DISCONNECT, "client_uid=%s;"
+ "client_identifier=%s;server_identifier=%s;"
+ "brick_path=%s",
+ client->client_uid,
+ trans->peerinfo.identifier,
+ trans->myinfo.identifier,
+ auth_path);
+
+unref_transport:
+ /* rpc_transport_unref() causes a RPCSVC_EVENT_TRANSPORT_DESTROY
+ * to be called in blocking manner
+ * So no code should ideally be after this unref
+ */
+ rpc_transport_unref (trans);
+
break;
+
case RPCSVC_EVENT_TRANSPORT_DESTROY:
- /*- conn obj has been disassociated from trans on first
- * disconnect.
- * conn cleanup and destruction is handed over to
- * grace_time_handler or the subsequent handler that 'owns'
- * the conn. Nothing left to be done here. */
+ client = trans->xl_private;
+ if (!client)
+ break;
+
+ /* unref only for if (!client->lk_heal) */
+ if (!conf->lk_heal)
+ gf_client_unref (client);
+
+ trans->xl_private = NULL;
break;
default:
break;