diff options
author | Krishnan Parthasarathi <kparthas@redhat.com> | 2014-01-21 23:41:07 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2014-03-05 21:26:59 -0800 |
commit | d6c1468b2779b6247e44b75276436021a3469a59 (patch) | |
tree | 08e3e240c4b40f980b2fa375b19592f632bacfbf | |
parent | 9f45d0f6212d6d5c96dafc4aba73d9d12b39c3d6 (diff) |
rpc: transport may be destroyed while rpc isn't
rpc_clnt object is destroyed after the corresponding transport object is
destroyed. But rpc_clnt_reconnect, a timer driven function, refers to
the transport object beyond its 'life'. Instead, using the embedded
connection object prevents use after free problem wrt transport object.
Also, access transport object under conn->lock.
Change-Id: Iae28e8a657d02689963c510114ad7cb7e6764e62
BUG: 962619
Signed-off-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-on: http://review.gluster.org/6751
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.c | 161 | ||||
-rw-r--r-- | rpc/rpc-lib/src/rpc-clnt.h | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 2 | ||||
-rw-r--r-- | xlators/protocol/client/src/client.c | 2 |
5 files changed, 106 insertions, 64 deletions
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index 4920edaa3ad..22513b789d6 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -137,6 +137,7 @@ out: static void call_bail (void *data) { + rpc_transport_t *trans = NULL; struct rpc_clnt *clnt = NULL; rpc_clnt_connection_t *conn = NULL; struct timeval current; @@ -147,12 +148,27 @@ call_bail (void *data) char frame_sent[256] = {0,}; struct timespec timeout = {0,}; struct iovec iov = {0,}; + char peerid[UNIX_PATH_MAX] = {0}; GF_VALIDATE_OR_GOTO ("client", data, out); clnt = data; conn = &clnt->conn; + pthread_mutex_lock (&conn->lock); + { + trans = conn->trans; + if (trans) { + strncpy (peerid, conn->trans->peerinfo.identifier, + sizeof (peerid)-1); + + } + } + pthread_mutex_unlock (&conn->lock); + /*rpc_clnt_connection_cleanup will be unwinding all saved frames, + * bailed or otherwise*/ + if (!trans) + goto out; gettimeofday (¤t, NULL); INIT_LIST_HEAD (&list); @@ -172,9 +188,9 @@ call_bail (void *data) (void *) clnt); if (conn->timer == NULL) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "Cannot create bailout timer for %s", - conn->trans->peerinfo.identifier); + peerid); } } @@ -197,7 +213,7 @@ call_bail (void *data) 256 - strlen (frame_sent), ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec); - gf_log (conn->trans->name, GF_LOG_ERROR, + gf_log (conn->name, GF_LOG_ERROR, "bailing out frame type(%s) op(%s(%d)) xid = 0x%x " "sent = %s. timeout = %d for %s", trav->rpcreq->prog->progname, @@ -205,7 +221,7 @@ call_bail (void *data) trav->rpcreq->prog->procnames[trav->rpcreq->procnum] : "--", trav->rpcreq->procnum, trav->rpcreq->xid, frame_sent, - conn->frame_timeout, conn->trans->peerinfo.identifier); + conn->frame_timeout, peerid); clnt = rpc_clnt_ref (clnt); trav->rpcreq->rpc_status = -1; @@ -357,7 +373,7 @@ saved_frames_unwind (struct saved_frames *saved_frames) if (!trav->rpcreq || !trav->rpcreq->prog) continue; - gf_log_callingfn (trav->rpcreq->conn->trans->name, + gf_log_callingfn (trav->rpcreq->conn->name, GF_LOG_ERROR, "forced unwinding frame type(%s) op(%s(%d)) " "called at %s (xid=0x%x)", @@ -394,7 +410,7 @@ saved_frames_destroy (struct saved_frames *frames) void -rpc_clnt_reconnect (void *trans_ptr) +rpc_clnt_reconnect (void *conn_ptr) { rpc_transport_t *trans = NULL; rpc_clnt_connection_t *conn = NULL; @@ -402,15 +418,16 @@ rpc_clnt_reconnect (void *trans_ptr) int32_t ret = 0; struct rpc_clnt *clnt = NULL; - trans = trans_ptr; - if (!trans || !trans->mydata) - return; - - conn = trans->mydata; + conn = conn_ptr; clnt = conn->rpc_clnt; pthread_mutex_lock (&conn->lock); { + trans = conn->trans; + if (!trans) { + pthread_mutex_unlock (&conn->lock); + return; + } if (conn->reconnect) gf_timer_call_cancel (clnt->ctx, conn->reconnect); @@ -420,16 +437,16 @@ rpc_clnt_reconnect (void *trans_ptr) ts.tv_sec = 3; ts.tv_nsec = 0; - gf_log (trans->name, GF_LOG_TRACE, + gf_log (conn->name, GF_LOG_TRACE, "attempting reconnect"); ret = rpc_transport_connect (trans, conn->config.remote_port); conn->reconnect = gf_timer_call_after (clnt->ctx, ts, rpc_clnt_reconnect, - trans); + conn); } else { - gf_log (trans->name, GF_LOG_TRACE, + gf_log (conn->name, GF_LOG_TRACE, "breaking reconnect chain"); } } @@ -457,7 +474,7 @@ rpc_clnt_fill_request_info (struct rpc_clnt *clnt, rpc_request_info_t *info) pthread_mutex_unlock (&clnt->conn.lock); if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_CRITICAL, + gf_log (clnt->conn.name, GF_LOG_CRITICAL, "cannot lookup the saved " "frame corresponding to xid (%d)", info->xid); goto out; @@ -517,11 +534,9 @@ rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn) clnt = conn->rpc_clnt; - gf_log (conn->trans->name, GF_LOG_TRACE, - "cleaning up state in transport object %p", conn->trans); - pthread_mutex_lock (&conn->lock); { + saved_frames = conn->saved_frames; conn->saved_frames = saved_frames_new (); @@ -651,7 +666,7 @@ rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg, ret = xdr_to_rpc_reply (msgbuf, msglen, &rpcmsg, &progmsg, req->verf.authdata); if (ret != 0) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "RPC reply decoding failed"); goto out; } @@ -662,13 +677,13 @@ rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg, goto out; } - gf_log (conn->trans->name, GF_LOG_TRACE, + gf_log (conn->name, GF_LOG_TRACE, "received rpc message (RPC XID: 0x%x" " Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)", saved_frame->rpcreq->xid, saved_frame->rpcreq->prog->progname, saved_frame->rpcreq->prog->progver, - saved_frame->rpcreq->procnum, conn->trans->name); + saved_frame->rpcreq->procnum, conn->name); out: if (ret != 0) { @@ -696,18 +711,18 @@ rpc_clnt_handle_cbk (struct rpc_clnt *clnt, rpc_transport_pollin_t *msg) clnt = rpc_clnt_ref (clnt); ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg, NULL,NULL); if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_WARNING, + gf_log (clnt->conn.name, GF_LOG_WARNING, "RPC call decoding failed"); goto out; } - gf_log (clnt->conn.trans->name, GF_LOG_TRACE, + gf_log (clnt->conn.name, GF_LOG_TRACE, "received rpc message (XID: 0x%lx, " "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) " "from rpc-transport (%s)", rpc_call_xid (&rpcmsg), rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg), rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg), - clnt->conn.trans->name); + clnt->conn.name); procnum = rpc_call_progproc (&rpcmsg); @@ -750,7 +765,7 @@ rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin) xid = ntoh32 (*((uint32_t *)pollin->vector[0].iov_base)); saved_frame = lookup_frame (conn, xid); if (saved_frame == NULL) { - gf_log (conn->trans->name, GF_LOG_ERROR, + gf_log (conn->name, GF_LOG_ERROR, "cannot lookup the saved frame for reply with xid (%u)", xid); goto out; @@ -758,7 +773,7 @@ rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin) req = saved_frame->rpcreq; if (req == NULL) { - gf_log (conn->trans->name, GF_LOG_ERROR, + gf_log (conn->name, GF_LOG_ERROR, "no request with frame for xid (%u)", xid); goto out; } @@ -766,7 +781,7 @@ rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin) ret = rpc_clnt_reply_init (conn, pollin, req, saved_frame); if (ret != 0) { req->rpc_status = -1; - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "initialising rpc reply failed"); } @@ -859,7 +874,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, conn->reconnect = gf_timer_call_after (clnt->ctx, ts, rpc_clnt_reconnect, - conn->trans); + conn); } } pthread_mutex_unlock (&conn->lock); @@ -961,10 +976,17 @@ rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx, { int ret = -1; rpc_clnt_connection_t *conn = NULL; + rpc_transport_t *trans = NULL; conn = &clnt->conn; pthread_mutex_init (&clnt->conn.lock, NULL); + conn->name = gf_strdup (name); + if (!conn->name) { + ret = -1; + goto out; + } + ret = dict_get_int32 (options, "frame-timeout", &conn->frame_timeout); if (ret >= 0) { @@ -975,25 +997,28 @@ rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx, "defaulting frame-timeout to 30mins"); conn->frame_timeout = 1800; } + conn->rpc_clnt = clnt; - conn->trans = rpc_transport_load (ctx, options, name); - if (!conn->trans) { + trans = rpc_transport_load (ctx, options, name); + if (!trans) { gf_log (name, GF_LOG_WARNING, "loading of new rpc-transport" " failed"); ret = -1; goto out; } + rpc_transport_ref (trans); - rpc_transport_ref (conn->trans); - - conn->rpc_clnt = clnt; + pthread_mutex_lock (&conn->lock); + { + conn->trans = trans; + trans = NULL; + } + pthread_mutex_unlock (&conn->lock); ret = rpc_transport_register_notify (conn->trans, rpc_clnt_notify, conn); if (ret == -1) { gf_log (name, GF_LOG_WARNING, "registering notify failed"); - rpc_clnt_connection_cleanup (conn); - conn = NULL; goto out; } @@ -1001,13 +1026,26 @@ rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx, if (!conn->saved_frames) { gf_log (name, GF_LOG_WARNING, "creation of saved_frames " "failed"); - rpc_clnt_connection_cleanup (conn); + ret = -1; goto out; } ret = 0; out: + if (ret) { + pthread_mutex_lock (&conn->lock); + { + trans = conn->trans; + conn->trans = NULL; + } + pthread_mutex_unlock (&conn->lock); + if (trans) + rpc_transport_unref (trans); + //conn cleanup needs to be done since we might have failed to + // register notification. + rpc_clnt_connection_cleanup (conn); + } return ret; } @@ -1079,7 +1117,7 @@ rpc_clnt_start (struct rpc_clnt *rpc) conn = &rpc->conn; - rpc_clnt_reconnect (conn->trans); + rpc_clnt_reconnect (conn); return 0; } @@ -1234,7 +1272,7 @@ rpc_clnt_record_build_record (struct rpc_clnt *clnt, int prognum, int progver, xid, au, &request, auth_data); if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_WARNING, + gf_log (clnt->conn.name, GF_LOG_WARNING, "cannot build a rpc-request xid (%"PRIu64")", xid); goto out; } @@ -1257,7 +1295,7 @@ rpc_clnt_record_build_record (struct rpc_clnt *clnt, int prognum, int progver, hdrsize); if (!recordhdr.iov_base) { - gf_log (clnt->conn.trans->name, GF_LOG_ERROR, + gf_log (clnt->conn.name, GF_LOG_ERROR, "Failed to build record header"); iobuf_unref (request_iob); request_iob = NULL; @@ -1307,7 +1345,7 @@ rpc_clnt_record (struct rpc_clnt *clnt, call_frame_t *call_frame, au.lk_owner.lk_owner_len = 4; } - gf_log (clnt->conn.trans->name, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d" + gf_log (clnt->conn.name, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d" ", gid: %d, owner: %s", au.pid, au.uid, au.gid, lkowner_utoa (&call_frame->root->lk_owner)); @@ -1317,7 +1355,7 @@ rpc_clnt_record (struct rpc_clnt *clnt, call_frame_t *call_frame, callid, &au, rpchdr); if (!request_iob) { - gf_log (clnt->conn.trans->name, GF_LOG_WARNING, + gf_log (clnt->conn.name, GF_LOG_WARNING, "cannot build rpc-record"); goto out; } @@ -1353,7 +1391,7 @@ rpcclnt_cbk_program_register (struct rpc_clnt *clnt, pthread_mutex_unlock (&clnt->lock); if (already_registered) { - gf_log_callingfn (clnt->conn.trans->name, GF_LOG_DEBUG, + gf_log_callingfn (clnt->conn.name, GF_LOG_DEBUG, "already registered"); ret = 0; goto out; @@ -1377,14 +1415,14 @@ rpcclnt_cbk_program_register (struct rpc_clnt *clnt, pthread_mutex_unlock (&clnt->lock); ret = 0; - gf_log (clnt->conn.trans->name, GF_LOG_DEBUG, + gf_log (clnt->conn.name, GF_LOG_DEBUG, "New program registered: %s, Num: %d, Ver: %d", program->progname, program->prognum, program->progver); out: if (ret == -1) { - gf_log (clnt->conn.trans->name, GF_LOG_ERROR, + gf_log (clnt->conn.name, GF_LOG_ERROR, "Program registration failed:" " %s, Num: %d, Ver: %d", program->progname, program->prognum, program->progver); @@ -1419,10 +1457,6 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, conn = &rpc->conn; - if (conn->trans == NULL) { - goto out; - } - rpcreq = mem_get (rpc->reqpool); if (rpcreq == NULL) { goto out; @@ -1458,7 +1492,7 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, procnum, proglen, &rpchdr, callid); if (!request_iob) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "cannot build rpc-record"); goto out; } @@ -1487,15 +1521,14 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, conn->config.remote_port); } - ret = rpc_transport_submit_request (rpc->conn.trans, - &req); + ret = rpc_transport_submit_request (conn->trans, &req); if (ret == -1) { - gf_log (conn->trans->name, GF_LOG_WARNING, + gf_log (conn->name, GF_LOG_WARNING, "failed to submit rpc-request " "(XID: 0x%x Program: %s, ProgVers: %d, " "Proc: %d) to rpc-transport (%s)", rpcreq->xid, rpcreq->prog->progname, rpcreq->prog->progver, - rpcreq->procnum, rpc->conn.trans->name); + rpcreq->procnum, conn->name); } if ((ret >= 0) && frame) { @@ -1506,7 +1539,7 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, "(XID: 0x%x Program: %s, ProgVers: %d, " "Proc: %d) to rpc-transport (%s)", rpcreq->xid, rpcreq->prog->progname, rpcreq->prog->progver, - rpcreq->procnum, rpc->conn.trans->name); + rpcreq->procnum, conn->name); } } pthread_mutex_unlock (&conn->lock); @@ -1554,11 +1587,14 @@ rpc_clnt_ref (struct rpc_clnt *rpc) static void rpc_clnt_trigger_destroy (struct rpc_clnt *rpc) { + rpc_clnt_connection_t *conn = NULL; + if (!rpc) return; + conn = &rpc->conn; rpc_clnt_disable (rpc); - rpc_transport_unref (rpc->conn.trans); + rpc_transport_unref (conn->trans); } static void @@ -1627,6 +1663,7 @@ void rpc_clnt_disable (struct rpc_clnt *rpc) { rpc_clnt_connection_t *conn = NULL; + rpc_transport_t *trans = NULL; if (!rpc) { goto out; @@ -1654,11 +1691,15 @@ rpc_clnt_disable (struct rpc_clnt *rpc) conn->ping_timer = NULL; conn->ping_started = 0; } + trans = conn->trans; + conn->trans = NULL; } pthread_mutex_unlock (&conn->lock); - rpc_transport_disconnect (rpc->conn.trans); + if (trans) { + rpc_transport_disconnect (trans); + } out: return; @@ -1670,7 +1711,7 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config) { if (config->rpc_timeout) { if (config->rpc_timeout != rpc->conn.config.rpc_timeout) - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "changing timeout to %d (from %d)", config->rpc_timeout, rpc->conn.config.rpc_timeout); @@ -1679,7 +1720,7 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config) if (config->remote_port) { if (config->remote_port != rpc->conn.config.remote_port) - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "changing port to %d (from %d)", config->remote_port, rpc->conn.config.remote_port); @@ -1691,13 +1732,13 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config) if (rpc->conn.config.remote_host) { if (strcmp (rpc->conn.config.remote_host, config->remote_host)) - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "changing hostname to %s (from %s)", config->remote_host, rpc->conn.config.remote_host); GF_FREE (rpc->conn.config.remote_host); } else { - gf_log (rpc->conn.trans->name, GF_LOG_INFO, + gf_log (rpc->conn.name, GF_LOG_INFO, "setting hostname to %s", config->remote_host); } diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h index 2596c3508c2..bd4820605d8 100644 --- a/rpc/rpc-lib/src/rpc-clnt.h +++ b/rpc/rpc-lib/src/rpc-clnt.h @@ -144,6 +144,7 @@ struct rpc_clnt_connection { struct timeval last_sent; struct timeval last_received; int32_t ping_started; + char *name; }; typedef struct rpc_clnt_connection rpc_clnt_connection_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index b274e3367da..bdedf4c0446 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -126,7 +126,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, UNLOCK (&defrag->lock); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT", - rpc->conn.trans->name); + rpc->conn.name); break; } @@ -161,7 +161,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, GF_FREE (defrag); gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", - rpc->conn.trans->name); + rpc->conn.name); break; } case RPC_CLNT_DESTROY: diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 7c8be42ede2..85b0f757bf4 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -1458,7 +1458,7 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m gf_log (this->name, GF_LOG_INFO, "Connected to %s, attached to remote volume '%s'.", - conf->rpc->conn.trans->peerinfo.identifier, + conf->rpc->conn.name, remote_subvol); rpc_clnt_set_connected (&conf->rpc->conn); diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index b0a71d3f9ea..306e555ef40 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -2205,7 +2205,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, "will keep trying to connect to " "glusterd until brick's port is " "available", - conf->rpc->conn.trans->peerinfo.identifier); + conf->rpc->conn.name); if (conf->portmap_err_logged) conf->disconnect_err_logged = 1; |