diff options
author | Kaushal M <kaushal@redhat.com> | 2015-01-08 19:24:59 +0530 |
---|---|---|
committer | Krishnan Parthasarathi <kparthas@redhat.com> | 2015-03-16 02:19:14 -0700 |
commit | c7785f78420c94220954eef538ed4698713ebcdb (patch) | |
tree | b10ad0468f21835121262463f517cad58614d49a | |
parent | 7d8be3613f7384f5118f26e194fe7c64ea69d11c (diff) |
glusterd: Protect the peer list and peerinfos with RCU.
The peer list and the peerinfo objects are now protected using RCU.
Design patterns described in Paul McKenney's RCU dissertation [1]
(sections 5 and 6) have been used to convert existing non-RCU protected
code to RCU protected code.
Currently, we are only targeting guaranteeing the existence of the
peerinfo objects, i.e., we are only looking to protect deletes, not all
updates. We chose this, as protecting all updates is a much more
complex task.
The steps used to accomplish this are,
1. Remove all long lived direct references to peerinfo objects (apart
from the peerinfo list). This includes references in glusterd_peerctx_t
(RPC), glusterd_friend_sm_event_t (friend state machine) and others.
This way no one has a reference to deleted peerinfo object.
2. Replace the direct references with indirect references, i.e., use
peer uuid and peer hostname as indirect references to the peerinfo
object. Any reader or updater now uses the indirect references to get to
the actual peerinfo object, using glusterd_peerinfo_find. Cases where a
peerinfo cannot be found are handled gracefully.
3. The readers get and use the peerinfo object only within a RCU read
critical section. This prevents the object from being deleted/freed when
in actual use.
4. The deletion of a peerinfo object is done in an ordered manner
(glusterd_peerinfo_destroy). The object is first removed from the
peerinfo list using an atomic list remove, but the list head is not
reset to allow existing list readers to complete correctly. We wait for
readers to complete, before resetting the list head. This removes the
object from the list completely. After this no new readers can get a
reference to the object, and it can be freed.
This change was developed on the git branch at [2]. This commit is a
combination of the following commits on the development branch.
d7999b9 Protect the glusterd_conf_t->peers_list with RCU.
0da85c4 Synchronize before INITing peerinfo list head after removing
from list.
32ec28a Add missing rcu_read_unlock
8fed0b8 Correctly exit read critical section once peer is found.
63db857 Free peerctx only on rpc destruction
56eff26 Cleanup style issues
e5f38b0 Indirection for events and friend_sm
3c84ac4 In __glusterd_probe_cbk goto unlock only if peer already
exists
141d855 Address review comments on 9695/1
aaeefed Protection during peer updates
6eda33d Revert "Synchronize before INITing peerinfo list head after
removing from list."
f69db96 Remove unneeded line
b43d2ec Address review comments on 9695/4
7781921 Address review comments on 9695/5
eb6467b Add some missing semi-colons
328a47f Remove synchronize_rcu from
glusterd_friend_sm_transition_state
186e429 Run part of glusterd_friend_remove in critical section
55c0a2e Fix gluster (peer status/ pool list) with no peers
93f8dcf Use call_rcu to free peerinfo
c36178c Introduce composite struct, gd_rcu_head
[1]: http://www.rdrop.com/~paulmck/RCU/RCUdissertation.2004.07.14e1.pdf
[2]: https://github.com/kshlm/glusterfs/tree/urcu
Change-Id: Ic1480e59c86d41d25a6a3d159aa3e11fbb3cbc7b
BUG: 1191030
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/9695
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-by: Anand Nekkunti <anekkunt@redhat.com>
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
18 files changed, 801 insertions, 290 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index fcade87fd3e..a3217c35574 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -34,7 +34,7 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \ glusterd-svc-mgmt.h glusterd-shd-svc.h glusterd-nfs-svc.h \ glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \ - glusterd-snapd-svc-helper.h \ + glusterd-snapd-svc-helper.h glusterd-rcu.h \ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index eaa05969656..cc97baf6f21 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -103,6 +103,9 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, port = GF_DEFAULT_BASE_PORT; ret = glusterd_remote_hostname_get (req, rhost, sizeof (rhost)); + + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (uuid, rhost); if (peerinfo == NULL) { @@ -120,10 +123,11 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, if (ret) { gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret); - return ret; + goto out; } - event->peerinfo = peerinfo; + event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (event->peerid, peerinfo->uuid); ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t); @@ -164,9 +168,13 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, } ret = 0; + if (peerinfo && (0 == peerinfo->connected)) + ret = GLUSTERD_CONNECTION_AWAITED; out: - if (0 != ret) { + rcu_read_unlock (); + + if (ret && (ret != GLUSTERD_CONNECTION_AWAITED)) { if (ctx && ctx->hostname) GF_FREE (ctx->hostname); GF_FREE (ctx); @@ -178,11 +186,12 @@ out: } else { free (friend_req->vols.vols_val); } + 
if (event) + GF_FREE (event->peername); GF_FREE (event); - } else { - if (peerinfo && (0 == peerinfo->connected)) - ret = GLUSTERD_CONNECTION_AWAITED; } + + return ret; } @@ -198,6 +207,8 @@ glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid, if (!port) port = GF_DEFAULT_BASE_PORT; + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (uuid, hostname); if (peerinfo == NULL) { @@ -214,10 +225,11 @@ glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid, if (ret) { gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret); - return ret; + goto out; } - event->peerinfo = peerinfo; + event->peername = gf_strdup (hostname); + uuid_copy (event->peerid, uuid); ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t); @@ -245,10 +257,15 @@ glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid, ret = 0; out: + rcu_read_unlock (); + if (0 != ret) { if (ctx && ctx->hostname) GF_FREE (ctx->hostname); GF_FREE (ctx); + if (event) + GF_FREE (event->peername); + GF_FREE (event); } return ret; @@ -698,7 +715,10 @@ __glusterd_handle_cluster_lock (rpcsvc_request_t *req) gf_log (this->name, GF_LOG_DEBUG, "Received LOCK from uuid: %s", uuid_utoa (lock_req.uuid)); - if (glusterd_peerinfo_find_by_uuid (lock_req.uuid) == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find_by_uuid (lock_req.uuid) == NULL); + rcu_read_unlock (); + if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s doesn't " "belong to the cluster. Ignoring request.", uuid_utoa (lock_req.uuid)); @@ -846,7 +866,10 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) gf_log (this->name, GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); - if (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL); + rcu_read_unlock (); + if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s doesn't " "belong to the cluster. 
Ignoring request.", uuid_utoa (op_req.uuid)); @@ -922,7 +945,10 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) goto out; } - if (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find_by_uuid (op_req.uuid) == NULL); + rcu_read_unlock (); + if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s doesn't " "belong to the cluster. Ignoring request.", uuid_utoa (op_req.uuid)); @@ -1037,14 +1063,22 @@ __glusterd_handle_cli_probe (rpcsvc_request_t *req) goto out; } + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find_by_hostname (hostname); - if (peerinfo && gd_peer_has_address (peerinfo, hostname)) { + ret = (peerinfo && gd_peer_has_address (peerinfo, hostname)); + + rcu_read_unlock (); + + if (ret) { gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port %d " "already a peer", hostname, port); glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL, hostname, port, dict); + ret = 0; goto out; } + ret = glusterd_probe_begin (req, hostname, port, dict, &op_errno); if (ret == GLUSTERD_CONNECTION_AWAITED) { @@ -1931,27 +1965,32 @@ __glusterd_handle_fsm_log (rpcsvc_request_t *req) goto out; } + dict = dict_new (); + if (!dict) { + ret = -1; + goto out; + } + if (strcmp ("", cli_req.name) == 0) { this = THIS; conf = this->private; - log = &conf->op_sm_log; + ret = glusterd_sm_tr_log_add_to_dict (dict, &conf->op_sm_log); } else { + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find_by_hostname (cli_req.name); if (!peerinfo) { + ret = -1; snprintf (msg, sizeof (msg), "%s is not a peer", cli_req.name); - goto out; + } else { + ret = glusterd_sm_tr_log_add_to_dict + (dict, &peerinfo->sm_log); } - log = &peerinfo->sm_log; - } - dict = dict_new (); - if (!dict) { - ret = -1; - goto out; + rcu_read_unlock (); } - ret = glusterd_sm_tr_log_add_to_dict (dict, log); out: (void)glusterd_fsm_log_send_resp (req, ret, msg, dict); free (cli_req.name);//malloced by xdr @@ -2089,7 +2128,10 @@ __glusterd_handle_cluster_unlock 
(rpcsvc_request_t *req) gf_log (this->name, GF_LOG_DEBUG, "Received UNLOCK from uuid: %s", uuid_utoa (unlock_req.uuid)); - if (glusterd_peerinfo_find_by_uuid (unlock_req.uuid) == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find_by_uuid (unlock_req.uuid) == NULL); + rcu_read_unlock (); + if (ret) { gf_log (this->name, GF_LOG_WARNING, "%s doesn't " "belong to the cluster. Ignoring request.", uuid_utoa (unlock_req.uuid)); @@ -2370,12 +2412,18 @@ __glusterd_handle_friend_update (rpcsvc_request_t *req) goto out; } + ret = 0; + rcu_read_lock (); if (glusterd_peerinfo_find (friend_req.uuid, NULL) == NULL) { ret = -1; + } + rcu_read_unlock (); + if (ret) { gf_log ("", GF_LOG_CRITICAL, "Received friend update request " "from unknown peer %s", uuid_utoa (friend_req.uuid)); goto out; } + gf_log ("glusterd", GF_LOG_INFO, "Received friend update from uuid: %s", uuid_utoa (friend_req.uuid)); @@ -2428,6 +2476,7 @@ __glusterd_handle_friend_update (rpcsvc_request_t *req) memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "friend%d", i); + rcu_read_lock (); peerinfo = glusterd_peerinfo_find (uuid, NULL); if (peerinfo == NULL) { /* Create a new peer and add it to the list as there is @@ -2439,7 +2488,7 @@ __glusterd_handle_friend_update (rpcsvc_request_t *req) gf_log (this->name, GF_LOG_ERROR, "Could not create peerinfo from dict " "for prefix %s", key); - goto out; + goto unlock; } /* As this is a new peer, it should be added as a @@ -2459,9 +2508,12 @@ __glusterd_handle_friend_update (rpcsvc_request_t *req) if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to " "update peer %s", peerinfo->hostname); - goto out; } } +unlock: + rcu_read_unlock (); + if (ret) + break; peerinfo = NULL; i++; @@ -2549,6 +2601,8 @@ __glusterd_handle_probe_query (rpcsvc_request_t *req) gf_log ("", GF_LOG_ERROR, "Unable to get the remote hostname"); goto out; } + + rcu_read_lock (); peerinfo = glusterd_peerinfo_find (probe_req.uuid, remote_hostname); if ((peerinfo == NULL) && 
(!cds_list_empty (&conf->peers))) { rsp.op_ret = -1; @@ -2566,6 +2620,7 @@ __glusterd_handle_probe_query (rpcsvc_request_t *req) rsp.op_errno = GF_PROBE_ADD_FAILED; } } + rcu_read_unlock (); respond: uuid_copy (rsp.uuid, MY_UUID); @@ -2882,6 +2937,8 @@ glusterd_friend_remove (uuid_t uuid, char *hostname) int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (uuid, hostname); if (peerinfo == NULL) goto out; @@ -2889,6 +2946,11 @@ glusterd_friend_remove (uuid_t uuid, char *hostname) ret = glusterd_friend_remove_cleanup_vols (peerinfo->uuid); if (ret) gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed"); + + rcu_read_unlock (); + /* Giving up the critical section here as glusterd_peerinfo_cleanup must + * be called from outside a critical section + */ ret = glusterd_peerinfo_cleanup (peerinfo); out: gf_log ("", GF_LOG_DEBUG, "returning %d", ret); @@ -3008,7 +3070,8 @@ glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, if (args) peerctx->args = *args; - peerctx->peerinfo = peerinfo; + uuid_copy (peerctx->peerid, peerinfo->uuid); + peerctx->peername = gf_strdup (peerinfo->hostname); ret = glusterd_transport_inet_options_build (&options, peerinfo->hostname, @@ -3079,7 +3142,7 @@ glusterd_friend_add (const char *hoststr, int port, * invalid peer name). That would mean we're adding something that had * just been free, and we're likely to crash later. */ - cds_list_add_tail (&(*friend)->uuid_list, &conf->peers); + cds_list_add_tail_rcu (&(*friend)->uuid_list, &conf->peers); //restore needs to first create the list of peers, then create rpcs //to keep track of quorum in race-free manner. In restore for each peer @@ -3132,7 +3195,7 @@ glusterd_friend_add_from_peerinfo (glusterd_peerinfo_t *friend, * invalid peer name). That would mean we're adding something that had * just been free, and we're likely to crash later. 
*/ - cds_list_add_tail (&friend->uuid_list, &conf->peers); + cds_list_add_tail_rcu (&friend->uuid_list, &conf->peers); //restore needs to first create the list of peers, then create rpcs //to keep track of quorum in race-free manner. In restore for each peer @@ -3165,6 +3228,7 @@ glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, GF_ASSERT (hoststr); + rcu_read_lock (); peerinfo = glusterd_peerinfo_find (NULL, hoststr); if (peerinfo == NULL) { @@ -3196,7 +3260,9 @@ glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_LOCAL_ACC, &event); if (!ret) { - event->peerinfo = peerinfo; + event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (event->peerid, peerinfo->uuid); + ret = glusterd_friend_sm_inject_event (event); glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_SUCCESS, NULL, (char*)hoststr, @@ -3208,6 +3274,7 @@ glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, } out: + rcu_read_unlock (); gf_log ("", GF_LOG_DEBUG, "returning %d", ret); return ret; } @@ -3224,8 +3291,9 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, GF_ASSERT (hoststr); GF_ASSERT (req); - peerinfo = glusterd_peerinfo_find (uuid, hoststr); + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (uuid, hoststr); if (peerinfo == NULL) { ret = -1; gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo" @@ -3251,7 +3319,7 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get new event"); - return ret; + goto out; } ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t); @@ -3267,7 +3335,8 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, event->ctx = ctx; - event->peerinfo = peerinfo; + event->peername = gf_strdup (hoststr); + uuid_copy (event->peerid, uuid); ret = glusterd_friend_sm_inject_event (event); @@ -3279,6 
+3348,7 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, peerinfo->detaching = _gf_true; out: + rcu_read_unlock (); return ret; } @@ -3590,15 +3660,23 @@ glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags) gf_log ("", GF_LOG_WARNING, "Out of Memory"); goto out; } + + /* Reset ret to 0, needed to prevent failure incase no peers exist */ + ret = 0; + rcu_read_lock (); if (!cds_list_empty (&priv->peers)) { - cds_list_for_each_entry (entry, &priv->peers, uuid_list) { + cds_list_for_each_entry_rcu (entry, &priv->peers, uuid_list) { count++; ret = gd_add_peer_detail_to_dict (entry, friends, count); if (ret) - goto out; + goto unlock; } } +unlock: + rcu_read_unlock (); + if (ret) + goto out; if (flags == GF_CLI_LIST_POOL_NODES) { count++; @@ -4417,14 +4495,23 @@ glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx) { int ret = -1; glusterd_friend_sm_event_t *new_event = NULL; - glusterd_peerinfo_t *peerinfo = peerctx->peerinfo; - rpcsvc_request_t *req = peerctx->args.req; - char *errstr = peerctx->errstr; + glusterd_peerinfo_t *peerinfo = NULL; + rpcsvc_request_t *req = NULL; + char *errstr = NULL; dict_t *dict = NULL; GF_ASSERT (peerctx); - peerinfo = peerctx->peerinfo; + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + gf_log (THIS->name, GF_LOG_DEBUG, "Could not find peer %s(%s). 
" + "Peer could have been deleted.", peerctx->peername, + uuid_utoa (peerctx->peerid)); + ret = 0; + goto out; + } + req = peerctx->args.req; dict = peerctx->args.dict; errstr = peerctx->errstr; @@ -4443,7 +4530,8 @@ glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx) peerinfo->hostname, peerinfo->port, dict); - new_event->peerinfo = peerinfo; + new_event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (new_event->peerid, peerinfo->uuid); ret = glusterd_friend_sm_inject_event (new_event); } else { @@ -4453,6 +4541,7 @@ glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx) } out: + rcu_read_unlock (); return ret; } @@ -4473,10 +4562,29 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, if (!peerctx) return 0; - peerinfo = peerctx->peerinfo; this = THIS; conf = this->private; + if (RPC_CLNT_DESTROY == event) { + GF_FREE (peerctx->errstr); + GF_FREE (peerctx->peername); + GF_FREE (peerctx); + return 0; + } + + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + /* Peerinfo should be available at this point. 
Not finding it + * means that something terrible has happened + */ + gf_log (THIS->name, GF_LOG_CRITICAL, "Could not find peer " + "%s(%s)", peerctx->peername, uuid_utoa (peerctx->peerid)); + ret = -1; + goto out; + } + switch (event) { case RPC_CLNT_CONNECT: { @@ -4545,6 +4653,7 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, peerinfo->connected = 0; break; } + default: gf_log (this->name, GF_LOG_TRACE, "got some other RPC event %d", event); @@ -4553,6 +4662,8 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, } out: + rcu_read_unlock (); + glusterd_friend_sm (); glusterd_op_sm (); if (quorum_action) diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 611e9fbf545..ddb1403b391 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -975,9 +975,11 @@ gd_validate_mgmt_hndsk_req (rpcsvc_request_t *req) if (ret) return _gf_false; - peer = glusterd_peerinfo_find (NULL, hostname); - if (peer == NULL) { - ret = -1; + rcu_read_lock (); + ret = (glusterd_peerinfo_find (NULL, hostname) == NULL); + rcu_read_unlock (); + + if (ret) { gf_log (this->name, GF_LOG_ERROR, "Rejecting management " "handshake request from unknown peer %s", req->trans->peerinfo.identifier); @@ -1504,22 +1506,31 @@ glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) goto out; } - peerinfo = peerctx->peerinfo; + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + ret = -1; + gf_log (THIS->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + peerctx->peername, uuid_utoa (peerctx->peerid)); + goto unlock; + } ctx->hostname = gf_strdup (peerinfo->hostname); ctx->port = peerinfo->port; ctx->req = peerctx->args.req; ctx->dict = peerctx->args.dict; - event->peerinfo = peerinfo; + event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (event->peerid, peerinfo->uuid); event->ctx 
= ctx; ret = glusterd_friend_sm_inject_event (event); - if (ret) { + if (ret) gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject " "EVENT_CONNECTED ret = %d", ret); - goto out; - } +unlock: + rcu_read_unlock (); out: gf_log ("", GF_LOG_DEBUG, "returning %d", ret); @@ -1589,7 +1600,15 @@ __glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, this = THIS; frame = myframe; peerctx = frame->local; - peerinfo = peerctx->peerinfo; + + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + gf_log (this->name, GF_LOG_DEBUG, "Could not find peer %s(%s)", + peerctx->peername, uuid_utoa (peerctx->peerid)); + ret = -1; + goto out; + } if (-1 == req->rpc_status) { snprintf (msg, sizeof (msg), @@ -1636,20 +1655,22 @@ __glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, peerctx->args.mode); } - glusterd_friend_sm (); - ret = 0; out: + if (ret != 0 && peerinfo) + rpc_transport_disconnect (peerinfo->rpc->conn.trans); + + rcu_read_unlock (); + frame->local = NULL; STACK_DESTROY (frame->root); - if (ret != 0) - rpc_transport_disconnect (peerinfo->rpc->conn.trans); - if (rsp.hndsk.hndsk_val) free (rsp.hndsk.hndsk_val); + glusterd_friend_sm (); + return 0; } @@ -1682,7 +1703,16 @@ __glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, conf = this->private; frame = myframe; peerctx = frame->local; - peerinfo = peerctx->peerinfo; + + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + ret = -1; + gf_log (this->name, GF_LOG_DEBUG, "Could not find peer %s(%s)", + peerctx->peername, uuid_utoa (peerctx->peerid)); + goto out; + } if (-1 == req->rpc_status) { ret = -1; @@ -1738,7 +1768,7 @@ __glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, GF_PROTOCOL_DICT_SERIALIZE (this, rsp_dict, (&arg.hndsk.hndsk_val), arg.hndsk.hndsk_len, op_errno, out); - ret = glusterd_submit_request 
(peerctx->peerinfo->rpc, &arg, frame, + ret = glusterd_submit_request (peerinfo->rpc, &arg, frame, &gd_clnt_mgmt_hndsk_prog, GD_MGMT_HNDSK_VERSIONS_ACK, NULL, this, glusterd_mgmt_hndsk_version_ack_cbk, @@ -1748,9 +1778,12 @@ out: if (ret) { frame->local = NULL; STACK_DESTROY (frame->root); - rpc_transport_disconnect (peerinfo->rpc->conn.trans); + if (peerinfo) + rpc_transport_disconnect (peerinfo->rpc->conn.trans); } + rcu_read_unlock (); + if (rsp.hndsk.hndsk_val) free (rsp.hndsk.hndsk_val); @@ -1779,6 +1812,7 @@ glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx) { call_frame_t *frame = NULL; gf_mgmt_hndsk_req req = {{0,},}; + glusterd_peerinfo_t *peerinfo = NULL; int ret = -1; frame = create_frame (this, this->ctx->pool); @@ -1787,12 +1821,23 @@ glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx) frame->local = peerctx; - ret = glusterd_submit_request (peerctx->peerinfo->rpc, &req, frame, + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + gf_log (THIS->name, GF_LOG_DEBUG, "Could not find peer %s(%s)", + peerctx->peername, uuid_utoa (peerctx->peerid)); + goto unlock; + } + + ret = glusterd_submit_request (peerinfo->rpc, &req, frame, &gd_clnt_mgmt_hndsk_prog, GD_MGMT_HNDSK_VERSIONS, NULL, this, glusterd_mgmt_hndsk_version_cbk, (xdrproc_t)xdr_gf_mgmt_hndsk_req); ret = 0; +unlock: + rcu_read_unlock (); out: if (ret && frame) STACK_DESTROY (frame->root); @@ -1904,7 +1949,15 @@ __glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, conf = this->private; frame = myframe; peerctx = frame->local; - peerinfo = peerctx->peerinfo; + + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + gf_log (this->name, GF_LOG_DEBUG, "Couldn't find peer %s(%s)", + peerctx->peername, uuid_utoa (peerctx->peerid)); + goto out; + } if (-1 == req->rpc_status) { snprintf (msg, sizeof (msg), @@ -1964,12 +2017,16 @@ 
__glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, peerctx->args.mode); } - glusterd_friend_sm(); - glusterd_op_sm(); - ret = 0; out: + if (ret != 0 && peerinfo) + rpc_transport_disconnect (peerinfo->rpc->conn.trans); + + rcu_read_unlock (); + + glusterd_friend_sm (); + glusterd_op_sm (); /* don't use GF_FREE, buffer was allocated by libc */ if (rsp.prog) { @@ -1985,9 +2042,6 @@ out: frame->local = NULL; STACK_DESTROY (frame->root); - if (ret != 0) - rpc_transport_disconnect (peerinfo->rpc->conn.trans); - return 0; } @@ -2006,6 +2060,7 @@ glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, { call_frame_t *frame = NULL; gf_dump_req req = {0,}; + glusterd_peerinfo_t *peerinfo = NULL; int ret = -1; frame = create_frame (this, this->ctx->pool); @@ -2013,14 +2068,27 @@ glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, goto out; frame->local = peerctx; + if (!peerctx) + goto out; + + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (peerctx->peerid, peerctx->peername); + if (!peerinfo) { + gf_log (this->name, GF_LOG_DEBUG, "Couldn't find peer %s(%s)", + peerctx->peername, uuid_utoa (peerctx->peerid)); + goto unlock; + } req.gfs_id = 0xcafe; - ret = glusterd_submit_request (peerctx->peerinfo->rpc, &req, frame, + ret = glusterd_submit_request (peerinfo->rpc, &req, frame, &glusterd_dump_prog, GF_DUMP_DUMP, NULL, this, glusterd_peer_dump_version_cbk, (xdrproc_t)xdr_gf_dump_req); +unlock: + rcu_read_unlock (); out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 59708cbeb18..a78c80eceb4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1157,23 +1157,23 @@ glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) ret = 0; } } else { - peerinfo = glusterd_peerinfo_find (NULL, hostname); - if (peerinfo == NULL) { - ret = -1; - snprintf (msg, sizeof (msg), "%s, is not a friend", - 
hostname); - *op_errstr = gf_strdup (msg); - goto out; - } - - if (!peerinfo->connected) { - snprintf (msg, sizeof (msg), "%s, is not connected at " - "the moment", hostname); - *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (NULL, hostname); + if (peerinfo == NULL) { + ret = -1; + snprintf (msg, sizeof (msg), "%s, is not a friend", + hostname); + *op_errstr = gf_strdup (msg); + + } else if (!peerinfo->connected) { + snprintf (msg, sizeof (msg), "%s, is not connected at " + "the moment", hostname); + *op_errstr = gf_strdup (msg); + ret = -1; + } + rcu_read_unlock (); } out: diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c index 3a145264b79..49fab4cb8b9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c @@ -12,44 +12,21 @@ #include "glusterd-store.h" #include "common-utils.h" -int32_t -glusterd_peerinfo_cleanup (glusterd_peerinfo_t *peerinfo) -{ - GF_ASSERT (peerinfo); - glusterd_peerctx_t *peerctx = NULL; - gf_boolean_t quorum_action = _gf_false; - glusterd_conf_t *priv = THIS->private; - - if (peerinfo->quorum_contrib != QUORUM_NONE) - quorum_action = _gf_true; - if (peerinfo->rpc) { - peerctx = peerinfo->rpc->mydata; - peerinfo->rpc->mydata = NULL; - peerinfo->rpc = glusterd_rpc_clnt_unref (priv, peerinfo->rpc); - peerinfo->rpc = NULL; - if (peerctx) { - GF_FREE (peerctx->errstr); - GF_FREE (peerctx); - } - } - glusterd_peerinfo_destroy (peerinfo); - - if (quorum_action) - glusterd_do_quorum_action (); - return 0; -} - -int32_t -glusterd_peerinfo_destroy (glusterd_peerinfo_t *peerinfo) +void +glusterd_peerinfo_destroy (struct rcu_head *head) { - int32_t ret = -1; + int32_t ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; glusterd_peer_hostname_t *hostname = NULL; - glusterd_peer_hostname_t *tmp = NULL; + glusterd_peer_hostname_t *tmp = NULL; - if (!peerinfo) - goto out; 
+ /* This works as rcu_head is the first member of gd_rcu_head */ + peerinfo = caa_container_of (head, glusterd_peerinfo_t, head); + + /* Set THIS to the saved this. Needed by some functions below */ + THIS = peerinfo->head.this; - cds_list_del_init (&peerinfo->uuid_list); + CDS_INIT_LIST_HEAD (&peerinfo->uuid_list); ret = glusterd_store_delete_peerinfo (peerinfo); if (ret) { @@ -65,13 +42,44 @@ glusterd_peerinfo_destroy (glusterd_peerinfo_t *peerinfo) } glusterd_sm_tr_log_delete (&peerinfo->sm_log); + pthread_mutex_destroy (&peerinfo->delete_lock); GF_FREE (peerinfo); + peerinfo = NULL; - ret = 0; + return; +} -out: - return ret; +int32_t +glusterd_peerinfo_cleanup (glusterd_peerinfo_t *peerinfo) +{ + GF_ASSERT (peerinfo); + glusterd_peerctx_t *peerctx = NULL; + gf_boolean_t quorum_action = _gf_false; + glusterd_conf_t *priv = THIS->private; + + if (pthread_mutex_trylock (&peerinfo->delete_lock)) { + /* Someone else is already deleting the peer, so give up */ + return 0; + } + + uatomic_set (&peerinfo->deleting, _gf_true); + + if (peerinfo->quorum_contrib != QUORUM_NONE) + quorum_action = _gf_true; + if (peerinfo->rpc) { + peerinfo->rpc = glusterd_rpc_clnt_unref (priv, peerinfo->rpc); + peerinfo->rpc = NULL; + } + + cds_list_del_rcu (&peerinfo->uuid_list); + /* Saving THIS, as it is needed by the callback function */ + peerinfo->head.this = THIS; + call_rcu (&peerinfo->head.head, glusterd_peerinfo_destroy); + + if (quorum_action) + glusterd_do_quorum_action (); + return 0; } /* glusterd_peerinfo_find_by_hostname searches for a peer which matches the @@ -166,6 +174,7 @@ glusterd_peerinfo_find_by_uuid (uuid_t uuid) { glusterd_conf_t *priv = NULL; glusterd_peerinfo_t *entry = NULL; + glusterd_peerinfo_t *found = NULL; xlator_t *this = NULL; this = THIS; @@ -178,19 +187,23 @@ glusterd_peerinfo_find_by_uuid (uuid_t uuid) if (uuid_is_null (uuid)) return NULL; - cds_list_for_each_entry (entry, &priv->peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu 
(entry, &priv->peers, uuid_list) { if (!uuid_compare (entry->uuid, uuid)) { gf_log (this->name, GF_LOG_DEBUG, "Friend found... state: %s", glusterd_friend_sm_state_name_get (entry->state.state)); - return entry; + found = entry; /* Probably should be rcu_dereferenced */ + break; } } + rcu_read_unlock (); - gf_log (this->name, GF_LOG_DEBUG, "Friend with uuid: %s, not found", - uuid_utoa (uuid)); - return NULL; + if (!found) + gf_log (this->name, GF_LOG_DEBUG, + "Friend with uuid: %s, not found", uuid_utoa (uuid)); + return found; } /* glusterd_peerinfo_find will search for a peer matching either @uuid or @@ -282,6 +295,8 @@ glusterd_peerinfo_new (glusterd_friend_sm_state_t state, uuid_t *uuid, if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED) new_peer->quorum_contrib = QUORUM_WAITING; new_peer->port = port; + + pthread_mutex_init (&new_peer->delete_lock, NULL); out: if (ret && new_peer) { glusterd_peerinfo_cleanup (new_peer); @@ -303,7 +318,8 @@ glusterd_chk_peers_connected_befriended (uuid_t skip_uuid) priv= THIS->private; GF_ASSERT (priv); - cds_list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { if (!uuid_is_null (skip_uuid) && !uuid_compare (skip_uuid, peerinfo->uuid)) @@ -315,6 +331,8 @@ glusterd_chk_peers_connected_befriended (uuid_t skip_uuid) break; } } + rcu_read_unlock (); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %s", (ret?"TRUE":"FALSE")); return ret; @@ -336,14 +354,16 @@ glusterd_uuid_to_hostname (uuid_t uuid) if (!uuid_compare (MY_UUID, uuid)) { hostname = gf_strdup ("localhost"); } + rcu_read_lock (); if (!cds_list_empty (&priv->peers)) { - cds_list_for_each_entry (entry, &priv->peers, uuid_list) { + cds_list_for_each_entry_rcu (entry, &priv->peers, uuid_list) { if (!uuid_compare (entry->uuid, uuid)) { hostname = gf_strdup (entry->hostname); break; } } } + rcu_read_unlock (); return hostname; } @@ -373,7 +393,8 @@ glusterd_are_vol_all_peers_up 
(glusterd_volinfo_t *volinfo, if (!uuid_compare (brickinfo->uuid, MY_UUID)) continue; - cds_list_for_each_entry (peerinfo, peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) continue; @@ -385,9 +406,11 @@ glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, *down_peerstr = gf_strdup (peerinfo->hostname); gf_log ("", GF_LOG_DEBUG, "Peer %s is down. ", peerinfo->hostname); + rcu_read_unlock (); goto out; } } + rcu_read_unlock (); } ret = _gf_true; @@ -479,7 +502,7 @@ gd_add_address_to_peer (glusterd_peerinfo_t *peerinfo, const char *address) if (ret) goto out; - cds_list_add_tail (&hostname->hostname_list, &peerinfo->hostnames); + cds_list_add_tail_rcu (&hostname->hostname_list, &peerinfo->hostnames); ret = 0; out: @@ -584,6 +607,7 @@ gd_peerinfo_find_from_hostname (const char *hoststr) xlator_t *this = NULL; glusterd_conf_t *priv = NULL; glusterd_peerinfo_t *peer = NULL; + glusterd_peerinfo_t *found = NULL; glusterd_peer_hostname_t *tmphost = NULL; this = THIS; @@ -593,19 +617,24 @@ gd_peerinfo_find_from_hostname (const char *hoststr) GF_VALIDATE_OR_GOTO (this->name, (hoststr != NULL), out); - cds_list_for_each_entry (peer, &priv->peers, uuid_list) { - cds_list_for_each_entry (tmphost, &peer->hostnames, - hostname_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peer, &priv->peers, uuid_list) { + cds_list_for_each_entry_rcu (tmphost, &peer->hostnames, + hostname_list) { if (!strncasecmp (tmphost->hostname, hoststr, 1024)) { gf_log (this->name, GF_LOG_DEBUG, "Friend %s found.. 
state: %d", tmphost->hostname, peer->state.state); - return peer; + found = peer; /* Probably needs to be + dereferenced*/ + goto unlock; } } } +unlock: + rcu_read_unlock (); out: - return NULL; + return found; } /* gd_peerinfo_find_from_addrinfo iterates over all the addresses saved for each @@ -624,6 +653,7 @@ gd_peerinfo_find_from_addrinfo (const struct addrinfo *addr) xlator_t *this = NULL; glusterd_conf_t *conf = NULL; glusterd_peerinfo_t *peer = NULL; + glusterd_peerinfo_t *found = NULL; glusterd_peer_hostname_t *address = NULL; int ret = 0; struct addrinfo *paddr = NULL; @@ -636,9 +666,10 @@ gd_peerinfo_find_from_addrinfo (const struct addrinfo *addr) GF_VALIDATE_OR_GOTO (this->name, (addr != NULL), out); - cds_list_for_each_entry (peer, &conf->peers, uuid_list) { - cds_list_for_each_entry (address, &peer->hostnames, - hostname_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peer, &conf->peers, uuid_list) { + cds_list_for_each_entry_rcu (address, &peer->hostnames, + hostname_list) { /* TODO: Cache the resolved addrinfos to improve * performance */ @@ -658,14 +689,20 @@ gd_peerinfo_find_from_addrinfo (const struct addrinfo *addr) for (tmp = paddr; tmp != NULL; tmp = tmp->ai_next) { if (gf_compare_sockaddr (addr->ai_addr, tmp->ai_addr)) { - freeaddrinfo (paddr); - return peer; + found = peer; /* (de)referenced? 
*/ + break; } } + + freeaddrinfo (paddr); + if (found) + goto unlock; } } +unlock: + rcu_read_unlock (); out: - return NULL; + return found; } /* gd_update_peerinfo_from_dict will update the hostnames for @peerinfo from diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.h b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h index f4039620b28..3a1aee7cd15 100644 --- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h @@ -17,9 +17,6 @@ int32_t glusterd_peerinfo_cleanup (glusterd_peerinfo_t *peerinfo); -int32_t -glusterd_peerinfo_destroy (glusterd_peerinfo_t *peerinfo); - glusterd_peerinfo_t * glusterd_peerinfo_find_by_hostname (const char *hoststr); diff --git a/xlators/mgmt/glusterd/src/glusterd-rcu.h b/xlators/mgmt/glusterd/src/glusterd-rcu.h new file mode 100644 index 00000000000..d4e78753e69 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-rcu.h @@ -0,0 +1,36 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_RCU_H +#define _GLUSTERD_RCU_H + +#include <urcu-bp.h> +#include <urcu/rculist.h> +#include <urcu/compiler.h> +#include <urcu/uatomic.h> +#include <urcu-call-rcu.h> + +#ifdef URCU_0_7 +#include "rculist-extra.h" +#endif + +#include "xlator.h" + +/* gd_rcu_head is a composite struct, composed of struct rcu_head and a this + * pointer, which is used to pass the THIS pointer to call_rcu callbacks. 
+ * + * Use this in place of struct rcu_head when embedding into another struct + */ +typedef struct glusterd_rcu_head_ { + struct rcu_head head; + xlator_t *this; +} gd_rcu_head; + +#endif /* _GLUSTERD_RCU_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index a6e48ca14b8..3098761c2b3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -517,30 +517,33 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, } if (!gf_is_local_addr (host)) { + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (NULL, host); if (peerinfo == NULL) { ret = -1; snprintf (msg, sizeof (msg), "%s, is not a friend", host); *op_errstr = gf_strdup (msg); - goto out; - } - if (!peerinfo->connected) { + } else if (!peerinfo->connected) { snprintf (msg, sizeof (msg), "%s, is not connected at " "the moment", host); *op_errstr = gf_strdup (msg); ret = -1; - goto out; - } - if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) { + } else if (GD_FRIEND_STATE_BEFRIENDED != + peerinfo->state.state) { snprintf (msg, sizeof (msg), "%s, is not befriended " "at the moment", host); *op_errstr = gf_strdup (msg); ret = -1; - goto out; } + rcu_read_unlock (); + + if (ret) + goto out; + } else if (priv->op_version >= GD_OP_VERSION_3_6_0) { /* A bricks mount dir is required only by snapshots which were * introduced in gluster-3.6.0 diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 8dd65168bb6..6025a0748a0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -264,9 +264,13 @@ __glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, goto out; } + rcu_read_lock (); peerinfo = glusterd_peerinfo_find (rsp.uuid, rsp.hostname); if (peerinfo == NULL) { - GF_ASSERT (0); + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Could not find 
peerd %s(%s)", + rsp.hostname, uuid_utoa (rsp.uuid)); + goto unlock; } /* @@ -315,7 +319,9 @@ __glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_LOCAL_ACC, &event); if (!ret) { - event->peerinfo = peerinfo; + event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (event->peerid, peerinfo->uuid); + ret = glusterd_friend_sm_inject_event (event); } rsp.op_errno = GF_PROBE_FRIEND; @@ -324,7 +330,10 @@ reply: ctx = ((call_frame_t *)myframe)->local; ((call_frame_t *)myframe)->local = NULL; - GF_ASSERT (ctx); + if (!ctx) { + ret = -1; + goto unlock; + } if (ctx->req) { glusterd_xfer_cli_probe_resp (ctx->req, ret, @@ -336,7 +345,7 @@ reply: glusterd_destroy_probe_ctx (ctx); - goto out; + goto unlock; } else if (strncasecmp (rsp.hostname, peerinfo->hostname, 1024)) { gf_log (THIS->name, GF_LOG_INFO, "Host: %s with uuid: %s " @@ -346,7 +355,10 @@ reply: ctx = ((call_frame_t *)myframe)->local; ((call_frame_t *)myframe)->local = NULL; - GF_ASSERT (ctx); + if (!ctx) { + ret = -1; + goto unlock; + } rsp.op_errno = GF_PROBE_FRIEND; if (ctx->req) { @@ -360,8 +372,10 @@ reply: glusterd_destroy_probe_ctx (ctx); (void) glusterd_friend_remove (NULL, rsp.hostname); ret = rsp.op_ret; - goto out; + + goto unlock; } + cont: uuid_copy (peerinfo->uuid, rsp.uuid); @@ -371,25 +385,34 @@ cont: if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get event"); - goto out; + goto unlock; } - event->peerinfo = peerinfo; + event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (event->peerid, peerinfo->uuid); + event->ctx = ((call_frame_t *)myframe)->local; ((call_frame_t *)myframe)->local = NULL; ret = glusterd_friend_sm_inject_event (event); - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - gf_log ("glusterd", GF_LOG_INFO, "Received resp to probe req"); +unlock: + rcu_read_unlock (); + out: free (rsp.hostname);//malloced by xdr GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + + /* Attempt to start 
the state machine. Needed as no state machine could + * be running at time this RPC reply was recieved + */ + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + return ret; } @@ -437,12 +460,14 @@ __glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov, "Received %s from uuid: %s, host: %s, port: %d", (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (rsp.uuid, rsp.hostname); if (peerinfo == NULL) { ret = -1; gf_log ("", GF_LOG_ERROR, "received friend add response from" " unknown peer uuid: %s", uuid_utoa (rsp.uuid)); - goto out; + goto unlock; } if (op_ret) @@ -455,25 +480,26 @@ __glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov, if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get event"); - goto out; + goto unlock; } - event->peerinfo = peerinfo; + ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_friend_update_ctx_t); if (!ev_ctx) { ret = -1; - goto out; + goto unlock; } uuid_copy (ev_ctx->uuid, rsp.uuid); ev_ctx->hostname = gf_strdup (rsp.hostname); + event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (event->peerid, peerinfo->uuid); event->ctx = ev_ctx; ret = glusterd_friend_sm_inject_event (event); - if (ret) - goto out; - +unlock: + rcu_read_unlock (); out: ctx = ((call_frame_t *)myframe)->local; ((call_frame_t *)myframe)->local = NULL; @@ -548,12 +574,14 @@ __glusterd_friend_remove_cbk (struct rpc_req * req, struct iovec *iov, (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); inject: + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (rsp.uuid, ctx->hostname); if (peerinfo == NULL) { //can happen as part of rpc clnt connection cleanup //when the frame timeout happens after 30 minutes ret = -1; - goto respond; + goto unlock; } event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; @@ -563,14 +591,15 @@ inject: if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get event"); - goto respond; + goto unlock; } - 
event->peerinfo = peerinfo; + event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (event->peerid, peerinfo->uuid); ret = glusterd_friend_sm_inject_event (event); if (ret) - goto respond; + goto unlock; /*friend_sm would be moved on CLNT_DISCONNECT, consequently cleaning up peerinfo. Else, we run the risk of triggering @@ -578,6 +607,8 @@ inject: */ op_ret = 0; +unlock: + rcu_read_unlock (); respond: ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, NULL, @@ -695,8 +726,11 @@ __glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, "Received lock %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); - peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); - if (peerinfo == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); + rcu_read_unlock (); + + if (ret) { gf_log (this->name, GF_LOG_CRITICAL, "cluster lock response received from unknown peer: %s." "Ignoring response", uuid_utoa (rsp.uuid)); @@ -751,7 +785,6 @@ glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, int ret = -1; int32_t op_ret = -1; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; call_frame_t *frame = NULL; uuid_t *txn_id = NULL; @@ -794,8 +827,11 @@ glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, "Received mgmt_v3 lock %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); - peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); - if (peerinfo == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); + rcu_read_unlock (); + + if (ret) { gf_log (this->name, GF_LOG_CRITICAL, "mgmt_v3 lock response received " "from unknown peer: %s. 
Ignoring response", @@ -841,7 +877,6 @@ glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, int ret = -1; int32_t op_ret = -1; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; call_frame_t *frame = NULL; uuid_t *txn_id = NULL; @@ -888,8 +923,11 @@ glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); - peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); - if (peerinfo == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); + rcu_read_unlock (); + + if (ret) { gf_msg (this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, "mgmt_v3 unlock response received " @@ -980,8 +1018,11 @@ __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, "Received unlock %s from uuid: %s", (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); - peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); - if (peerinfo == NULL) { + rcu_read_lock (); + ret = (glusterd_peerinfo_find (rsp.uuid, NULL) == NULL); + rcu_read_unlock (); + + if (ret) { gf_msg (this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED, "Unlock response received from unknown peer %s", @@ -1091,6 +1132,7 @@ out: gf_log (this->name, GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + rcu_read_lock (); peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); if (peerinfo == NULL) { gf_log (this->name, GF_LOG_CRITICAL, "Stage response received " @@ -1118,6 +1160,8 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } + rcu_read_unlock (); + switch (rsp.op) { case GD_OP_REPLACE_BRICK: glusterd_rb_use_rsp_dict (NULL, dict); @@ -1227,6 +1271,7 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, gf_log (this->name, GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + rcu_read_lock (); peerinfo = glusterd_peerinfo_find (rsp.uuid, NULL); if (peerinfo == NULL) { gf_log (this->name, 
GF_LOG_CRITICAL, "Commit response for " @@ -1250,7 +1295,7 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, } if (!opinfo.op_errstr) { ret = -1; - goto out; + goto unlock; } } else { event_type = GD_OP_EVENT_RCVD_ACC; @@ -1258,44 +1303,44 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, case GD_OP_REPLACE_BRICK: ret = glusterd_rb_use_rsp_dict (NULL, dict); if (ret) - goto out; + goto unlock; break; case GD_OP_SYNC_VOLUME: ret = glusterd_sync_use_rsp_dict (NULL, dict); if (ret) - goto out; + goto unlock; break; case GD_OP_PROFILE_VOLUME: ret = glusterd_profile_volume_use_rsp_dict (NULL, dict); if (ret) - goto out; + goto unlock; break; case GD_OP_GSYNC_SET: ret = glusterd_gsync_use_rsp_dict (NULL, dict, rsp.op_errstr); if (ret) - goto out; + goto unlock; break; case GD_OP_STATUS_VOLUME: ret = glusterd_volume_status_copy_to_op_ctx_dict (NULL, dict); if (ret) - goto out; + goto unlock; break; case GD_OP_REBALANCE: case GD_OP_DEFRAG_BRICK_VOLUME: ret = glusterd_volume_rebalance_use_rsp_dict (NULL, dict); if (ret) - goto out; + goto unlock; break; case GD_OP_HEAL_VOLUME: ret = glusterd_volume_heal_use_rsp_dict (NULL, dict); if (ret) - goto out; + goto unlock; break; @@ -1303,6 +1348,8 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, break; } } +unlock: + rcu_read_unlock (); out: ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); @@ -1397,7 +1444,22 @@ glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, GF_ASSERT (priv); - peerinfo = event->peerinfo; + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + rcu_read_unlock (); + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + goto out; + } + + uuid_copy (req.uuid, MY_UUID); + req.hostname = gf_strdup (peerinfo->hostname); + req.port = peerinfo->port; + + rcu_read_unlock (); ret = 
glusterd_add_volumes_to_export_dict (&peer_data); if (ret) { @@ -1425,10 +1487,6 @@ glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, } } - uuid_copy (req.uuid, MY_UUID); - req.hostname = peerinfo->hostname; - req.port = peerinfo->port; - ret = dict_allocate_and_serialize (peer_data, &req.vols.vols_val, &req.vols.vols_len); if (ret) @@ -1442,6 +1500,7 @@ glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, out: GF_FREE (req.vols.vols_val); + GF_FREE (req.hostname); if (peer_data) dict_unref (peer_data); @@ -1470,17 +1529,31 @@ glusterd_rpc_friend_remove (call_frame_t *frame, xlator_t *this, GF_ASSERT (priv); - peerinfo = event->peerinfo; + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + rcu_read_unlock (); + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + goto out; + } uuid_copy (req.uuid, MY_UUID); - req.hostname = peerinfo->hostname; + req.hostname = gf_strdup (peerinfo->hostname); req.port = peerinfo->port; + + rcu_read_unlock (); + ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_REMOVE, NULL, this, glusterd_friend_remove_cbk, (xdrproc_t)xdr_gd1_mgmt_friend_req); out: + GF_FREE (req.hostname); + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -1507,6 +1580,8 @@ glusterd_rpc_friend_update (call_frame_t *frame, xlator_t *this, ret = dict_get_ptr (friends, "peerinfo", VOID(&peerinfo)); if (ret) goto out; + /* Don't want to send the pointer over */ + dict_del (friends, "peerinfo"); ret = dict_allocate_and_serialize (friends, &req.friends.friends_val, &req.friends.friends_len); diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index 9d0c49f04c2..db34ef1ddf8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -156,14 +156,19 @@ 
glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid) if (ret) goto out; - cds_list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { if (!peerinfo->connected || !peerinfo->peer) continue; + /* Setting a direct reference to peerinfo in the dict is okay as + * it is only going to be used within this read critical section + * (in glusterd_rpc_friend_update) + */ ret = dict_set_static_ptr (friends, "peerinfo", peerinfo); if (ret) { gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); - goto out; + goto unlock; } proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE]; @@ -171,6 +176,8 @@ glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid) ret = proc->fn (NULL, this, friends); } } +unlock: + rcu_read_unlock (); gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -213,7 +220,16 @@ glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx) GF_ASSERT (event); GF_ASSERT (ctx); - peerinfo = event->peerinfo; + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + gf_log (THIS->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + ret = -1; + goto out; + } + ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_PROBE, &new_event); @@ -234,7 +250,8 @@ glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx) new_ev_ctx->port = peerinfo->port; new_ev_ctx->req = NULL; - new_event->peerinfo = peerinfo; + new_event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (new_event->peerid, peerinfo->uuid); new_event->ctx = new_ev_ctx; ret = glusterd_friend_sm_inject_event (new_event); @@ -245,7 +262,11 @@ glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx) } out: + rcu_read_unlock (); + if (ret) { + if (new_event) + GF_FREE (new_event->peername); GF_FREE (new_event); if (new_ev_ctx) GF_FREE 
(new_ev_ctx->hostname); @@ -266,13 +287,21 @@ glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx) xlator_t *this = NULL; GF_ASSERT (event); - peerinfo = event->peerinfo; this = THIS; conf = this->private; GF_ASSERT (conf); + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + gf_log (this->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + goto out; + } + if (!peerinfo->peer) goto out; proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD]; @@ -286,8 +315,9 @@ glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx) } out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + rcu_read_unlock (); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; } @@ -315,10 +345,11 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) GF_ASSERT (conf); + rcu_read_lock (); peerinfo = glusterd_peerinfo_find (NULL, probe_ctx->hostname); if (peerinfo == NULL) { //We should not reach this state ideally - GF_ASSERT (0); + ret = -1; goto out; } @@ -342,6 +373,10 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) if (ret) goto out; + /* The peerinfo reference being set here is going to be used + * only within this critical section, in glusterd_rpc_probe + * (ie. proc->fn). 
+ */ ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); if (ret) { gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); @@ -354,8 +389,9 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) } - out: + rcu_read_unlock (); + if (dict) dict_unref (dict); gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -378,13 +414,20 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, glusterd_friend_sm_event_t *new_event = NULL; GF_ASSERT (event); - peerinfo = event->peerinfo; this = THIS; conf = this->private; GF_ASSERT (conf); + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + gf_log (this->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + goto out; + } ctx = event->ctx; if (!peerinfo->connected) { @@ -393,22 +436,19 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, ret = glusterd_friend_sm_new_event (event_type, &new_event); if (!ret) { - new_event->peerinfo = peerinfo; + new_event->peername = peerinfo->hostname; + uuid_copy (new_event->peerid, peerinfo->uuid); ret = glusterd_friend_sm_inject_event (new_event); } else { gf_log ("glusterd", GF_LOG_ERROR, "Unable to get event"); } - if (ctx) + if (ctx) { ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0, NULL, ctx->hostname, ctx->dict); - glusterd_friend_sm (); - glusterd_op_sm (); - - if (ctx) { glusterd_broadcast_friend_delete (ctx->hostname, NULL); glusterd_destroy_probe_ctx (ctx); } @@ -428,6 +468,8 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, } out: + rcu_read_unlock (); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -461,13 +503,22 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) int32_t count = 0; GF_ASSERT (event); - cur_peerinfo = event->peerinfo; this = THIS; priv = this->private; GF_ASSERT (priv); + rcu_read_lock (); + + cur_peerinfo = 
glusterd_peerinfo_find (event->peerid, event->peername); + if (!cur_peerinfo) { + gf_log (this->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + ret = -1; + goto out; + } + ev_ctx.op = GD_FRIEND_UPDATE_ADD; friends = dict_new (); @@ -479,7 +530,7 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) if (ret) goto out; - cds_list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { if (!glusterd_should_update_peer (peerinfo, cur_peerinfo)) continue; @@ -496,7 +547,7 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) if (ret) goto out; - cds_list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { if (!peerinfo->connected || !peerinfo->peer) continue; @@ -518,6 +569,8 @@ glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); out: + rcu_read_unlock (); + if (friends) dict_unref (friends); @@ -574,8 +627,6 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, GF_ASSERT (ctx); ev_ctx = ctx; - peerinfo = event->peerinfo; - GF_ASSERT (peerinfo); priv = THIS->private; GF_ASSERT (priv); @@ -583,19 +634,29 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, ret = glusterd_xfer_friend_remove_resp (ev_ctx->req, ev_ctx->hostname, ev_ctx->port); - cds_list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND, &new_event); - if (ret) + if (ret) { + rcu_read_unlock (); goto out; + } - new_event->peerinfo = peerinfo; + new_event->peername = gf_strdup (peerinfo->hostname); + uuid_copy (new_event->peerid, peerinfo->uuid); ret = glusterd_friend_sm_inject_event (new_event); - 
if (ret) + if (ret) { + rcu_read_unlock (); goto out; + } + + new_event = NULL; } + rcu_read_unlock (); + ret = glusterd_peer_detach_cleanup (priv); if (ret) { gf_log (THIS->name, GF_LOG_WARNING, @@ -603,26 +664,46 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, ret = 0; } out: - gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); + if (new_event) + GF_FREE (new_event->peername); + GF_FREE (new_event); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } static int glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx) { - int ret = -1; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + + GF_ASSERT (event); + + rcu_read_lock (); - ret = glusterd_friend_remove_cleanup_vols (event->peerinfo->uuid); + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + gf_log (THIS->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + rcu_read_unlock (); + goto out; + } + ret = glusterd_friend_remove_cleanup_vols (peerinfo->uuid); if (ret) gf_msg (THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL, "Volumes cleanup failed"); - ret = glusterd_peerinfo_cleanup (event->peerinfo); + rcu_read_unlock (); + /* Exiting read critical section as glusterd_peerinfo_cleanup calls + * synchronize_rcu before freeing the peerinfo + */ + + ret = glusterd_peerinfo_cleanup (peerinfo); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Cleanup returned: %d", ret); } - +out: return 0; } @@ -654,19 +735,41 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) this = THIS; GF_ASSERT (this); + GF_ASSERT (ctx); ev_ctx = ctx; uuid_copy (uuid, ev_ctx->uuid); - peerinfo = event->peerinfo; - GF_ASSERT (peerinfo); + + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (event->peerid, event->peername); + if (!peerinfo) { + gf_log (this->name, GF_LOG_ERROR, "Could not find peer %s(%s)", + event->peername, uuid_utoa 
(event->peerid)); + ret = -1; + rcu_read_unlock (); + goto out; + } + + /* TODO: How do you do an atomic copy of uuid_t */ + /* TODO: Updating within a read-critical section is also invalid + * Update properly with updater synchronization + */ uuid_copy (peerinfo->uuid, ev_ctx->uuid); + rcu_read_unlock (); + conf = this->private; GF_ASSERT (conf); + /* Passing the peername from the event. glusterd_compare_friend_data + * updates volumes and will use synchronize_rcu. If we were to pass + * peerinfo->hostname, we would have to do it under a read critical + * section which would lead to a deadlock + */ + //Build comparison logic here. ret = glusterd_compare_friend_data (ev_ctx->vols, &status, - peerinfo->hostname); + event->peername); if (ret) goto out; @@ -693,8 +796,15 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) op_ret = -1; } + /* glusterd_compare_friend_snapshots and functions only require + * a peers hostname and uuid. It also does updates, which + * require use of synchronize_rcu. So we pass the hostname and + * id from the event instead of the peerinfo object to prevent + * deadlocks as above. 
+ */ ret = glusterd_compare_friend_snapshots (ev_ctx->vols, - peerinfo); + event->peername, + event->peerid); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Conflict in comparing peer's snapshots"); @@ -710,7 +820,8 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) gf_log ("", GF_LOG_ERROR, "Out of Memory"); } - new_event->peerinfo = peerinfo; + new_event->peername = gf_strdup (event->peername); + uuid_copy (new_event->peerid, event->peerid); new_ev_ctx = GF_CALLOC (1, sizeof (*new_ev_ctx), gf_gld_mt_friend_update_ctx_t); @@ -726,33 +837,49 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) new_event->ctx = new_ev_ctx; glusterd_friend_sm_inject_event (new_event); + new_event = NULL; ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname, - peerinfo->hostname, ev_ctx->port, + event->peername, ev_ctx->port, op_ret, op_errno); out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (new_event) + GF_FREE (new_event->peername); + GF_FREE (new_event); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; } static int -glusterd_friend_sm_transition_state (glusterd_peerinfo_t *peerinfo, +glusterd_friend_sm_transition_state (uuid_t peerid, char *peername, glusterd_sm_t *state, glusterd_friend_sm_event_type_t event_type) { + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; GF_ASSERT (state); - GF_ASSERT (peerinfo); + GF_ASSERT (peername); + + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (peerid, peername); + if (!peerinfo) { + goto out; + } (void) glusterd_sm_tr_log_transition_add (&peerinfo->sm_log, peerinfo->state.state, state[event_type].next_state, event_type); - peerinfo->state.state = state[event_type].next_state; - return 0; + uatomic_set (&peerinfo->state.state, state[event_type].next_state); + + ret = 0; +out: + rcu_read_unlock (); + return ret; } @@ -1049,13 +1176,18 @@ glusterd_friend_sm () cds_list_del_init (&event->list); event_type = event->event; - 
peerinfo = event->peerinfo; + + rcu_read_lock (); + + peerinfo = glusterd_peerinfo_find (event->peerid, + event->peername); if (!peerinfo) { gf_log ("glusterd", GF_LOG_CRITICAL, "Received" " event %s with empty peer info", glusterd_friend_sm_event_name_get (event_type)); GF_FREE (event); + rcu_read_unlock (); continue; } gf_log ("", GF_LOG_DEBUG, "Dequeued event of type: '%s'", @@ -1063,7 +1195,17 @@ glusterd_friend_sm () old_state = peerinfo->state.state; - state = glusterd_friend_state_table[peerinfo->state.state]; + + rcu_read_unlock (); + /* Giving up read-critical section here as we only need + * the current state to call the handler. + * + * We cannot continue into the handler in a read + * critical section as there are handlers who do + * updates, and could cause deadlocks. + */ + + state = glusterd_friend_state_table[old_state]; GF_ASSERT (state); @@ -1091,18 +1233,40 @@ glusterd_friend_sm () continue; } - ret = glusterd_friend_sm_transition_state (peerinfo, - state, event_type); + ret = glusterd_friend_sm_transition_state + (event->peerid, event->peername, state, + event_type); if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to transition" " state from '%s' to '%s' for event '%s'", - glusterd_friend_sm_state_name_get(peerinfo->state.state), + glusterd_friend_sm_state_name_get(old_state), glusterd_friend_sm_state_name_get(state[event_type].next_state), glusterd_friend_sm_event_name_get(event_type)); goto out; } + peerinfo = NULL; + /* We need to obtain peerinfo reference once again as we + * had exited the read critical section above. + */ + rcu_read_lock (); + peerinfo = glusterd_peerinfo_find (event->peerid, + event->peername); + if (!peerinfo) { + rcu_read_unlock (); + /* A peer can only be deleted as a effect of + * this state machine, and two such state + * machines can never run at the same time. + * So if we cannot find the peerinfo here, + * something has gone terribly wrong. 
+ */ + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, + "Cannot find peer %s(%s)", + event->peername, uuid_utoa (event->peerid)); + goto out; + } if (gd_does_peer_affect_quorum (old_state, event_type, peerinfo)) { peerinfo->quorum_contrib = QUORUM_UP; @@ -1113,6 +1277,7 @@ glusterd_friend_sm () } ret = glusterd_store_peerinfo (peerinfo); + rcu_read_unlock (); glusterd_destroy_friend_event_context (event); GF_FREE (event); diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index 8dc6146baf2..e2f12038cd6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -29,7 +29,7 @@ #include "rpcsvc.h" #include "store.h" -#include <urcu/rculist.h> +#include "glusterd-rcu.h" typedef enum gd_quorum_contribution_ { QUORUM_NONE, @@ -101,6 +101,10 @@ struct glusterd_peerinfo_ { gd_quorum_contrib_t quorum_contrib; gf_boolean_t locked; gf_boolean_t detaching; + /* Members required for proper cleanup using RCU */ + gd_rcu_head head; + pthread_mutex_t delete_lock; + gf_boolean_t deleting; }; typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; @@ -124,7 +128,8 @@ typedef struct glusterd_peer_ctx_args_ { typedef struct glusterd_peer_ctx_ { glusterd_peerctx_args_t args; - glusterd_peerinfo_t *peerinfo; + uuid_t peerid; + char *peername; char *errstr; } glusterd_peerctx_t; @@ -153,10 +158,11 @@ typedef enum glusterd_friend_update_op_ { struct glusterd_friend_sm_event_ { - struct cds_list_head list; - glusterd_peerinfo_t *peerinfo; - void *ctx; - glusterd_friend_sm_event_type_t event; + struct cds_list_head list; + uuid_t peerid; + char *peername; + void *ctx; + glusterd_friend_sm_event_type_t event; }; typedef struct glusterd_friend_sm_event_ glusterd_friend_sm_event_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c index 077d48852a1..97935b6a975 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c +++ 
b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c @@ -1216,8 +1216,7 @@ out: * or restore for the given snap_id */ gf_boolean_t -glusterd_peer_has_missed_snap_delete (glusterd_peerinfo_t *peerinfo, - char *peer_snap_id) +glusterd_peer_has_missed_snap_delete (uuid_t peerid, char *peer_snap_id) { char *peer_uuid = NULL; gf_boolean_t missed_delete = _gf_false; @@ -1230,10 +1229,9 @@ glusterd_peer_has_missed_snap_delete (glusterd_peerinfo_t *peerinfo, GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - GF_ASSERT (peerinfo); GF_ASSERT (peer_snap_id); - peer_uuid = uuid_utoa (peerinfo->uuid); + peer_uuid = uuid_utoa (peerid); cds_list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, missed_snaps) { @@ -1526,7 +1524,7 @@ out: */ int32_t glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, - glusterd_peerinfo_t *peerinfo) + char *peername, uuid_t peerid) { char buf[NAME_MAX] = ""; char prefix[NAME_MAX] = ""; @@ -1544,7 +1542,7 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, this = THIS; GF_ASSERT (this); GF_ASSERT (peer_data); - GF_ASSERT (peerinfo); + GF_ASSERT (peername); snprintf (prefix, sizeof(prefix), "snap%d", snap_count); @@ -1553,8 +1551,7 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, ret = dict_get_str (peer_data, buf, &peer_snap_name); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "Unable to fetch snapname from peer: %s", - peerinfo->hostname); + "Unable to fetch snapname from peer: %s", peername); goto out; } @@ -1563,20 +1560,19 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, ret = dict_get_str (peer_data, buf, &peer_snap_id); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "Unable to fetch snap_id from peer: %s", - peerinfo->hostname); + "Unable to fetch snap_id from peer: %s", peername); goto out; } /* Check if the peer has missed a snap delete or restore * resulting in stale data for the snap in question */ - missed_delete = 
glusterd_peer_has_missed_snap_delete (peerinfo, + missed_delete = glusterd_peer_has_missed_snap_delete (peerid, peer_snap_id); if (missed_delete == _gf_true) { /* Peer has missed delete on the missing/conflicting snap_id */ gf_log (this->name, GF_LOG_INFO, "Peer %s has missed a delete " - "on snap %s", peerinfo->hostname, peer_snap_name); + "on snap %s", peername, peer_snap_name); ret = 0; goto out; } @@ -1585,8 +1581,7 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, * peer data is already present */ glusterd_is_peer_snap_conflicting (peer_snap_name, peer_snap_id, - &conflict, &snap, - peerinfo->hostname); + &conflict, &snap, peername); if (conflict == _gf_false) { if (snap) { /* Peer has snap with the same snapname @@ -1615,7 +1610,7 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to fetch host_bricks from peer: %s " - "for %s", peerinfo->hostname, peer_snap_name); + "for %s", peername, peer_snap_name); goto out; } @@ -1627,8 +1622,8 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, */ if (is_hosted == is_local) { gf_log (this->name, GF_LOG_ERROR, - "Conflict in snapshot %s with peer %s", - peer_snap_name, peerinfo->hostname); + "Conflict in snapshot %s with peer %s", peer_snap_name, + peername); ret = -1; goto out; } @@ -1676,8 +1671,8 @@ accept_peer_data: peer_snap_name, peer_snap_id); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "Failed to import snap %s from peer %s", - peer_snap_name, peerinfo->hostname); + "Failed to import snap %s from peer %s", peer_snap_name, + peername); goto out; } @@ -1693,8 +1688,8 @@ out: * the current node */ int32_t -glusterd_compare_friend_snapshots (dict_t *peer_data, - glusterd_peerinfo_t *peerinfo) +glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername, + uuid_t peerid) { int32_t ret = -1; int32_t snap_count = 0; @@ -1704,7 +1699,7 @@ glusterd_compare_friend_snapshots (dict_t 
*peer_data, this = THIS; GF_ASSERT (this); GF_ASSERT (peer_data); - GF_ASSERT (peerinfo); + GF_ASSERT (peername); ret = dict_get_int32 (peer_data, "snap_count", &snap_count); if (ret) { @@ -1714,11 +1709,12 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, for (i = 1; i <= snap_count; i++) { /* Compare one snapshot from peer_data at a time */ - ret = glusterd_compare_and_update_snap (peer_data, i, peerinfo); + ret = glusterd_compare_and_update_snap (peer_data, i, peername, + peerid); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to compare snapshots with peer %s", - peerinfo->hostname); + peername); goto out; } } diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h index 3f50a061b18..d152d8ca6a5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h @@ -74,8 +74,8 @@ int32_t glusterd_add_snapshots_to_export_dict (dict_t *peer_data); int32_t -glusterd_compare_friend_snapshots (dict_t *peer_data, - glusterd_peerinfo_t *peerinfo); +glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername, + uuid_t peerid); int32_t glusterd_store_create_snap_dir (glusterd_snap_t *snap); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index f3d20f3e1e9..93e695c85f5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -169,7 +169,8 @@ glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, continue; } - cds_list_for_each_entry (peerinfo, peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { /* If the brick doesnt belong to this peer */ continue; @@ -192,10 +193,12 @@ glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, "info for %s:%s in the " "rsp_dict", brickinfo->hostname, 
brickinfo->path); + rcu_read_unlock (); goto out; } } } + rcu_read_unlock (); brick_count++; } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 047ff942cec..5b2b14503ae 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -4096,11 +4096,14 @@ glusterd_store_retrieve_peers (xlator_t *this) } args.mode = GD_MODE_ON; - cds_list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { ret = glusterd_friend_rpc_create (this, peerinfo, &args); if (ret) - goto out; + break; } + rcu_read_unlock (); peerinfo = NULL; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 2dc1b7c282c..f87a5787860 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -1075,7 +1075,8 @@ gd_build_peers_list (struct cds_list_head *peers, GF_ASSERT (peers); GF_ASSERT (xact_peers); - cds_list_for_each_entry (peerinfo, peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { if (!peerinfo->connected) continue; if (op != GD_OP_SYNC_VOLUME && @@ -1085,6 +1086,8 @@ gd_build_peers_list (struct cds_list_head *peers, cds_list_add_tail (&peerinfo->op_peers_list, xact_peers); npeers++; } + rcu_read_unlock (); + return npeers; } @@ -1100,7 +1103,8 @@ gd_build_local_xaction_peers_list (struct cds_list_head *peers, GF_ASSERT (peers); GF_ASSERT (xact_peers); - cds_list_for_each_entry (peerinfo, peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { if (!peerinfo->connected) continue; if (op != GD_OP_SYNC_VOLUME && @@ -1110,13 +1114,17 @@ gd_build_local_xaction_peers_list (struct cds_list_head *peers, local_peers = GF_CALLOC (1, sizeof (*local_peers), gf_gld_mt_local_peers_t); if (!local_peers) { - return -1; + 
npeers = -1; + goto unlock; } CDS_INIT_LIST_HEAD (&local_peers->op_peers_list); local_peers->peerinfo = peerinfo; cds_list_add_tail (&local_peers->op_peers_list, xact_peers); npeers++; } +unlock: + rcu_read_unlock (); + return npeers; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 18ac27e0fcb..f98c3b5102c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2919,10 +2919,13 @@ glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, *active_count = 1; if (!peer_list) { - cds_list_for_each_entry (peerinfo, &conf->peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, + uuid_list) { glusterd_quorum_count(peerinfo, inquorum_count, active_count, out); } + rcu_read_unlock (); } else { if (_local_xaction_peers) { list_for_each_local_xaction_peers (peerinfo, @@ -8080,13 +8083,16 @@ glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) node_uuid_str = gf_strdup (node_uuid); /* Finding the index of the node-uuid in the peer-list */ - cds_list_for_each_entry (peerinfo, &conf->peers, uuid_list) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, + uuid_list) { peer_uuid_str = gd_peer_uuid_str (peerinfo); if (strcmp (peer_uuid_str, node_uuid_str) == 0) break; current_index++; } + rcu_read_unlock (); /* Setting the largest index value as the total count. 
*/ ret = dict_get_int32 (ctx_dict, "count", &count); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 5e80bb636aa..98019f81d4c 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -20,12 +20,6 @@ #include <pthread.h> #include <libgen.h> -#include <urcu-bp.h> -#include <urcu/rculist.h> -#ifdef URCU_0_7 -#include "rculist-extra.h" -#endif - #include "uuid.h" #include "rpc-clnt.h" @@ -45,6 +39,7 @@ #include "cli1-xdr.h" #include "syncop.h" #include "store.h" +#include "glusterd-rcu.h" #define GLUSTERD_TR_LOG_SIZE 50 #define GLUSTERD_NAME "glusterd" @@ -629,13 +624,15 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); snprintf (key, sizeof (key), \ "glusterd.xaction_peer"); \ \ - cds_list_for_each_entry (_peerinfo, head, member) { \ + rcu_read_lock (); \ + cds_list_for_each_entry_rcu (_peerinfo, head, member) { \ glusterd_dump_peer (_peerinfo, key, index, xpeers); \ if (!xpeers) \ glusterd_dump_peer_rpcstat (_peerinfo, key, \ index); \ index++; \ } \ + rcu_read_unlock (); \ \ } while (0) |