author | Kaushal M <kaushal@redhat.com> | 2015-03-26 15:18:54 +0530
---|---|---
committer | Krishnan Parthasarathi <kparthas@redhat.com> | 2015-04-13 06:30:02 +0000
commit | 1efa50861b2cee68de9c9b751d9fc5eed08f5e5b (patch) |
tree | 218ca5aba1eb404ff0d7ac25f20716915768f4ec |
parent | 7c7bbc027feb4c5b233e3078951e5bb1d9fc4618 (diff) |
glusterd: Replace transaction peers lists
Transaction peer lists were used in GlusterD to identify the peers
belonging to a transaction. This was needed to prevent newly added peers
from performing partial transactions, which could be incorrect.
This was accomplished by creating a separate transaction peers list at
the beginning of every transaction. A transaction peers list referenced
the peerinfo data structures of the peers which were present at the
beginning of the transaction. RCU protection of the peerinfos referenced
by a transaction peers list is a hard problem and difficult to do
correctly.
To have proper RCU protection of peerinfos, the transaction peers lists
have been replaced by an alternative method of identifying the peers
that belong to a transaction: the global peers list is used along with
generation numbers to identify the peers that should belong to a
transaction.
This change introduces a global peer list generation number, and a
generation number for each peerinfo object. Whenever a peerinfo object
is created, the global generation number is bumped, and the peerinfo's
generation number is set to the bumped global generation.
With the above changes, the algorithm to identify peers belonging to a
transaction with RCU protection is as follows (a minimal sketch of the
loop is given after the lists),
- At the beginning of a transaction, the current global generation
  number is saved
- To identify the peers belonging to the transaction,
  - Start an RCU read-side critical section
  - For each peer in the global peers list,
    - If the peer's generation number is not greater than the saved
      generation number, continue with the action on the peer
  - End the RCU read-side critical section
The above algorithm guarantees that,
- The peer list is not modified while a transaction is iterating through
  it
- The transaction actions are only done on peers that were present when
  the transaction started
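The per-phase loops added by this patch all follow the same shape. The
sketch below shows that selection loop; `gd_xaction_peers_foreach` is an
illustrative name only (the patch open-codes the loop in each phase such
as lockdown, pre-validate, commit and unlock), while the types and URCU
helpers are the existing glusterd/liburcu ones.

```c
/* Sketch only: the patch open-codes this loop in every transaction
 * phase. Requires the glusterd headers (glusterd.h, glusterd-sm.h)
 * and liburcu. */
static int
gd_xaction_peers_foreach (glusterd_conf_t *conf, glusterd_op_t op,
                          uint64_t txn_generation)
{
        glusterd_peerinfo_t *peerinfo = NULL;
        int                  peer_cnt = 0;

        rcu_read_lock ();
        cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) {
                /* Skip peers that joined after the transaction began */
                if (peerinfo->generation > txn_generation)
                        continue;

                /* Same eligibility checks the patch applies per phase */
                if (!peerinfo->connected)
                        continue;
                if (op != GD_OP_SYNC_VOLUME &&
                    peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
                        continue;

                /* ... send the phase request (lock/stage/commit/...) ... */
                peer_cnt++;
        }
        rcu_read_unlock ();

        return peer_cnt;
}
```

Because only the saved generation number and the global peers list are
consulted, no per-transaction list has to be allocated, populated or
freed.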
But, as a transaction could iterate over the peers list multiple times,
the algorithm cannot guarantee that the same set of peers will be
selected every time. A peer could get deleted between two iterations of
the list within a transaction. This problem existed with the transaction
peers lists as well, but unlike before it will no longer lead to invalid
memory access and potential crashes. This problem will be addressed
separately.
This change was developed on the git branch at [1]. This commit is a
combination of the following commits on the development branch.
52ded5b Add timespec_cmp
44aedd8 Add create timestamp to peerinfo
7bcbea5 Fix some silly mistakes
13e3241 Add start time to opinfo
17a6727 Use timestamp comparisions to identify xaction peers instead
of a xaction peer list
3be05b6 Correct check for peerinfo age
70d5b58 Use read-critical sections for peer list iteration
ba4dbca Use peerinfo timestamp checks in op-sm instead of xaction peer
list
d63f811 Add more peer status checks when iterating peers list in
glusterd-syncop
1998a2a Timestamp based peer list traversal of mgmtv3 xactions
f3c1a42 Remove transaction peer lists
b8b08ee Remove unused labels
32e5f5b Remove 'npeers' usage
a075fb7 Remove 'npeers' from mgmt-v3 framework
12c9df2 Use generation number instead of timestamps.
9723021 Remove timespec_cmp
80ae2c6 Remove timespec.h include
a9479b0 Address review comments on 10147/4
[1]: https://github.com/kshlm/glusterfs/tree/urcu
Change-Id: I9be1033525c0a89276f5b5d83dc2eb061918b97f
BUG: 1205186
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/10147
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-by: Anand Nekkunti <anekkunt@redhat.com>
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
-rw-r--r-- | libglusterfs/src/mem-types.h | 1
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 20
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c | 4
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mgmt.c | 319
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 67
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.h | 3
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-peer-utils.c | 13
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.h | 1
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-syncop.c | 250
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-syncop.h | 9
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 19
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 14
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 3
13 files changed, 404 insertions, 319 deletions
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index f4d3974f0b2..fc06d52239b 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -150,7 +150,6 @@ enum gf_common_mem_types_ { gf_common_mt_nfs_exports = 131, gf_common_mt_gf_brick_spec_t = 132, gf_common_mt_gf_timer_entry_t = 133, - gf_common_mt_list_head_t = 134, gf_common_mt_end }; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 469c95c9890..ac69fc8712d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -626,22 +626,6 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, gf_log (this->name, GF_LOG_DEBUG, "Acquired lock on localhost"); local_locking_done: - txn_op_info.local_xaction_peers = - GF_CALLOC (1, sizeof (struct cds_list_head), - gf_common_mt_list_head_t); - if (!txn_op_info.local_xaction_peers) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, "Out of memory"); - goto out; - } - CDS_INIT_LIST_HEAD (txn_op_info.local_xaction_peers); - - /* Maintain xaction_peers on per transaction basis */ - npeers = gd_build_local_xaction_peers_list - (&priv->peers, - txn_op_info.local_xaction_peers, - op); - /* If no volname is given as a part of the command, locks will * not be held, hence sending stage event. */ if (volname || (priv->op_version < GD_OP_VERSION_3_6_0)) @@ -898,8 +882,8 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) "No transaction's opinfo set"); state.state = GD_OP_STATE_LOCKED; - glusterd_txn_opinfo_init (&txn_op_info, &state, - &op_req.op, req_ctx->dict, req); + glusterd_txn_opinfo_init (&txn_op_info, &state, &op_req.op, + req_ctx->dict, req); ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c index a200c8e1230..859690eee65 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -93,8 +93,8 @@ glusterd_op_state_machine_mgmt_v3_lock (rpcsvc_request_t *req, GF_ASSERT (this); GF_ASSERT (req); - glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req->op, - ctx->dict, req); + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req->op, ctx->dict, + req); ret = glusterd_set_txn_opinfo (&lock_req->txn_id, &txn_op_info); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c index 2a34c78b35b..4e4dd047281 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -383,9 +383,9 @@ out: int glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict, - char **op_errstr, int npeers, + char **op_errstr, gf_boolean_t *is_acquired, - struct cds_list_head *peers) + uint64_t txn_generation) { char *volname = NULL; glusterd_peerinfo_t *peerinfo = NULL; @@ -394,9 +394,13 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict, struct syncargs args = {0}; uuid_t peer_uuid = {0}; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; this = THIS; GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + GF_ASSERT (dict); GF_ASSERT (op_errstr); GF_ASSERT (is_acquired); @@ -411,20 +415,36 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict, *is_acquired = _gf_true; - if (!npeers) { - ret = 0; - goto out; - } - /* Sending mgmt_v3 lock req to other nodes in the cluster */ gd_syncargs_init (&args, 
NULL); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + gd_mgmt_v3_lock (op, dict, peerinfo, &args, MY_UUID, peer_uuid); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); if (args.errstr) @@ -633,8 +653,8 @@ out: int glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict, - char **op_errstr, int npeers, - struct cds_list_head *peers) + char **op_errstr, + uint64_t txn_generation) { int32_t ret = -1; int32_t peer_cnt = 0; @@ -643,9 +663,13 @@ glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict, struct syncargs args = {0}; uuid_t peer_uuid = {0}; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; this = THIS; GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + GF_ASSERT (req_dict); GF_ASSERT (op_errstr); @@ -691,20 +715,36 @@ glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict, dict_unref (rsp_dict); rsp_dict = NULL; - if (!npeers) { - ret = 0; - goto out; - } - /* Sending Pre Validation req to other nodes in the cluster */ gd_syncargs_init (&args, req_dict); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args, MY_UUID, peer_uuid); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); if (args.op_ret) { @@ -865,9 +905,8 @@ out: } int -glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, - char **op_errstr, int npeers, - struct cds_list_head *peers) +glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, char **op_errstr, + uint64_t txn_generation) { int32_t ret = -1; int32_t peer_cnt = 0; @@ -876,9 +915,13 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, struct syncargs args = {0}; uuid_t peer_uuid = {0}; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; this = THIS; GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + GF_ASSERT (req_dict); GF_ASSERT (op_errstr); @@ -915,20 +958,36 @@ glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict, dict_unref (rsp_dict); rsp_dict = NULL; - if (!npeers) { - ret = 0; - goto out; - } - /* Sending brick op req to other nodes in the cluster */ gd_syncargs_init (&args, NULL); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != 
GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args, MY_UUID, peer_uuid); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); if (args.op_ret) { @@ -1084,9 +1143,8 @@ out: } int -glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, - dict_t *req_dict, char **op_errstr, - int npeers, struct cds_list_head *peers) +glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, uint64_t txn_generation) { int32_t ret = -1; int32_t peer_cnt = 0; @@ -1095,9 +1153,13 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, struct syncargs args = {0}; uuid_t peer_uuid = {0}; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; this = THIS; GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + GF_ASSERT (op_ctx); GF_ASSERT (req_dict); GF_ASSERT (op_errstr); @@ -1144,20 +1206,36 @@ glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx, dict_unref (rsp_dict); rsp_dict = NULL; - if (!npeers) { - ret = 0; - goto out; - } - /* Sending commit req to other nodes in the cluster */ gd_syncargs_init (&args, op_ctx); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args, MY_UUID, peer_uuid); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); if (args.op_ret) { @@ -1282,8 +1360,8 @@ out: int glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict, - dict_t *req_dict, char **op_errstr, int npeers, - struct cds_list_head *peers) + dict_t *req_dict, char **op_errstr, + uint64_t txn_generation) { int32_t ret = -1; int32_t peer_cnt = 0; @@ -1292,9 +1370,13 @@ glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict, struct syncargs args = {0}; uuid_t peer_uuid = {0}; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; this = THIS; GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + GF_ASSERT (dict); GF_VALIDATE_OR_GOTO (this->name, req_dict, out); GF_ASSERT (op_errstr); @@ -1335,20 +1417,36 @@ glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict, dict_unref (rsp_dict); rsp_dict = NULL; - if (!npeers) { - ret = 0; - goto out; - } - /* Sending Post Validation req to other nodes in the cluster */ gd_syncargs_init (&args, req_dict); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo, &args, MY_UUID, peer_uuid); peer_cnt++; } + rcu_read_unlock (); + + if (0 == 
peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); if (args.op_ret) { @@ -1468,11 +1566,10 @@ out: } int -glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, - dict_t *dict, int32_t op_ret, - char **op_errstr, int npeers, +glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, dict_t *dict, + int32_t op_ret, char **op_errstr, gf_boolean_t is_acquired, - struct cds_list_head *peers) + uint64_t txn_generation) { int32_t ret = -1; int32_t peer_cnt = 0; @@ -1480,9 +1577,13 @@ glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; struct syncargs args = {0}; + glusterd_conf_t *conf = NULL; this = THIS; GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + GF_ASSERT (dict); GF_ASSERT (op_errstr); @@ -1491,20 +1592,36 @@ glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op, if (!is_acquired) goto out; - if (!npeers) { - ret = 0; - goto out; - } - /* Sending mgmt_v3 unlock req to other nodes in the cluster */ gd_syncargs_init (&args, NULL); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + gd_mgmt_v3_unlock (op, dict, peerinfo, &args, MY_UUID, peer_uuid); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); if (args.op_ret) { @@ -1530,7 +1647,6 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, { int32_t ret = -1; int32_t op_ret = -1; - int32_t npeers = 0; dict_t *req_dict = NULL; dict_t *tmp_dict = NULL; glusterd_conf_t *conf = NULL; @@ -1538,7 +1654,7 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, xlator_t *this = NULL; gf_boolean_t is_acquired = _gf_false; uuid_t *originator_uuid = NULL; - struct cds_list_head xaction_peers = {0,}; + uint64_t txn_generation = 0; this = THIS; GF_ASSERT (this); @@ -1547,14 +1663,14 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, conf = this->private; GF_ASSERT (conf); - CDS_INIT_LIST_HEAD (&xaction_peers); - npeers = gd_build_local_xaction_peers_list (&conf->peers, - &xaction_peers, op); - if (npeers == -1) { - gf_log (this->name, GF_LOG_ERROR, "building local peers list " - "failed"); - goto rsp; - } + /* Save the peer list generation */ + txn_generation = conf->generation; + cmm_smp_rmb (); + /* This read memory barrier makes sure that this assignment happens here + * only and is not reordered and optimized by either the compiler or the + * processor. + */ + /* Save the MY_UUID as the originator_uuid. 
This originator_uuid * will be used by is_origin_glusterd() to determine if a node @@ -1594,8 +1710,7 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr, - npeers, &is_acquired, - &xaction_peers); + &is_acquired, txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); goto out; @@ -1612,17 +1727,16 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, } /* PRE-COMMIT VALIDATE PHASE */ - ret = glusterd_mgmt_v3_pre_validate (op, req_dict, - &op_errstr, npeers, - &xaction_peers); + ret = glusterd_mgmt_v3_pre_validate (op, req_dict, &op_errstr, + txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); goto out; } /* COMMIT OP PHASE */ - ret = glusterd_mgmt_v3_commit (op, dict, req_dict, - &op_errstr, npeers, &xaction_peers); + ret = glusterd_mgmt_v3_commit (op, dict, req_dict, &op_errstr, + txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); goto out; @@ -1633,9 +1747,8 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, commands other than snapshot. So as of now, I am sending 0 (op_ret as 0). */ - ret = glusterd_mgmt_v3_post_validate (op, 0, dict, req_dict, - &op_errstr, npeers, - &xaction_peers); + ret = glusterd_mgmt_v3_post_validate (op, 0, dict, req_dict, &op_errstr, + txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); goto out; @@ -1645,10 +1758,9 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, out: op_ret = ret; /* UNLOCK PHASE FOR PEERS*/ - (void) glusterd_mgmt_v3_release_peer_locks (op, dict, - op_ret, &op_errstr, - npeers, is_acquired, - &xaction_peers); + (void) glusterd_mgmt_v3_release_peer_locks (op, dict, op_ret, + &op_errstr, is_acquired, + txn_generation); /* LOCAL VOLUME(S) UNLOCK */ if (is_acquired) { @@ -1660,12 +1772,10 @@ out: op_ret = ret; } } -rsp: + /* SEND CLI RESPONSE */ glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); - gd_cleanup_local_xaction_peers_list (&xaction_peers); - if (req_dict) dict_unref (req_dict); @@ -1748,7 +1858,6 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, { int32_t ret = -1; int32_t op_ret = -1; - int32_t npeers = 0; dict_t *req_dict = NULL; dict_t *tmp_dict = NULL; glusterd_conf_t *conf = NULL; @@ -1758,7 +1867,7 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, uuid_t *originator_uuid = NULL; gf_boolean_t success = _gf_false; char *cli_errstr = NULL; - struct cds_list_head xaction_peers = {0,}; + uint64_t txn_generation = 0; this = THIS; GF_ASSERT (this); @@ -1767,14 +1876,13 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, conf = this->private; GF_ASSERT (conf); - CDS_INIT_LIST_HEAD (&xaction_peers); - npeers = gd_build_local_xaction_peers_list (&conf->peers, - &xaction_peers, op); - if (npeers == -1) { - gf_log (this->name, GF_LOG_ERROR, "building local peers list " - "failed"); - goto rsp; - } + /* Save the peer list generation */ + txn_generation = conf->generation; + cmm_smp_rmb (); + /* This read memory barrier makes sure that this assignment happens here + * only and is not reordered and optimized by either the compiler or the + * processor. + */ /* Save the MY_UUID as the originator_uuid. 
This originator_uuid * will be used by is_origin_glusterd() to determine if a node @@ -1814,8 +1922,7 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr, - npeers, &is_acquired, - &xaction_peers); + &is_acquired, txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); goto out; @@ -1832,8 +1939,8 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, } /* PRE-COMMIT VALIDATE PHASE */ - ret = glusterd_mgmt_v3_pre_validate (op, req_dict, - &op_errstr, npeers, &xaction_peers); + ret = glusterd_mgmt_v3_pre_validate (op, req_dict, &op_errstr, + txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); goto out; @@ -1857,8 +1964,8 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, goto out; } - ret = glusterd_mgmt_v3_brick_op (op, req_dict, - &op_errstr, npeers, &xaction_peers); + ret = glusterd_mgmt_v3_brick_op (op, req_dict, &op_errstr, + txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); goto unbarrier; @@ -1888,8 +1995,8 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, goto unbarrier; } - ret = glusterd_mgmt_v3_commit (op, dict, req_dict, - &op_errstr, npeers, &xaction_peers); + ret = glusterd_mgmt_v3_commit (op, dict, req_dict, &op_errstr, + txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); /* If the main op fails, we should save the error string. @@ -1914,8 +2021,8 @@ unbarrier: goto out; } - ret = glusterd_mgmt_v3_brick_op (op, req_dict, - &op_errstr, npeers, &xaction_peers); + ret = glusterd_mgmt_v3_brick_op (op, req_dict, &op_errstr, + txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); @@ -1943,18 +2050,16 @@ out: /* POST-COMMIT VALIDATE PHASE */ ret = glusterd_mgmt_v3_post_validate (op, op_ret, dict, req_dict, - &op_errstr, npeers, - &xaction_peers); + &op_errstr, txn_generation); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); op_ret = -1; } /* UNLOCK PHASE FOR PEERS*/ - (void) glusterd_mgmt_v3_release_peer_locks (op, dict, - op_ret, &op_errstr, - npeers, is_acquired, - &xaction_peers); + (void) glusterd_mgmt_v3_release_peer_locks (op, dict, op_ret, + &op_errstr, is_acquired, + txn_generation); /* If the commit op (snapshot taking) failed, then the error is stored in cli_errstr and unbarrier is called. 
Suppose, if unbarrier also @@ -1978,12 +2083,10 @@ out: op_ret = ret; } } -rsp: + /* SEND CLI RESPONSE */ glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); - gd_cleanup_local_xaction_peers_list (&xaction_peers); - if (req_dict) dict_unref (req_dict); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index d7694258301..5bfdb0bb43e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -119,13 +119,16 @@ glusterd_txn_opinfo_dict_fini () void glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo, - glusterd_op_sm_state_info_t *state, - glusterd_op_t *op, - dict_t *op_ctx, - rpcsvc_request_t *req) + glusterd_op_sm_state_info_t *state, glusterd_op_t *op, + dict_t *op_ctx, rpcsvc_request_t *req) { + glusterd_conf_t *conf = NULL; + GF_ASSERT (opinfo); + conf = THIS->private; + GF_ASSERT (conf); + if (state) opinfo->state = *state; @@ -140,6 +143,9 @@ glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo, if (req) opinfo->req = req; + opinfo->txn_generation = conf->generation; + cmm_smp_rmb (); + return; } @@ -314,9 +320,6 @@ glusterd_clear_txn_opinfo (uuid_t *txn_id) dict_del(priv->glusterd_txn_opinfo, uuid_utoa (*txn_id)); - if (txn_op_info.local_xaction_peers) - GF_FREE (txn_op_info.local_xaction_peers); - gf_log ("", GF_LOG_DEBUG, "Successfully cleared opinfo for transaction ID : %s", uuid_utoa (*txn_id)); @@ -2919,9 +2922,13 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) priv = this->private; GF_ASSERT (priv); - list_for_each_local_xaction_peers (peerinfo, - opinfo.local_xaction_peers) { - GF_ASSERT (peerinfo); + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; if (!peerinfo->connected || !peerinfo->mgmt) continue; @@ -2936,6 +2943,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) if (proc->fn) { ret = proc->fn (NULL, this, peerinfo); if (ret) { + rcu_read_unlock (); gf_log (this->name, GF_LOG_WARNING, "Failed to send lock request " "for operation 'Volume %s' to " @@ -2958,6 +2966,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); if (ret) { + rcu_read_unlock (); gf_log (this->name, GF_LOG_ERROR, "failed to set peerinfo"); dict_unref (dict); @@ -2966,6 +2975,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) ret = proc->fn (NULL, this, dict); if (ret) { + rcu_read_unlock (); gf_log (this->name, GF_LOG_WARNING, "Failed to send mgmt_v3 lock " "request for operation " @@ -2981,6 +2991,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) } } } + rcu_read_unlock (); opinfo.pending_count = pending_count; if (!opinfo.pending_count) @@ -3009,9 +3020,13 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) priv = this->private; GF_ASSERT (priv); - list_for_each_local_xaction_peers (peerinfo, - opinfo.local_xaction_peers) { - GF_ASSERT (peerinfo); + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; if (!peerinfo->connected || !peerinfo->mgmt || !peerinfo->locked) @@ -3083,6 +3098,7 @@ glusterd_op_ac_send_unlock 
(glusterd_op_sm_event_t *event, void *ctx) } } } + rcu_read_unlock (); opinfo.pending_count = pending_count; if (!opinfo.pending_count) @@ -3562,9 +3578,13 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) if (op == GD_OP_REPLACE_BRICK) glusterd_rb_use_rsp_dict (NULL, rsp_dict); - list_for_each_local_xaction_peers (peerinfo, - opinfo.local_xaction_peers) { - GF_ASSERT (peerinfo); + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; if (!peerinfo->connected || !peerinfo->mgmt) continue; @@ -3577,6 +3597,7 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) if (proc->fn) { ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); if (ret) { + rcu_read_unlock (); gf_log (this->name, GF_LOG_ERROR, "failed to " "set peerinfo"); goto out; @@ -3593,6 +3614,7 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) pending_count++; } } + rcu_read_unlock (); opinfo.pending_count = pending_count; out: @@ -4212,9 +4234,13 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) goto out; } - list_for_each_local_xaction_peers (peerinfo, - opinfo.local_xaction_peers) { - GF_ASSERT (peerinfo); + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > opinfo.txn_generation) + continue; if (!peerinfo->connected || !peerinfo->mgmt) continue; @@ -4227,6 +4253,7 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) if (proc->fn) { ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); if (ret) { + rcu_read_unlock (); gf_log (this->name, GF_LOG_ERROR, "failed to set peerinfo"); goto out; @@ -4242,6 +4269,7 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) pending_count++; } } + rcu_read_unlock (); opinfo.pending_count = pending_count; gf_log (this->name, GF_LOG_DEBUG, "Sent commit op req for 'Volume %s' " @@ -4528,7 +4556,6 @@ glusterd_op_txn_complete (uuid_t *txn_id) glusterd_op_clear_op (); glusterd_op_reset_ctx (); glusterd_op_clear_errstr (); - gd_cleanup_local_xaction_peers_list (opinfo.local_xaction_peers); /* Based on the op-version, we release the cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_3_6_0) { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 69bfd4c92a5..f6eaa372f35 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -103,8 +103,7 @@ struct glusterd_op_info_ { int32_t op_errno; char *op_errstr; struct cds_list_head pending_bricks; - struct cds_list_head *local_xaction_peers; - + uint64_t txn_generation; }; typedef struct glusterd_op_info_ glusterd_op_info_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c index 5f098839398..f3241e918f7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c @@ -258,8 +258,15 @@ glusterd_peerinfo_t * glusterd_peerinfo_new (glusterd_friend_sm_state_t state, uuid_t *uuid, const char *hostname, int port) { - glusterd_peerinfo_t *new_peer = NULL; - int ret = -1; + glusterd_peerinfo_t *new_peer = NULL; + int ret = -1; + xlator_t *this = NULL; + 
glusterd_conf_t *conf = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); new_peer = GF_CALLOC (1, sizeof (*new_peer), gf_gld_mt_peerinfo_t); if (!new_peer) @@ -297,6 +304,8 @@ glusterd_peerinfo_new (glusterd_friend_sm_state_t state, uuid_t *uuid, new_peer->port = port; pthread_mutex_init (&new_peer->delete_lock, NULL); + + new_peer->generation = uatomic_add_return (&conf->generation, 1); out: if (ret && new_peer) { glusterd_peerinfo_cleanup (new_peer); diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index be137802a64..dceaa3e46b9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -104,6 +104,7 @@ struct glusterd_peerinfo_ { /* Members required for proper cleanup using RCU */ gd_rcu_head rcu_head; pthread_mutex_t delete_lock; + uint64_t generation; }; typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index a00438e6d96..c5066b015a3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -1067,89 +1067,36 @@ out: int -gd_build_peers_list (struct cds_list_head *peers, - struct cds_list_head *xact_peers, glusterd_op_t op) +gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + char **op_errstr, uuid_t txn_id, + glusterd_op_info_t *txn_opinfo) { - glusterd_peerinfo_t *peerinfo = NULL; - int npeers = 0; + int ret = -1; + int peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; - GF_ASSERT (peers); - GF_ASSERT (xact_peers); + this = THIS; + synctask_barrier_init((&args)); + peer_cnt = 0; rcu_read_lock (); - cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { - if (!peerinfo->connected) + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) continue; - if (op != GD_OP_SYNC_VOLUME && - peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) - continue; - - cds_list_add_tail (&peerinfo->op_peers_list, xact_peers); - npeers++; - } - rcu_read_unlock (); - - return npeers; -} -int -gd_build_local_xaction_peers_list (struct cds_list_head *peers, - struct cds_list_head *xact_peers, - glusterd_op_t op) -{ - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_local_peers_t *local_peers = NULL; - int npeers = 0; - - GF_ASSERT (peers); - GF_ASSERT (xact_peers); - - rcu_read_lock (); - cds_list_for_each_entry_rcu (peerinfo, peers, uuid_list) { if (!peerinfo->connected) continue; if (op != GD_OP_SYNC_VOLUME && peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) continue; - local_peers = GF_CALLOC (1, sizeof (*local_peers), - gf_gld_mt_local_peers_t); - if (!local_peers) { - npeers = -1; - goto unlock; - } - CDS_INIT_LIST_HEAD (&local_peers->op_peers_list); - local_peers->peerinfo = peerinfo; - cds_list_add_tail (&local_peers->op_peers_list, xact_peers); - npeers++; - } -unlock: - rcu_read_unlock (); - - return npeers; -} -int -gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, - char **op_errstr, int npeers, uuid_t txn_id, - struct cds_list_head *peers) -{ - int ret = -1; - int peer_cnt = 0; - uuid_t peer_uuid = {0}; - xlator_t *this = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - struct syncargs args = {0}; - - if (!npeers) { - 
ret = 0; - goto out; - } - - this = THIS; - synctask_barrier_init((&args)); - peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { if (conf->op_version < GD_OP_VERSION_3_6_0) { /* Reset lock status */ peerinfo->locked = _gf_false; @@ -1160,6 +1107,13 @@ gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, MY_UUID, peer_uuid, txn_id); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); if (args.op_ret) { @@ -1187,15 +1141,15 @@ out: } int -gd_stage_op_phase (struct cds_list_head *peers, glusterd_op_t op, - dict_t *op_ctx, dict_t *req_dict, char **op_errstr, - int npeers) +gd_stage_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, glusterd_op_info_t *txn_opinfo) { int ret = -1; int peer_cnt = 0; dict_t *rsp_dict = NULL; char *hostname = NULL; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; glusterd_peerinfo_t *peerinfo = NULL; uuid_t tmp_uuid = {0}; char *errstr = NULL; @@ -1204,6 +1158,8 @@ gd_stage_op_phase (struct cds_list_head *peers, glusterd_op_t op, this = THIS; GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); rsp_dict = dict_new (); if (!rsp_dict) @@ -1252,21 +1208,36 @@ stage_done: goto out; } - if (!npeers) { - ret = 0; - goto out; - } - gd_syncargs_init (&args, aggr_dict); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + ret = gd_syncop_mgmt_stage_op (peerinfo, &args, MY_UUID, tmp_uuid, op, req_dict, op_ctx); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "Sent stage op req for 'Volume %s' " "to %d peers", gd_op_list[op], peer_cnt); @@ -1295,9 +1266,8 @@ out: } int -gd_commit_op_phase (struct cds_list_head *peers, glusterd_op_t op, - dict_t *op_ctx, dict_t *req_dict, char **op_errstr, - int npeers) +gd_commit_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, glusterd_op_info_t *txn_opinfo) { dict_t *rsp_dict = NULL; int peer_cnt = -1; @@ -1305,12 +1275,17 @@ gd_commit_op_phase (struct cds_list_head *peers, glusterd_op_t op, char *hostname = NULL; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; uuid_t tmp_uuid = {0}; char *errstr = NULL; struct syncargs args = {0}; int type = GF_QUOTA_OPTION_TYPE_NONE; this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + rsp_dict = dict_new (); if (!rsp_dict) { ret = -1; @@ -1359,21 +1334,36 @@ commit_done: goto out; } - if (!npeers) { - ret = 0; - goto out; - } - gd_syncargs_init (&args, op_ctx); synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_local_xaction_peers (peerinfo, peers) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, uuid_list) { + /* Only send requests to peers who were available before the + * transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != 
GD_FRIEND_STATE_BEFRIENDED) + continue; + ret = gd_syncop_mgmt_commit_op (peerinfo, &args, MY_UUID, tmp_uuid, op, req_dict, op_ctx); peer_cnt++; } + rcu_read_unlock (); + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); ret = args.op_ret; if (args.errstr) @@ -1399,8 +1389,8 @@ out: int gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr, - int npeers, char *volname, gf_boolean_t is_acquired, - uuid_t txn_id, struct cds_list_head *peers) + char *volname, gf_boolean_t is_acquired, uuid_t txn_id, + glusterd_op_info_t *txn_opinfo) { glusterd_peerinfo_t *peerinfo = NULL; uuid_t tmp_uuid = {0}; @@ -1412,11 +1402,6 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, this = THIS; GF_ASSERT (this); - if (!npeers) { - ret = 0; - goto out; - } - /* If the lock has not been held during this * transaction, do not send unlock requests */ if (!is_acquired) { @@ -1428,7 +1413,21 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, peer_cnt = 0; if (conf->op_version < GD_OP_VERSION_3_6_0) { - list_for_each_local_xaction_peers (peerinfo, peers) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, + uuid_list) { + /* Only send requests to peers who were available before + * the transaction started + */ + if (peerinfo->generation > txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + /* Only unlock peers that were locked */ if (peerinfo->locked) { gd_syncop_mgmt_unlock (peerinfo, &args, @@ -1436,16 +1435,39 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, peer_cnt++; } } + rcu_read_unlock (); } else { if (volname) { - list_for_each_local_xaction_peers (peerinfo, peers) { + rcu_read_lock (); + cds_list_for_each_entry_rcu (peerinfo, &conf->peers, + uuid_list) { + /* Only send requests to peers who were + * available before the transaction started + */ + if (peerinfo->generation > + txn_opinfo->txn_generation) + continue; + + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo, &args, MY_UUID, tmp_uuid, txn_id); peer_cnt++; } + rcu_read_unlock (); } } + + if (0 == peer_cnt) { + ret = 0; + goto out; + } + gd_synctask_barrier_wait((&args), peer_cnt); ret = args.op_ret; @@ -1585,7 +1607,6 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) { int ret = -1; int op_ret = -1; - int npeers = 0; dict_t *req_dict = NULL; glusterd_conf_t *conf = NULL; glusterd_op_t op = 0; @@ -1596,7 +1617,6 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) xlator_t *this = NULL; gf_boolean_t is_acquired = _gf_false; uuid_t *txn_id = NULL; - struct cds_list_head xaction_peers = {0,}; glusterd_op_info_t txn_opinfo = {{0},}; this = THIS; @@ -1604,8 +1624,6 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) conf = this->private; GF_ASSERT (conf); - CDS_INIT_LIST_HEAD (&xaction_peers); - ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get volume " @@ -1686,20 +1704,11 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) local_locking_done: - /* Maintain xaction_peers on per transaction basis */ - npeers = gd_build_local_xaction_peers_list (&conf->peers, 
- &xaction_peers, op); - if (npeers == -1) { - gf_log (this->name, GF_LOG_ERROR, "building local peers list " - "failed"); - goto out; - } - /* If no volname is given as a part of the command, locks will * not be held */ if (volname || (conf->op_version < GD_OP_VERSION_3_6_0)) { - ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, - npeers, *txn_id, &xaction_peers); + ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, *txn_id, + &txn_opinfo); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Locking Peers Failed."); @@ -1716,8 +1725,7 @@ local_locking_done: goto out; } - ret = gd_stage_op_phase (&xaction_peers, op, op_ctx, req_dict, - &op_errstr, npeers); + ret = gd_stage_op_phase (op, op_ctx, req_dict, &op_errstr, &txn_opinfo); if (ret) goto out; @@ -1725,8 +1733,8 @@ local_locking_done: if (ret) goto out; - ret = gd_commit_op_phase (&xaction_peers, op, op_ctx, req_dict, - &op_errstr, npeers); + ret = gd_commit_op_phase (op, op_ctx, req_dict, &op_errstr, + &txn_opinfo); if (ret) goto out; @@ -1734,11 +1742,9 @@ local_locking_done: out: op_ret = ret; if (txn_id) { - (void) gd_unlock_op_phase (conf, op, &op_ret, req, - op_ctx, op_errstr, - npeers, volname, - is_acquired, *txn_id, - &xaction_peers); + (void) gd_unlock_op_phase (conf, op, &op_ret, req, op_ctx, + op_errstr, volname, is_acquired, + *txn_id, &txn_opinfo); /* Clearing the transaction opinfo */ ret = glusterd_clear_txn_opinfo (txn_id); @@ -1751,8 +1757,6 @@ out: glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr); - gd_cleanup_local_xaction_peers_list (&xaction_peers); - if (volname) GF_FREE (volname); diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h index d86a5ba2131..87a3c76f9fb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.h +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h @@ -59,15 +59,6 @@ void gd_synctask_barrier_wait (struct syncargs *args, int count); int -gd_build_peers_list (struct cds_list_head *peers, - struct cds_list_head *xact_peers, glusterd_op_t op); - -int -gd_build_local_xaction_peers_list (struct cds_list_head *peers, - struct cds_list_head *xact_peers, - glusterd_op_t op); - -int gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op_errstr); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index aa27ebb18d3..afc3faaefb5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -9557,22 +9557,3 @@ glusterd_list_add_order (struct cds_list_head *new, struct cds_list_head *head, cds_list_add_rcu (new, rcu_dereference (pos->prev)); } - -void -gd_cleanup_local_xaction_peers_list (struct cds_list_head *xact_peers) -{ - glusterd_local_peers_t *local_peers = NULL; - glusterd_local_peers_t *tmp = NULL; - - GF_ASSERT (xact_peers); - - if (cds_list_empty (xact_peers)) - return; - - cds_list_for_each_entry_safe (local_peers, tmp, xact_peers, - op_peers_list) { - GF_FREE (local_peers); - /* local_peers->peerinfo need not be freed because it does not - * ownership of peerinfo, but merely refer it */ - } -} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 80c7c19d508..d2dbddec3f1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -37,16 +37,6 @@ volinfo->volname, brickid);\ } while (0) -#define list_for_each_local_xaction_peers(xact_peer, xact_peers_head) \ - glusterd_local_peers_t *pos = NULL; \ - for 
(pos = cds_list_entry ((xact_peers_head)->next, \ - glusterd_local_peers_t, op_peers_list), \ - xact_peer = pos->peerinfo; \ - &pos->op_peers_list != (xact_peers_head); \ - pos = cds_list_entry(pos->op_peers_list.next, \ - glusterd_local_peers_t, op_peers_list), \ - xact_peer = pos->peerinfo) - struct glusterd_lock_ { uuid_t owner; time_t timestamp; @@ -670,8 +660,4 @@ void glusterd_list_add_order (struct cds_list_head *new, struct cds_list_head *head, int (*compare)(struct cds_list_head *, struct cds_list_head *)); - -void -gd_cleanup_local_xaction_peers_list (struct cds_list_head *peers); - #endif diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 480cfc66269..ff63cce2234 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -171,7 +171,8 @@ typedef struct { char *snap_bricks_directory; gf_store_handle_t *missed_snaps_list_shandle; struct cds_list_head missed_snaps_list; - int ping_timeout; + int ping_timeout; + uint64_t generation; } glusterd_conf_t; |