summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-op-sm.c
diff options
context:
space:
mode:
authorKaushal M <kaushal@redhat.com>2015-03-26 15:18:54 +0530
committerKrishnan Parthasarathi <kparthas@redhat.com>2015-04-13 06:30:02 +0000
commit1efa50861b2cee68de9c9b751d9fc5eed08f5e5b (patch)
tree218ca5aba1eb404ff0d7ac25f20716915768f4ec /xlators/mgmt/glusterd/src/glusterd-op-sm.c
parent7c7bbc027feb4c5b233e3078951e5bb1d9fc4618 (diff)
glusterd: Replace transaction peers lists
Transaction peer lists were used in GlusterD to identify peers belonging to a transaction. This was needed to prevent newly added peers performing partial transactions, which could be incorrect. This was accomplished by creating a separate transaction peers list at the beginning of every transaction. A transaction peers list referenced the peerinfo data structures of the peers which were present at the beginning of the transaction. RCU protection of peerinfos referenced by the transaction peers list is a hard problem and difficult to do correctly. To have proper RCU protection of peerinfos, the transaction peers lists have been replaced by an alternative method to identify peers that belong to a transaction. The alternative method is to use the global peers list along with generation numbers to identify peers that should belong to a transaction. This change introduces a global peer list generation number, and a generation number for each peerinfo object. Whenever a peerinfo object is created, the global generation number is bumped, and the peerinfo's generation number is set to the bumped global generation. With the above changes, the algorithm to identify peers belonging to a transaction with RCU protection is as follows, - At the beginning of a transaction, the current global generation number is saved - To identify if a peer belongs to the transaction, - Start an RCU read critical section - For each peer in the global peers list, - If the peer's generation number is not greater than the saved generation number, continue with the action on the peer - End the RCU read critical section The above algorithm guarantees that, - The peer list is not modified when a transaction is iterating through it - The transaction actions are only done on peers that were present when the transaction started But, as a transaction could iterate over the peers list multiple times, the algorithm cannot guarantee that the same set of peers will be selected every time. 
A peer could get deleted between two iterations of the list within a transaction. This problem existed with the transaction peers list as well, but unlike before, now it will not lead to invalid memory access and potential crashes. This problem will be addressed separately. This change was developed on the git branch at [1]. This commit is a combination of the following commits on the development branch. 52ded5b Add timespec_cmp 44aedd8 Add create timestamp to peerinfo 7bcbea5 Fix some silly mistakes 13e3241 Add start time to opinfo 17a6727 Use timestamp comparisions to identify xaction peers instead of a xaction peer list 3be05b6 Correct check for peerinfo age 70d5b58 Use read-critical sections for peer list iteration ba4dbca Use peerinfo timestamp checks in op-sm instead of xaction peer list d63f811 Add more peer status checks when iterating peers list in glusterd-syncop 1998a2a Timestamp based peer list traversal of mgmtv3 xactions f3c1a42 Remove transaction peer lists b8b08ee Remove unused labels 32e5f5b Remove 'npeers' usage a075fb7 Remove 'npeers' from mgmt-v3 framework 12c9df2 Use generation number instead of timestamps. 9723021 Remove timespec_cmp 80ae2c6 Remove timespec.h include a9479b0 Address review comments on 10147/4 [1]: https://github.com/kshlm/glusterfs/tree/urcu Change-Id: I9be1033525c0a89276f5b5d83dc2eb061918b97f BUG: 1205186 Signed-off-by: Kaushal M <kaushal@redhat.com> Reviewed-on: http://review.gluster.org/10147 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Atin Mukherjee <amukherj@redhat.com> Reviewed-by: Anand Nekkunti <anekkunt@redhat.com> Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Tested-by: Krishnan Parthasarathi <kparthas@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-op-sm.c')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c67
1 files changed, 47 insertions, 20 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index d7694258301..5bfdb0bb43e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -119,13 +119,16 @@ glusterd_txn_opinfo_dict_fini ()
void
glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo,
- glusterd_op_sm_state_info_t *state,
- glusterd_op_t *op,
- dict_t *op_ctx,
- rpcsvc_request_t *req)
+ glusterd_op_sm_state_info_t *state, glusterd_op_t *op,
+ dict_t *op_ctx, rpcsvc_request_t *req)
{
+ glusterd_conf_t *conf = NULL;
+
GF_ASSERT (opinfo);
+ conf = THIS->private;
+ GF_ASSERT (conf);
+
if (state)
opinfo->state = *state;
@@ -140,6 +143,9 @@ glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo,
if (req)
opinfo->req = req;
+ opinfo->txn_generation = conf->generation;
+ cmm_smp_rmb ();
+
return;
}
@@ -314,9 +320,6 @@ glusterd_clear_txn_opinfo (uuid_t *txn_id)
dict_del(priv->glusterd_txn_opinfo, uuid_utoa (*txn_id));
- if (txn_op_info.local_xaction_peers)
- GF_FREE (txn_op_info.local_xaction_peers);
-
gf_log ("", GF_LOG_DEBUG,
"Successfully cleared opinfo for transaction ID : %s",
uuid_utoa (*txn_id));
@@ -2919,9 +2922,13 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
priv = this->private;
GF_ASSERT (priv);
- list_for_each_local_xaction_peers (peerinfo,
- opinfo.local_xaction_peers) {
- GF_ASSERT (peerinfo);
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
if (!peerinfo->connected || !peerinfo->mgmt)
continue;
@@ -2936,6 +2943,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
if (proc->fn) {
ret = proc->fn (NULL, this, peerinfo);
if (ret) {
+ rcu_read_unlock ();
gf_log (this->name, GF_LOG_WARNING,
"Failed to send lock request "
"for operation 'Volume %s' to "
@@ -2958,6 +2966,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
ret = dict_set_static_ptr (dict, "peerinfo",
peerinfo);
if (ret) {
+ rcu_read_unlock ();
gf_log (this->name, GF_LOG_ERROR,
"failed to set peerinfo");
dict_unref (dict);
@@ -2966,6 +2975,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
ret = proc->fn (NULL, this, dict);
if (ret) {
+ rcu_read_unlock ();
gf_log (this->name, GF_LOG_WARNING,
"Failed to send mgmt_v3 lock "
"request for operation "
@@ -2981,6 +2991,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
}
}
}
+ rcu_read_unlock ();
opinfo.pending_count = pending_count;
if (!opinfo.pending_count)
@@ -3009,9 +3020,13 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx)
priv = this->private;
GF_ASSERT (priv);
- list_for_each_local_xaction_peers (peerinfo,
- opinfo.local_xaction_peers) {
- GF_ASSERT (peerinfo);
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
if (!peerinfo->connected || !peerinfo->mgmt ||
!peerinfo->locked)
@@ -3083,6 +3098,7 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx)
}
}
}
+ rcu_read_unlock ();
opinfo.pending_count = pending_count;
if (!opinfo.pending_count)
@@ -3562,9 +3578,13 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
if (op == GD_OP_REPLACE_BRICK)
glusterd_rb_use_rsp_dict (NULL, rsp_dict);
- list_for_each_local_xaction_peers (peerinfo,
- opinfo.local_xaction_peers) {
- GF_ASSERT (peerinfo);
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
if (!peerinfo->connected || !peerinfo->mgmt)
continue;
@@ -3577,6 +3597,7 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
if (proc->fn) {
ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
if (ret) {
+ rcu_read_unlock ();
gf_log (this->name, GF_LOG_ERROR, "failed to "
"set peerinfo");
goto out;
@@ -3593,6 +3614,7 @@ glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
pending_count++;
}
}
+ rcu_read_unlock ();
opinfo.pending_count = pending_count;
out:
@@ -4212,9 +4234,13 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx)
goto out;
}
- list_for_each_local_xaction_peers (peerinfo,
- opinfo.local_xaction_peers) {
- GF_ASSERT (peerinfo);
+ rcu_read_lock ();
+ cds_list_for_each_entry_rcu (peerinfo, &priv->peers, uuid_list) {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
if (!peerinfo->connected || !peerinfo->mgmt)
continue;
@@ -4227,6 +4253,7 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx)
if (proc->fn) {
ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
if (ret) {
+ rcu_read_unlock ();
gf_log (this->name, GF_LOG_ERROR,
"failed to set peerinfo");
goto out;
@@ -4242,6 +4269,7 @@ glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx)
pending_count++;
}
}
+ rcu_read_unlock ();
opinfo.pending_count = pending_count;
gf_log (this->name, GF_LOG_DEBUG, "Sent commit op req for 'Volume %s' "
@@ -4528,7 +4556,6 @@ glusterd_op_txn_complete (uuid_t *txn_id)
glusterd_op_clear_op ();
glusterd_op_reset_ctx ();
glusterd_op_clear_errstr ();
- gd_cleanup_local_xaction_peers_list (opinfo.local_xaction_peers);
/* Based on the op-version, we release the cluster or mgmt_v3 lock */
if (priv->op_version < GD_OP_VERSION_3_6_0) {