summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAtin Mukherjee <amukherj@redhat.com>2014-12-12 07:21:19 +0530
committerKaushal M <kaushal@redhat.com>2014-12-22 20:14:14 -0800
commitda9deb54df91dedc51ebe165f3a0be646455cb5b (patch)
treec3fdd61e31881807dc7dcbfd7ec09145fe0248b0
parent0e78a12381e988a06e1d5a2dd592d132e24a4e10 (diff)
glusterd: Maintain per transaction xaction_peers list in syncop & mgmt_v3
In the current implementation, the xaction_peers list is maintained in a global structure (glusterd_conf_t) for syncop/mgmt_v3. This means the consistency and atomicity of the peerinfo list across transactions is not guaranteed when multiple syncop/mgmt_v3 transactions are in progress. We ran into this problem in mgmt_v3-locks.t, which was failing spuriously: two volume set operations (on two different volumes) were running simultaneously and both transactions were manipulating the same xaction_peers structure, which led to a corrupted list. Because of this, in some cases the unlock request to a peer was never triggered and we ended up with stale locks. The solution is to maintain a per-transaction local xaction_peers list for every syncop. Please note that I've identified this problem in the op-sm area as well, and a separate patch will be attempted to fix it. Finally, thanks to Krishnan Parthasarathi and Kaushal M for your constant help in getting to the root cause. Change-Id: Ib1eaac9e5c8fc319f4e7f8d2ad965bc1357a7c63 BUG: 1173414 Signed-off-by: Atin Mukherjee <amukherj@redhat.com> Reviewed-on: http://review.gluster.org/9269 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Kaushal M <kaushal@redhat.com>
-rwxr-xr-xtests/bugs/bug-1173414-mgmt-v3-remote-lock-failure.t34
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c162
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h5
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c170
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h21
6 files changed, 256 insertions, 139 deletions
diff --git a/tests/bugs/bug-1173414-mgmt-v3-remote-lock-failure.t b/tests/bugs/bug-1173414-mgmt-v3-remote-lock-failure.t
new file mode 100755
index 00000000000..adc3fe30dd4
--- /dev/null
+++ b/tests/bugs/bug-1173414-mgmt-v3-remote-lock-failure.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0
+TEST $CLI_1 volume create $V1 $H1:$B1/$V1
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume start $V1
+
+for i in {1..20}
+do
+ $CLI_1 volume set $V0 diagnostics.client-log-level DEBUG &
+ $CLI_1 volume set $V1 barrier on
+ $CLI_2 volume set $V0 diagnostics.client-log-level DEBUG &
+ $CLI_2 volume set $V1 barrier on
+done
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+TEST $CLI_1 volume status
+TEST $CLI_2 volume status
+
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
index 95908cf62fc..ed171b69b66 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -70,7 +70,8 @@ typedef enum gf_gld_mem_types_ {
gf_gld_mt_snap_t = gf_common_mt_end + 54,
gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55,
gf_gld_mt_snap_create_args_t = gf_common_mt_end + 56,
- gf_gld_mt_end = gf_common_mt_end + 57,
+ gf_gld_mt_local_peers_t = gf_common_mt_end + 57,
+ gf_gld_mt_end = gf_common_mt_end + 58,
} gf_gld_mem_types_t;
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
index f26c676cd67..0cdaaaeda9a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -384,28 +384,25 @@ out:
}
int
-glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op,
- dict_t *dict, char **op_errstr, int npeers,
- gf_boolean_t *is_acquired)
+glusterd_mgmt_v3_initiate_lockdown (glusterd_op_t op, dict_t *dict,
+ char **op_errstr, int npeers,
+ gf_boolean_t *is_acquired,
+ struct list_head *peers)
{
char *volname = NULL;
glusterd_peerinfo_t *peerinfo = NULL;
int32_t ret = -1;
int32_t peer_cnt = 0;
struct syncargs args = {0};
- struct list_head *peers = NULL;
uuid_t peer_uuid = {0};
xlator_t *this = NULL;
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (conf);
GF_ASSERT (dict);
GF_ASSERT (op_errstr);
GF_ASSERT (is_acquired);
- peers = &conf->xaction_peers;
-
/* Trying to acquire multiple mgmt_v3 locks on local node */
ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID);
if (ret) {
@@ -425,7 +422,7 @@ glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op,
gd_syncargs_init (&args, NULL);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
gd_mgmt_v3_lock (op, dict, peerinfo, &args,
MY_UUID, peer_uuid);
peer_cnt++;
@@ -639,26 +636,23 @@ out:
}
int
-glusterd_mgmt_v3_pre_validate (glusterd_conf_t *conf, glusterd_op_t op,
- dict_t *req_dict, char **op_errstr, int npeers)
+glusterd_mgmt_v3_pre_validate (glusterd_op_t op, dict_t *req_dict,
+ char **op_errstr, int npeers,
+ struct list_head *peers)
{
int32_t ret = -1;
int32_t peer_cnt = 0;
dict_t *rsp_dict = NULL;
glusterd_peerinfo_t *peerinfo = NULL;
struct syncargs args = {0};
- struct list_head *peers = NULL;
uuid_t peer_uuid = {0};
xlator_t *this = NULL;
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (conf);
GF_ASSERT (req_dict);
GF_ASSERT (op_errstr);
- peers = &conf->xaction_peers;
-
rsp_dict = dict_new ();
if (!rsp_dict) {
gf_log (this->name, GF_LOG_ERROR,
@@ -710,7 +704,7 @@ glusterd_mgmt_v3_pre_validate (glusterd_conf_t *conf, glusterd_op_t op,
gd_syncargs_init (&args, req_dict);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args,
MY_UUID, peer_uuid);
peer_cnt++;
@@ -878,26 +872,23 @@ out:
}
int
-glusterd_mgmt_v3_brick_op (glusterd_conf_t *conf, glusterd_op_t op,
- dict_t *req_dict, char **op_errstr, int npeers)
+glusterd_mgmt_v3_brick_op (glusterd_op_t op, dict_t *req_dict,
+ char **op_errstr, int npeers,
+ struct list_head *peers)
{
int32_t ret = -1;
int32_t peer_cnt = 0;
dict_t *rsp_dict = NULL;
glusterd_peerinfo_t *peerinfo = NULL;
struct syncargs args = {0};
- struct list_head *peers = NULL;
uuid_t peer_uuid = {0};
xlator_t *this = NULL;
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (conf);
GF_ASSERT (req_dict);
GF_ASSERT (op_errstr);
- peers = &conf->xaction_peers;
-
rsp_dict = dict_new ();
if (!rsp_dict) {
gf_log (this->name, GF_LOG_ERROR,
@@ -940,7 +931,7 @@ glusterd_mgmt_v3_brick_op (glusterd_conf_t *conf, glusterd_op_t op,
gd_syncargs_init (&args, NULL);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args,
MY_UUID, peer_uuid);
peer_cnt++;
@@ -1103,28 +1094,24 @@ out:
}
int
-glusterd_mgmt_v3_commit (glusterd_conf_t *conf, glusterd_op_t op,
- dict_t *op_ctx, dict_t *req_dict,
- char **op_errstr, int npeers)
+glusterd_mgmt_v3_commit (glusterd_op_t op, dict_t *op_ctx,
+ dict_t *req_dict, char **op_errstr,
+ int npeers, struct list_head *peers)
{
int32_t ret = -1;
int32_t peer_cnt = 0;
dict_t *rsp_dict = NULL;
glusterd_peerinfo_t *peerinfo = NULL;
struct syncargs args = {0};
- struct list_head *peers = NULL;
uuid_t peer_uuid = {0};
xlator_t *this = NULL;
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (conf);
GF_ASSERT (op_ctx);
GF_ASSERT (req_dict);
GF_ASSERT (op_errstr);
- peers = &conf->xaction_peers;
-
rsp_dict = dict_new ();
if (!rsp_dict) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1176,7 +1163,7 @@ glusterd_mgmt_v3_commit (glusterd_conf_t *conf, glusterd_op_t op,
gd_syncargs_init (&args, op_ctx);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args,
MY_UUID, peer_uuid);
peer_cnt++;
@@ -1307,29 +1294,24 @@ out:
}
int
-glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op,
- int32_t op_ret, dict_t *dict, dict_t *req_dict,
- char **op_errstr, int npeers)
+glusterd_mgmt_v3_post_validate (glusterd_op_t op, int32_t op_ret, dict_t *dict,
+ dict_t *req_dict, char **op_errstr, int npeers,
+ struct list_head *peers)
{
int32_t ret = -1;
int32_t peer_cnt = 0;
dict_t *rsp_dict = NULL;
glusterd_peerinfo_t *peerinfo = NULL;
struct syncargs args = {0};
- struct list_head *peers = NULL;
uuid_t peer_uuid = {0};
xlator_t *this = NULL;
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (conf);
GF_ASSERT (dict);
GF_VALIDATE_OR_GOTO (this->name, req_dict, out);
GF_ASSERT (op_errstr);
- peers = &conf->xaction_peers;
- GF_ASSERT (peers);
-
rsp_dict = dict_new ();
if (!rsp_dict) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1375,7 +1357,7 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op,
gd_syncargs_init (&args, req_dict);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo,
&args, MY_UUID, peer_uuid);
peer_cnt++;
@@ -1502,10 +1484,11 @@ out:
}
int
-glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op,
+glusterd_mgmt_v3_release_peer_locks (glusterd_op_t op,
dict_t *dict, int32_t op_ret,
char **op_errstr, int npeers,
- gf_boolean_t is_acquired)
+ gf_boolean_t is_acquired,
+ struct list_head *peers)
{
int32_t ret = -1;
int32_t peer_cnt = 0;
@@ -1513,16 +1496,12 @@ glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op,
xlator_t *this = NULL;
glusterd_peerinfo_t *peerinfo = NULL;
struct syncargs args = {0};
- struct list_head *peers = NULL;
this = THIS;
GF_ASSERT (this);
- GF_ASSERT (conf);
GF_ASSERT (dict);
GF_ASSERT (op_errstr);
- peers = &conf->xaction_peers;
-
/* If the lock has not been held during this
* transaction, do not send unlock requests */
if (!is_acquired)
@@ -1537,7 +1516,7 @@ glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op,
gd_syncargs_init (&args, NULL);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
gd_mgmt_v3_unlock (op, dict, peerinfo, &args,
MY_UUID, peer_uuid);
peer_cnt++;
@@ -1575,6 +1554,7 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
xlator_t *this = NULL;
gf_boolean_t is_acquired = _gf_false;
uuid_t *originator_uuid = NULL;
+ struct list_head xaction_peers = {0,};
this = THIS;
GF_ASSERT (this);
@@ -1583,6 +1563,15 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
conf = this->private;
GF_ASSERT (conf);
+ INIT_LIST_HEAD (&xaction_peers);
+ npeers = gd_build_local_xaction_peers_list (&conf->peers,
+ &xaction_peers, op);
+ if (npeers == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "building local peers list "
+ "failed");
+ goto rsp;
+ }
+
/* Save the MY_UUID as the originator_uuid. This originator_uuid
* will be used by is_origin_glusterd() to determine if a node
* is the originator node for a command. */
@@ -1619,13 +1608,10 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
}
dict_copy (dict, tmp_dict);
- /* BUILD PEERS LIST */
- INIT_LIST_HEAD (&conf->xaction_peers);
- npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op);
-
/* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
- ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr,
- npeers, &is_acquired);
+ ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr,
+ npeers, &is_acquired,
+ &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed.");
goto out;
@@ -1642,16 +1628,17 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
}
/* PRE-COMMIT VALIDATE PHASE */
- ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_pre_validate (op, req_dict,
+ &op_errstr, npeers,
+ &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed");
goto out;
}
/* COMMIT OP PHASE */
- ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_commit (op, dict, req_dict,
+ &op_errstr, npeers, &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed");
goto out;
@@ -1662,8 +1649,9 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
commands other than snapshot. So as of now, I am
sending 0 (op_ret as 0).
*/
- ret = glusterd_mgmt_v3_post_validate (conf, op, 0, dict, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_post_validate (op, 0, dict, req_dict,
+ &op_errstr, npeers,
+ &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed");
goto out;
@@ -1673,9 +1661,10 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op,
out:
op_ret = ret;
/* UNLOCK PHASE FOR PEERS*/
- (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict,
+ (void) glusterd_mgmt_v3_release_peer_locks (op, dict,
op_ret, &op_errstr,
- npeers, is_acquired);
+ npeers, is_acquired,
+ &xaction_peers);
/* LOCAL VOLUME(S) UNLOCK */
if (is_acquired) {
@@ -1687,10 +1676,12 @@ out:
op_ret = ret;
}
}
-
+rsp:
/* SEND CLI RESPONSE */
glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr);
+ gd_cleanup_local_xaction_peers_list (&xaction_peers);
+
if (req_dict)
dict_unref (req_dict);
@@ -1783,6 +1774,7 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
uuid_t *originator_uuid = NULL;
gf_boolean_t success = _gf_false;
char *cli_errstr = NULL;
+ struct list_head xaction_peers = {0,};
this = THIS;
GF_ASSERT (this);
@@ -1791,6 +1783,15 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
conf = this->private;
GF_ASSERT (conf);
+ INIT_LIST_HEAD (&xaction_peers);
+ npeers = gd_build_local_xaction_peers_list (&conf->peers,
+ &xaction_peers, op);
+ if (npeers == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "building local peers list "
+ "failed");
+ goto rsp;
+ }
+
/* Save the MY_UUID as the originator_uuid. This originator_uuid
* will be used by is_origin_glusterd() to determine if a node
* is the originator node for a command. */
@@ -1827,13 +1828,10 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
}
dict_copy (dict, tmp_dict);
- /* BUILD PEERS LIST */
- INIT_LIST_HEAD (&conf->xaction_peers);
- npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op);
-
/* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
- ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr,
- npeers, &is_acquired);
+ ret = glusterd_mgmt_v3_initiate_lockdown (op, dict, &op_errstr,
+ npeers, &is_acquired,
+ &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed.");
goto out;
@@ -1850,8 +1848,8 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
}
/* PRE-COMMIT VALIDATE PHASE */
- ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_pre_validate (op, req_dict,
+ &op_errstr, npeers, &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed");
goto out;
@@ -1875,8 +1873,8 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
goto out;
}
- ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_brick_op (op, req_dict,
+ &op_errstr, npeers, &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed");
goto unbarrier;
@@ -1906,8 +1904,8 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op,
goto unbarrier;
}
- ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_commit (op, dict, req_dict,
+ &op_errstr, npeers, &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed");
/* If the main op fails, we should save the error string.
@@ -1932,8 +1930,8 @@ unbarrier:
goto out;
}
- ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_brick_op (op, req_dict,
+ &op_errstr, npeers, &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed");
@@ -1960,17 +1958,19 @@ out:
op_ret = -1;
/* POST-COMMIT VALIDATE PHASE */
- ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, dict, req_dict,
- &op_errstr, npeers);
+ ret = glusterd_mgmt_v3_post_validate (op, op_ret, dict, req_dict,
+ &op_errstr, npeers,
+ &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed");
op_ret = -1;
}
/* UNLOCK PHASE FOR PEERS*/
- (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict,
+ (void) glusterd_mgmt_v3_release_peer_locks (op, dict,
op_ret, &op_errstr,
- npeers, is_acquired);
+ npeers, is_acquired,
+ &xaction_peers);
/* If the commit op (snapshot taking) failed, then the error is stored
in cli_errstr and unbarrier is called. Suppose, if unbarrier also
@@ -1994,10 +1994,12 @@ out:
op_ret = ret;
}
}
-
+rsp:
/* SEND CLI RESPONSE */
glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr);
+ gd_cleanup_local_xaction_peers_list (&xaction_peers);
+
if (req_dict)
dict_unref (req_dict);
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h
index 294dbacc9d0..fb873f75601 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.h
@@ -103,6 +103,11 @@ struct glusterd_peerinfo_ {
typedef struct glusterd_peerinfo_ glusterd_peerinfo_t;
+typedef struct glusterd_local_peers_ { /* One node of a per-transaction peer
+ * list. Nodes are chained through op_peers_list onto a list head that
+ * belongs to a single transaction, so the linkage is not shared between
+ * concurrently running transactions. */
+ glusterd_peerinfo_t *peerinfo; /* peer this node refers to; not owned by the node */
+ struct list_head op_peers_list; /* linkage into the transaction's peer list */
+} glusterd_local_peers_t;
+
typedef enum glusterd_ev_gen_mode_ {
GD_MODE_OFF,
GD_MODE_ON,
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 534e3032105..a910406f158 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -1030,6 +1030,9 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers,
glusterd_peerinfo_t *peerinfo = NULL;
int npeers = 0;
+ GF_ASSERT (peers);
+ GF_ASSERT (xact_peers);
+
list_for_each_entry (peerinfo, peers, uuid_list) {
if (!peerinfo->connected)
continue;
@@ -1044,18 +1047,65 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers,
}
 int
-gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
- char **op_errstr, int npeers, uuid_t txn_id)
+gd_build_local_xaction_peers_list (struct list_head *peers,
+ struct list_head *xact_peers,
+ glusterd_op_t op) /* Build the per-transaction peer list.
+ * Walks 'peers' (linked through uuid_list) and appends one newly allocated
+ * glusterd_local_peers_t to 'xact_peers' for every peer that is connected
+ * and, unless op is GD_OP_SYNC_VOLUME, is in GD_FRIEND_STATE_BEFRIENDED.
+ * Returns the number of entries appended, or -1 if an allocation fails;
+ * entries appended before the failure are left on 'xact_peers'. */
 {
- int ret = -1;
- int peer_cnt = 0;
- uuid_t peer_uuid = {0};
- xlator_t *this = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- struct syncargs args = {0};
- struct list_head *peers = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_local_peers_t *local_peers = NULL;
+ int npeers = 0; /* count of entries appended so far */
+
+ GF_ASSERT (peers);
+ GF_ASSERT (xact_peers);
- peers = &conf->xaction_peers;
+ list_for_each_entry (peerinfo, peers, uuid_list) {
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ local_peers = GF_CALLOC (1, sizeof (*local_peers),
+ gf_gld_mt_local_peers_t);
+ if (!local_peers) {
+ return -1; /* partial list is not unwound here */
+ }
+ INIT_LIST_HEAD (&local_peers->op_peers_list);
+ local_peers->peerinfo = peerinfo; /* stores the pointer only; peerinfo is not copied */
+ list_add_tail (&local_peers->op_peers_list, xact_peers);
+ npeers++;
+ }
+ return npeers;
+}
+
+/* Free every glusterd_local_peers_t node linked on 'xact_peers'.
+ *
+ * Only the list nodes are released; the peerinfo each node points to is
+ * merely referenced by the node, not owned, and is left untouched.
+ * On return 'xact_peers' is an empty list, so calling this again on the
+ * same head is harmless.
+ */
+void
+gd_cleanup_local_xaction_peers_list (struct list_head *xact_peers)
+{
+        glusterd_local_peers_t *local_peers = NULL;
+        glusterd_local_peers_t *tmp         = NULL;
+
+        GF_ASSERT (xact_peers);
+
+        /* The _safe iterator remembers the next node before the body runs.
+         * A plain list_for_each_entry would read the node that was just
+         * freed in order to advance. An empty list yields no iterations. */
+        list_for_each_entry_safe (local_peers, tmp, xact_peers,
+                                  op_peers_list) {
+                /* Unlink before freeing so the head never points at
+                 * released memory. */
+                list_del_init (&local_peers->op_peers_list);
+                GF_FREE (local_peers);
+        }
+}
+
+int
+gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
+ char **op_errstr, int npeers, uuid_t txn_id,
+ struct list_head *peers)
+{
+ int ret = -1;
+ int peer_cnt = 0;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
if (!npeers) {
ret = 0;
@@ -1065,7 +1115,7 @@ gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx,
this = THIS;
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
if (conf->op_version < GD_OP_VERSION_3_6_0) {
/* Reset lock status */
peerinfo->locked = _gf_false;
@@ -1106,16 +1156,16 @@ int
gd_stage_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx,
dict_t *req_dict, char **op_errstr, int npeers)
{
- int ret = -1;
- int peer_cnt = 0;
- dict_t *rsp_dict = NULL;
- char *hostname = NULL;
- xlator_t *this = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- uuid_t tmp_uuid = {0};
- char *errstr = NULL;
- struct syncargs args = {0};
- dict_t *aggr_dict = NULL;
+ int ret = -1;
+ int peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ char *hostname = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uuid_t tmp_uuid = {0};
+ char *errstr = NULL;
+ struct syncargs args = {0};
+ dict_t *aggr_dict = NULL;
this = THIS;
rsp_dict = dict_new ();
@@ -1165,7 +1215,8 @@ stage_done:
gd_syncargs_init (&args, aggr_dict);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+
+ list_for_each_local_xaction_peers (peerinfo, peers) {
ret = gd_syncop_mgmt_stage_op (peerinfo, &args,
MY_UUID, tmp_uuid,
op, req_dict, op_ctx);
@@ -1202,16 +1253,16 @@ int
gd_commit_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx,
dict_t *req_dict, char **op_errstr, int npeers)
{
- dict_t *rsp_dict = NULL;
- int peer_cnt = -1;
- int ret = -1;
- char *hostname = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- xlator_t *this = NULL;
- uuid_t tmp_uuid = {0};
- char *errstr = NULL;
- struct syncargs args = {0};
- int type = GF_QUOTA_OPTION_TYPE_NONE;
+ dict_t *rsp_dict = NULL;
+ int peer_cnt = -1;
+ int ret = -1;
+ char *hostname = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t tmp_uuid = {0};
+ char *errstr = NULL;
+ struct syncargs args = {0};
+ int type = GF_QUOTA_OPTION_TYPE_NONE;
this = THIS;
rsp_dict = dict_new ();
@@ -1269,7 +1320,8 @@ commit_done:
gd_syncargs_init (&args, op_ctx);
synctask_barrier_init((&args));
peer_cnt = 0;
- list_for_each_entry (peerinfo, peers, op_peers_list) {
+
+ list_for_each_local_xaction_peers (peerinfo, peers) {
ret = gd_syncop_mgmt_commit_op (peerinfo, &args,
MY_UUID, tmp_uuid,
op, req_dict, op_ctx);
@@ -1301,18 +1353,15 @@ int
gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr,
int npeers, char *volname, gf_boolean_t is_acquired,
- uuid_t txn_id)
+ uuid_t txn_id, struct list_head *peers)
{
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_peerinfo_t *tmp = NULL;
- uuid_t tmp_uuid = {0};
- int peer_cnt = 0;
- int ret = -1;
- xlator_t *this = NULL;
- struct syncargs args = {0};
- struct list_head *peers = NULL;
-
- peers = &conf->xaction_peers;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uuid_t tmp_uuid = {0};
+ int peer_cnt = 0;
+ int ret = -1;
+ xlator_t *this = NULL;
+ struct syncargs args = {0};
+
this = THIS;
GF_ASSERT (this);
@@ -1330,25 +1379,23 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret,
synctask_barrier_init((&args));
peer_cnt = 0;
+
if (conf->op_version < GD_OP_VERSION_3_6_0) {
- list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
/* Only unlock peers that were locked */
if (peerinfo->locked) {
gd_syncop_mgmt_unlock (peerinfo, &args,
MY_UUID, tmp_uuid);
peer_cnt++;
- list_del_init (&peerinfo->op_peers_list);
}
}
} else {
if (volname) {
- list_for_each_entry_safe (peerinfo, tmp,
- peers, op_peers_list) {
+ list_for_each_local_xaction_peers (peerinfo, peers) {
gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo,
&args, MY_UUID,
tmp_uuid, txn_id);
peer_cnt++;
- list_del_init (&peerinfo->op_peers_list);
}
}
}
@@ -1502,6 +1549,7 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
xlator_t *this = NULL;
gf_boolean_t is_acquired = _gf_false;
uuid_t *txn_id = NULL;
+ struct list_head xaction_peers = {0,};
glusterd_op_info_t txn_opinfo;
this = THIS;
@@ -1509,6 +1557,8 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
conf = this->private;
GF_ASSERT (conf);
+ INIT_LIST_HEAD (&xaction_peers);
+
ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op);
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "Failed to get volume "
@@ -1591,15 +1641,20 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req)
local_locking_done:
- INIT_LIST_HEAD (&conf->xaction_peers);
-
- npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op);
+ /* Maintain xaction_peers on per transaction basis */
+ npeers = gd_build_local_xaction_peers_list (&conf->peers,
+ &xaction_peers, op);
+ if (npeers == -1) {
+ gf_log (this->name, GF_LOG_ERROR, "building local peers list "
+ "failed");
+ goto out;
+ }
/* If no volname is given as a part of the command, locks will
* not be held */
if (volname || (conf->op_version < GD_OP_VERSION_3_6_0)) {
ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr,
- npeers, *txn_id);
+ npeers, *txn_id, &xaction_peers);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Locking Peers Failed.");
@@ -1616,7 +1671,7 @@ local_locking_done:
goto out;
}
- ret = gd_stage_op_phase (&conf->xaction_peers, op, op_ctx, req_dict,
+ ret = gd_stage_op_phase (&xaction_peers, op, op_ctx, req_dict,
&op_errstr, npeers);
if (ret)
goto out;
@@ -1625,7 +1680,7 @@ local_locking_done:
if (ret)
goto out;
- ret = gd_commit_op_phase (&conf->xaction_peers, op, op_ctx, req_dict,
+ ret = gd_commit_op_phase (&xaction_peers, op, op_ctx, req_dict,
&op_errstr, npeers);
if (ret)
goto out;
@@ -1637,7 +1692,8 @@ out:
(void) gd_unlock_op_phase (conf, op, &op_ret, req,
op_ctx, op_errstr,
npeers, volname,
- is_acquired, *txn_id);
+ is_acquired, *txn_id,
+ &xaction_peers);
/* Clearing the transaction opinfo */
ret = glusterd_clear_txn_opinfo (txn_id);
@@ -1650,6 +1706,8 @@ out:
glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr);
+ gd_cleanup_local_xaction_peers_list (&xaction_peers);
+
if (volname)
GF_FREE (volname);
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h
index c7924b5a025..e0733dc8006 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.h
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h
@@ -37,11 +37,19 @@
} while (0)
+/* Iterate over a per-transaction peer list.
+ *
+ * xact_peers_head: struct list_head * whose entries are
+ *                  glusterd_local_peers_t nodes linked via op_peers_list.
+ * xact_peer:       glusterd_peerinfo_t * lvalue; set to the current node's
+ *                  peerinfo before each execution of the loop body.
+ *
+ * peerinfo is read only after the cursor has been checked against the
+ * list head, so the head sentinel (which is not embedded in a
+ * glusterd_local_peers_t) is never dereferenced as a node. After the
+ * loop, xact_peer keeps the last peer visited (or its prior value if the
+ * list was empty).
+ *
+ * NOTE: the expansion declares a cursor named 'pos' in the enclosing
+ * scope, so the macro can be used at most once per scope and must not be
+ * the unbraced body of an if/else/loop.
+ */
+#define list_for_each_local_xaction_peers(xact_peer, xact_peers_head)         \
+        glusterd_local_peers_t *pos = NULL;                                   \
+        for (pos = list_entry ((xact_peers_head)->next,                       \
+                               glusterd_local_peers_t, op_peers_list);        \
+             (&pos->op_peers_list != (xact_peers_head)) &&                    \
+             (((xact_peer) = pos->peerinfo), 1);                              \
+             pos = list_entry (pos->op_peers_list.next,                       \
+                               glusterd_local_peers_t, op_peers_list))
+
+
int gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local,
void *cookie, rpc_clnt_prog_t *prog, int procnum,
fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
-
-
int gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg,
uuid_t my_uuid, uuid_t recv_uuid);
@@ -64,6 +72,15 @@ gd_synctask_barrier_wait (struct syncargs *args, int count);
int
gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers,
glusterd_op_t op);
+
+int
+gd_build_local_xaction_peers_list (struct list_head *peers,
+ struct list_head *xact_peers,
+ glusterd_op_t op);
+
+void
+gd_cleanup_local_xaction_peers_list (struct list_head *peers);
+
int
gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
char **op_errstr);