summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd
diff options
context:
space:
mode:
authorAtin Mukherjee <amukherj@redhat.com>2018-08-10 09:12:05 +0530
committerAtin Mukherjee <amukherj@redhat.com>2018-08-13 03:01:42 +0000
commit29d5557854703f61a4aa1fc53d6b49de9a99fe9d (patch)
treebd9399bd9420ea52ad991e7a2ac4a7d482ba33a6 /xlators/mgmt/glusterd
parent48b93c292c0069da9ac2fe77e66d08a1cdeacfdc (diff)
glusterd: compare friend data within mutex
During friend handshake if the glusterd receives more than one friend updates, it might very well become possible that two threads would end up working on two different volinfo references and glusterd might end up updating the store with a old volinfo reference. While debugging glusterd crash from validating-server-quorum.t test file from the line-coverage regression the same was observed. Solution is to run glusterd_compare_friend_data under a mutex. Test: As the crash was more visible in the line-coverage run (given lcov does some instrumentation and exposes the races), 6 manual lcov runs were triggered starting from https://build.gluster.org/job/line-coverage/443 to https://build.gluster.org/job/line-coverage/449/ and no crash was observed from validating-server-quorum.t Change-Id: I86fce473a76fd24742d51bf17a685d28b90a8941 Fixes: bz#1603063 Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd')
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c87
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c1
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1
3 files changed, 48 insertions, 41 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index a952a4c179e..cbd1bb0aaea 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -937,54 +937,59 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
*/
//Build comparison logic here.
- ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
- event->peername);
- if (ret)
- goto out;
-
- if (GLUSTERD_VOL_COMP_RJT != status) {
- event_type = GD_FRIEND_EVENT_LOCAL_ACC;
- op_ret = 0;
- } else {
- event_type = GD_FRIEND_EVENT_LOCAL_RJT;
- op_errno = GF_PROBE_VOLUME_CONFLICT;
- op_ret = -1;
- }
-
- /* Compare missed_snapshot list with the peer *
- * if volume comparison is successful */
- if ((op_ret == 0) &&
- (conf->op_version >= GD_OP_VERSION_3_6_0)) {
- ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
+ pthread_mutex_lock (&conf->import_volumes);
+ {
+ ret = glusterd_compare_friend_data (ev_ctx->vols, &status,
+ event->peername);
if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
- "Failed to import peer's "
- "missed_snaps_list.");
- event_type = GD_FRIEND_EVENT_LOCAL_RJT;
- op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
- op_ret = -1;
+ pthread_mutex_unlock (&conf->import_volumes);
+ goto out;
}
- /* glusterd_compare_friend_snapshots and functions only require
- * a peers hostname and uuid. It also does updates, which
- * require use of synchronize_rcu. So we pass the hostname and
- * id from the event instead of the peerinfo object to prevent
- * deadlocks as above.
- */
- ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
- event->peername,
- event->peerid);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- GD_MSG_SNAP_COMPARE_CONFLICT,
- "Conflict in comparing peer's snapshots");
+ if (GLUSTERD_VOL_COMP_RJT != status) {
+ event_type = GD_FRIEND_EVENT_LOCAL_ACC;
+ op_ret = 0;
+ } else {
event_type = GD_FRIEND_EVENT_LOCAL_RJT;
- op_errno = GF_PROBE_SNAP_CONFLICT;
+ op_errno = GF_PROBE_VOLUME_CONFLICT;
op_ret = -1;
}
- }
+ /* Compare missed_snapshot list with the peer *
+ * if volume comparison is successful */
+ if ((op_ret == 0) &&
+ (conf->op_version >= GD_OP_VERSION_3_6_0)) {
+ ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to import peer's "
+ "missed_snaps_list.");
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
+ op_ret = -1;
+ }
+
+ /* glusterd_compare_friend_snapshots and functions only require
+ * a peers hostname and uuid. It also does updates, which
+ * require use of synchronize_rcu. So we pass the hostname and
+ * id from the event instead of the peerinfo object to prevent
+ * deadlocks as above.
+ */
+ ret = glusterd_compare_friend_snapshots (ev_ctx->vols,
+ event->peername,
+ event->peerid);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_COMPARE_CONFLICT,
+ "Conflict in comparing peer's snapshots");
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_SNAP_CONFLICT;
+ op_ret = -1;
+ }
+ }
+ }
+ pthread_mutex_unlock (&conf->import_volumes);
ret = glusterd_friend_sm_new_event (event_type, &new_event);
if (ret) {
diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c
index 0714714d33e..f72953cd319 100644
--- a/xlators/mgmt/glusterd/src/glusterd.c
+++ b/xlators/mgmt/glusterd/src/glusterd.c
@@ -1854,6 +1854,7 @@ init (xlator_t *this)
synclock_init (&conf->big_lock, SYNC_LOCK_RECURSIVE);
pthread_mutex_init (&conf->xprt_lock, NULL);
INIT_LIST_HEAD (&conf->xprt_list);
+ pthread_mutex_init (&conf->import_volumes, NULL);
glusterd_friend_sm_init ();
glusterd_op_sm_init ();
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 66b7fbb2297..39232d2d5eb 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -162,6 +162,7 @@ typedef struct {
struct cds_list_head brick_procs; /* List of brick processes */
pthread_mutex_t xprt_lock;
struct list_head xprt_list;
+ pthread_mutex_t import_volumes;
gf_store_handle_t *handle;
gf_timer_t *timer;
glusterd_sm_tr_log_t op_sm_log;