diff options
author | Atin Mukherjee <amukherj@redhat.com> | 2018-08-10 09:12:05 +0530 |
---|---|---|
committer | Atin Mukherjee <amukherj@redhat.com> | 2018-08-13 03:01:42 +0000 |
commit | 29d5557854703f61a4aa1fc53d6b49de9a99fe9d (patch) | |
tree | bd9399bd9420ea52ad991e7a2ac4a7d482ba33a6 /xlators/mgmt/glusterd/src | |
parent | 48b93c292c0069da9ac2fe77e66d08a1cdeacfdc (diff) |
glusterd: compare friend data within mutex
During friend handshake, if glusterd receives more than one friend
update, it is possible that two threads end up working on two
different volinfo references, and glusterd may end up updating the
store with an old volinfo reference. This was observed while debugging
a glusterd crash in the validating-server-quorum.t test file during the
line-coverage regression run.
The solution is to run glusterd_compare_friend_data under a mutex.
Test:
As the crash was more visible in the line-coverage run (given lcov does
some instrumentation and exposes the races), 6 manual lcov runs were
triggered starting from https://build.gluster.org/job/line-coverage/443
to https://build.gluster.org/job/line-coverage/449/ and no crash was
observed from validating-server-quorum.t
Change-Id: I86fce473a76fd24742d51bf17a685d28b90a8941
Fixes: bz#1603063
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.c | 87 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 1 |
3 files changed, 48 insertions, 41 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index a952a4c179e..cbd1bb0aaea 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -937,54 +937,59 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) */ //Build comparison logic here. - ret = glusterd_compare_friend_data (ev_ctx->vols, &status, - event->peername); - if (ret) - goto out; - - if (GLUSTERD_VOL_COMP_RJT != status) { - event_type = GD_FRIEND_EVENT_LOCAL_ACC; - op_ret = 0; - } else { - event_type = GD_FRIEND_EVENT_LOCAL_RJT; - op_errno = GF_PROBE_VOLUME_CONFLICT; - op_ret = -1; - } - - /* Compare missed_snapshot list with the peer * - * if volume comparison is successful */ - if ((op_ret == 0) && - (conf->op_version >= GD_OP_VERSION_3_6_0)) { - ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols); + pthread_mutex_lock (&conf->import_volumes); + { + ret = glusterd_compare_friend_data (ev_ctx->vols, &status, + event->peername); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, - "Failed to import peer's " - "missed_snaps_list."); - event_type = GD_FRIEND_EVENT_LOCAL_RJT; - op_errno = GF_PROBE_MISSED_SNAP_CONFLICT; - op_ret = -1; + pthread_mutex_unlock (&conf->import_volumes); + goto out; } - /* glusterd_compare_friend_snapshots and functions only require - * a peers hostname and uuid. It also does updates, which - * require use of synchronize_rcu. So we pass the hostname and - * id from the event instead of the peerinfo object to prevent - * deadlocks as above. 
- */ - ret = glusterd_compare_friend_snapshots (ev_ctx->vols, - event->peername, - event->peerid); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_SNAP_COMPARE_CONFLICT, - "Conflict in comparing peer's snapshots"); + if (GLUSTERD_VOL_COMP_RJT != status) { + event_type = GD_FRIEND_EVENT_LOCAL_ACC; + op_ret = 0; + } else { event_type = GD_FRIEND_EVENT_LOCAL_RJT; - op_errno = GF_PROBE_SNAP_CONFLICT; + op_errno = GF_PROBE_VOLUME_CONFLICT; op_ret = -1; } - } + /* Compare missed_snapshot list with the peer * + * if volume comparison is successful */ + if ((op_ret == 0) && + (conf->op_version >= GD_OP_VERSION_3_6_0)) { + ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, + "Failed to import peer's " + "missed_snaps_list."); + event_type = GD_FRIEND_EVENT_LOCAL_RJT; + op_errno = GF_PROBE_MISSED_SNAP_CONFLICT; + op_ret = -1; + } + + /* glusterd_compare_friend_snapshots and functions only require + * a peers hostname and uuid. It also does updates, which + * require use of synchronize_rcu. So we pass the hostname and + * id from the event instead of the peerinfo object to prevent + * deadlocks as above. 
+ */ + ret = glusterd_compare_friend_snapshots (ev_ctx->vols, + event->peername, + event->peerid); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_SNAP_COMPARE_CONFLICT, + "Conflict in comparing peer's snapshots"); + event_type = GD_FRIEND_EVENT_LOCAL_RJT; + op_errno = GF_PROBE_SNAP_CONFLICT; + op_ret = -1; + } + } + } + pthread_mutex_unlock (&conf->import_volumes); ret = glusterd_friend_sm_new_event (event_type, &new_event); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 0714714d33e..f72953cd319 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1854,6 +1854,7 @@ init (xlator_t *this) synclock_init (&conf->big_lock, SYNC_LOCK_RECURSIVE); pthread_mutex_init (&conf->xprt_lock, NULL); INIT_LIST_HEAD (&conf->xprt_list); + pthread_mutex_init (&conf->import_volumes, NULL); glusterd_friend_sm_init (); glusterd_op_sm_init (); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 66b7fbb2297..39232d2d5eb 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -162,6 +162,7 @@ typedef struct { struct cds_list_head brick_procs; /* List of brick processes */ pthread_mutex_t xprt_lock; struct list_head xprt_list; + pthread_mutex_t import_volumes; gf_store_handle_t *handle; gf_timer_t *timer; glusterd_sm_tr_log_t op_sm_log; |