diff options
author | karthik-us <ksubrahm@redhat.com> | 2017-08-16 17:26:48 +0530 |
---|---|---|
committer | Ravishankar N <ravishankar@redhat.com> | 2017-11-18 00:38:20 +0000 |
commit | 19f9bcff4aada589d4321356c2670ed283f02c03 (patch) | |
tree | 60917d166c9fc4ae9813601ebb062411f51f4e4b /xlators/cluster/afr/src/afr-lk-common.c | |
parent | d71b53d2487ef93fc8a362ecc9fb735222e713e4 (diff) |
cluster/afr: Fix for arbiter becoming source
Problem:
When eager-lock is on and two writes happen in parallel on an FD,
we observed the following behaviour:
- First write fails on one data brick
- Since the post-op has not yet happened, the inode refresh sees
both the data bricks as readable and sets that in the inode context
- The in-flight split-brain check sees both the data bricks as readable
and allows the second write
- Second write fails on the other data brick
- Now the post-op happens and marks both the data bricks as bad, and
the arbiter becomes the source for healing
Fix:
Add one more variable, write_subvol, to the inode context; it holds the
in-memory representation of the writable subvols. Inode refresh does not
update this value, and its lifetime is from pre-op through unlock in the
afr transaction. Initially the pre-op sets this value to be the same as
read_subvol in the inode context, and the in-flight split-brain check
then uses this value instead of read_subvol. After all the checks we
update this value and set read_subvol to match it, so that read_subvol
does not hold an incorrect value.
Change-Id: I2ef6904524ab91af861d59690974bbc529ab1af3
BUG: 1482064
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-lk-common.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 18 |
1 files changed, 13 insertions, 5 deletions
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 1f2a11755bf..c17f60f62c4 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -613,12 +613,16 @@ static int32_t
 afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                        int32_t op_ret, int32_t op_errno, dict_t *xdata)
 {
-        afr_local_t *local = NULL;
-        afr_internal_lock_t *int_lock = NULL;
-        int call_count = 0;
+        afr_local_t         *local      = NULL;
+        afr_internal_lock_t *int_lock   = NULL;
+        afr_fd_ctx_t        *fd_ctx     = NULL;
+        afr_private_t       *priv       = NULL;
+        int                  call_count = 0;
+        int                  ret        = 0;
 
         local = frame->local;
         int_lock = &local->internal_lock;
+        priv = this->private;
 
         LOCK (&frame->lock);
         {
@@ -629,11 +633,15 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         if (call_count == 0) {
                 gf_msg_trace (this->name, 0,
                               "All internal locks unlocked");
-
+                if (local->fd) {
+                        fd_ctx = afr_fd_ctx_get (local->fd, this);
+                        if (0 == AFR_COUNT (fd_ctx->lock_acquired, priv->child_count))
+                                ret = afr_write_subvol_reset (frame, this);
+                }
                 int_lock->lock_cbk (frame, this);
         }
 
-        return 0;
+        return ret;
 }
 
 void