diff options
| author | karthik-us <ksubrahm@redhat.com> | 2017-08-16 17:26:48 +0530 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-11-27 18:13:32 +0000 | 
| commit | 2475bfb4d25b20c20bc806771857a2de443835a2 (patch) | |
| tree | c8d3fe9d34e845372aa136c2c72b5a1c1e09d80e | |
| parent | ae88dc134a89fa4af4ff2356abbf32f6f220dd53 (diff) | |
cluster/afr: Fix for arbiter becoming source
Problem:
When eager-lock is on, and two writes happen in parallel on a FD
we were observing the following behaviour:
- First write fails on one data brick
- Since the post-op is not yet happened, the inode refresh will get
  both the data bricks as readable and set it in the inode context
- In flight split brain check see both the data bricks as readable
  and allows the second write
- Second write fails on the other data brick
- Now the post-op happens and marks both the data bricks as bad and
  arbiter will become source for healing
Fix:
Adding one more variable called write_suvol in inode context and it
will have the in memory representation of the writable subvols. Inode
refresh will not update this value and its lifetime is pre-op through
unlock in the afr transaction. Initially the pre-op will set this
value same as read_subvol in inode context and then in the in flight
split brain check we will use this value instead of read_subvol.
After all the checks we will update the value of this and set the
read_subvol same as this to avoid having incorrect value in that.
Change-Id: I2ef6904524ab91af861d59690974bbc529ab1af3
BUG: 1516313
Signed-off-by: karthik-us <ksubrahm@redhat.com>
(cherry picked from commit 19f9bcff4aada589d4321356c2670ed283f02c03)
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 76 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 18 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 10 | 
4 files changed, 102 insertions, 6 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index e0468a4006c..2702ed31de0 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -147,6 +147,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)                  }                  tmp_ctx->spb_choice = -1;                  tmp_ctx->read_subvol = 0; +                tmp_ctx->write_subvol = 0;          } else {                  tmp_ctx = (afr_inode_ctx_t *) ctx_int;          } @@ -214,7 +215,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,          if (ret < 0)                  return ret; -        val = ctx->read_subvol; +        val = ctx->write_subvol;          metadatamap_old = metadatamap = (val & 0x000000000000ffff);          datamap_old = datamap = (val & 0x00000000ffff0000) >> 16; @@ -275,6 +276,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,                  (((uint64_t) datamap) << 16) |                  (((uint64_t) event) << 32); +        ctx->write_subvol = val;          ctx->read_subvol = val;          return ret; @@ -6562,3 +6564,75 @@ afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,  out:          return ret;  } + +int +afr_write_subvol_set (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t      *local = NULL; +        afr_inode_ctx_t  *ctx   = NULL; +        uint64_t          val   = 0; +        uint64_t          val1  = 0; +        int               ret   = -1; + +        local = frame->local; +        LOCK(&local->inode->lock); +        { +                ret = __afr_inode_ctx_get (this, local->inode, &ctx); +                if (ret < 0) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_DICT_GET_FAILED, +                                "ERROR GETTING INODE CTX"); +                        UNLOCK(&local->inode->lock); +                        return ret; +                } + +                val = ctx->write_subvol; +                /* +                 * We need to set the value of write_subvol to read_subvol in 2 +                 * cases: +                 * 1. Initially when the value is 0. i.e., it's the first lock +                 * request. +                 * 2. If it's a metadata transaction. If metadata transactions +                 * comes in between data transactions and we have a brick +                 * disconnect, the next metadata transaction won't get the +                 * latest value of readables, since we do resetting of +                 * write_subvol in unlock code path only if it's a data +                 * transaction. To handle those scenarios we need to set the +                 * value of write_subvol to read_subvol in case of metadata +                 * transactions. +                */ +                if (val == 0 || +                    local->transaction.type == AFR_METADATA_TRANSACTION) { +                        val1 = ctx->read_subvol; +                        ctx->write_subvol = val1; +                } +        } +        UNLOCK (&local->inode->lock); + +        return 0; +} + +int +afr_write_subvol_reset (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t      *local = NULL; +        afr_inode_ctx_t  *ctx   = NULL; +        int               ret   = -1; + +        local = frame->local; +        LOCK(&local->inode->lock); +        { +                ret = __afr_inode_ctx_get (this, local->inode, &ctx); +                if (ret < 0) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                AFR_MSG_DICT_GET_FAILED, +                                "ERROR GETTING INODE CTX"); +                        UNLOCK(&local->inode->lock); +                        return ret; +                } +                ctx->write_subvol = 0; +        } +        UNLOCK(&local->inode->lock); + +        return 0; +} diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 1f2a11755bf..c17f60f62c4 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -613,12 +613,16 @@ static int32_t  afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                         int32_t op_ret, int32_t op_errno, dict_t *xdata)  { -        afr_local_t         *local    = NULL; -        afr_internal_lock_t *int_lock = NULL; -        int call_count = 0; +        afr_local_t             *local          = NULL; +        afr_internal_lock_t     *int_lock       = NULL; +        afr_fd_ctx_t            *fd_ctx         = NULL; +        afr_private_t           *priv           = NULL; +        int                      call_count     = 0; +        int                      ret            = 0;          local    = frame->local;          int_lock = &local->internal_lock; +        priv = this->private;          LOCK (&frame->lock);          { @@ -629,11 +633,15 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (call_count == 0) {                  gf_msg_trace (this->name, 0,                                "All internal locks unlocked"); - +                if (local->fd) { +                        fd_ctx = afr_fd_ctx_get (local->fd, this); +                        if (0 == AFR_COUNT (fd_ctx->lock_acquired, priv->child_count)) +                                ret = afr_write_subvol_reset (frame, this); +                }                  int_lock->lock_cbk (frame, this);          } -        return 0; +        return ret;  }  void diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 791c70f6fcc..a04636f25a0 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1750,6 +1750,10 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)  	if (pre_nop)  		goto next; +        ret = afr_write_subvol_set (frame, this); +        if (ret) +                goto err; +  	if (!local->pre_op_compat) {  		dict_copy (xdata_req, local->xdata_req);  		goto next; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 25625665c42..b59b9439273 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -837,6 +837,7 @@ typedef struct _afr_local {  typedef struct _afr_inode_ctx {          uint64_t        read_subvol; +        uint64_t        write_subvol;          int             spb_choice;          gf_timer_t      *timer;          gf_boolean_t    need_refresh; @@ -1264,4 +1265,13 @@ afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,                                       int32_t *serz_len, char delimiter);  gf_boolean_t  afr_is_symmetric_error (call_frame_t *frame, xlator_t *this); + +int +__afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx); + +int +afr_write_subvol_set (call_frame_t *frame, xlator_t *this); + +int +afr_write_subvol_reset (call_frame_t *frame, xlator_t *this);  #endif /* __AFR_H__ */  | 
