diff options
| author | Ravishankar N <ravishankar@redhat.com> | 2017-05-10 10:03:08 +0530 | 
|---|---|---|
| committer | jiffin tony Thottan <jthottan@redhat.com> | 2017-05-17 08:51:57 +0000 | 
| commit | f3022d7a0bc55be0aba3c6bc0fafcdcd96b936e5 (patch) | |
| tree | 556f45f13d4dc5b7c2899a01c8e812ef5619eda9 | |
| parent | f453425865d7151f27cd43c4c1e96919ec25a4f8 (diff) | |
afr: propagate correct errno for fop failures in arbiter
Problem:
If quorum is not met in the fop cbk, the arbiter logic overrides the result
with a hard-coded ENOTCONN error, which is then sent to the upper xlators.
In a VM workload with sharding enabled, this led to the VM pausing when
replace-brick was performed, as described in the BZ.
Fix:
Move the fop cbk arbitration logic to afr_handle_quorum(), because for
normal replica volumes that is the function that performs the quorum and
errno checks in the fop cbk path before doing a post-op.
Thanks to Pranith for suggesting this approach.
> Reviewed-on: https://review.gluster.org/17235
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
(cherry picked from commit 93c850dd2a513fab75408df9634ad3c970a0e859)
Change-Id: Ie6315db30c5e36326b71b90a01da824109e86796
BUG: 1450937
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://review.gluster.org/17296
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Reviewed-by: jiffin tony Thottan <jthottan@redhat.com>
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 23 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 2 | 
4 files changed, 12 insertions, 15 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 8e483c382c4..9099b8c1eee 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -183,7 +183,6 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)  		}  	} -        afr_txn_arbitrate_fop_cbk (frame, this);  } diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index ddc257dbde4..8c312a89e53 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -131,7 +131,6 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)  		}  	} -        afr_txn_arbitrate_fop_cbk (frame, this);          afr_set_in_flight_sb_status (this, local, local->inode);  } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 83e25f3a122..9b5063d8aa8 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -301,22 +301,21 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this)                  }  } -void -afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this) +gf_boolean_t +afr_has_arbiter_fop_cbk_quorum (call_frame_t *frame)  {          afr_local_t *local = NULL;          afr_private_t *priv = NULL; +        xlator_t *this = NULL;          gf_boolean_t fop_failed = _gf_false;          unsigned char *pre_op_sources = NULL;          int i = 0;          local = frame->local; +        this = frame->this;          priv  = this->private;          pre_op_sources = local->transaction.pre_op_sources; -        if (priv->arbiter_count != 1 || local->op_ret < 0) -                return; -          /* If the fop failed on the brick, it is not a source. 
*/          for (i = 0; i < priv->child_count; i++)                  if (local->transaction.failed_subvols[i]) @@ -332,12 +331,10 @@ afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)                  break;          } -        if (fop_failed) { -                local->op_ret = -1; -                local->op_errno = ENOTCONN; -        } +        if (fop_failed) +                return _gf_false; -        return; +        return _gf_true;  }  void @@ -785,8 +782,12 @@ afr_handle_quorum (call_frame_t *frame)           * no split-brain with the fix. The problem is eliminated completely.           */ -        if (afr_has_fop_cbk_quorum (frame)) +        if (priv->arbiter_count) { +                if (afr_has_arbiter_fop_cbk_quorum (frame)) +                        return; +        } else if (afr_has_fop_cbk_quorum (frame)) {                  return; +        }          for (i = 0; i < priv->child_count; i++) {                  if (local->transaction.pre_op[i]) diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index ca8fcfefa89..dcdadbc84f4 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -16,8 +16,6 @@  void  afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,  			    int child_index); -void -afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this);  int  afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);  | 
