From 6945c9b3dc3afd290ee12cd5f90a6c538b2aaf14 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 12 Jun 2017 22:06:18 +0530 Subject: cluster/afr: Implement quorum for lk fop Problem: At the moment when we have replica 3 or arbiter setup, even when lk succeeds on just one brick we give success to application which is wrong Fix: Consider quorum-number of successes as success when quorum is enabled. >BUG: 1461792 >Change-Id: I5789e6eb5defb68f8a0eb9cd594d316f5cdebaea >Signed-off-by: Pranith Kumar K >Reviewed-on: https://review.gluster.org/17524 >Smoke: Gluster Build System >NetBSD-regression: NetBSD Build System >CentOS-regression: Gluster Build System >Reviewed-by: Ravishankar N BUG: 1462661 Change-Id: I5789e6eb5defb68f8a0eb9cd594d316f5cdebaea Signed-off-by: Pranith Kumar K Reviewed-on: https://review.gluster.org/17578 Smoke: Gluster Build System Reviewed-by: Ravishankar N NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Shyamsundar Ranganathan --- tests/basic/afr/lk-quorum.t | 255 +++++++++++++++++++++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 56 +++++--- xlators/cluster/afr/src/afr.h | 5 - 3 files changed, 293 insertions(+), 23 deletions(-) create mode 100644 tests/basic/afr/lk-quorum.t diff --git a/tests/basic/afr/lk-quorum.t b/tests/basic/afr/lk-quorum.t new file mode 100644 index 00000000000..ad143659bbe --- /dev/null +++ b/tests/basic/afr/lk-quorum.t @@ -0,0 +1,255 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../fileio.rc +cleanup; + +TEST glusterd; +TEST pidof glusterd + +#Tests for quorum-type option for replica 2 +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}; +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0; + +TEST touch $M0/a + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#Check locking behavior with quorum 'fixed' and quorum-count 2 +TEST $CLI volume set $V0 cluster.quorum-type fixed +TEST $CLI volume set $V0 cluster.quorum-count 2 +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^fixed$" mount_get_option_value $M0 $V0-replicate-0 quorum-type +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^2$" mount_get_option_value $M0 $V0-replicate-0 quorum-count + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#When any of the bricks is down lock/unlock should fail +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#kill 2nd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#Check locking behavior with quorum 'fixed' and quorum-count 1 +TEST $CLI volume set $V0 cluster.quorum-count 1 +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" mount_get_option_value $M0 $V0-replicate-0 quorum-count + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#When any of the bricks is down lock/unlock should succeed +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#kill 2nd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#Check locking behavior with quorum 'auto' +TEST $CLI volume set $V0 cluster.quorum-type auto +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^auto$" mount_get_option_value $M0 $V0-replicate-0 quorum-type + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#When first brick is down lock/unlock should fail +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#When second brick is down lock/unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +cleanup; +TEST glusterd; +TEST pidof glusterd + +#Tests for replica 3 +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0; + +TEST touch $M0/a + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST fd_close $fd1 + +#When any of the bricks is down lock/unlock should succeed +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#kill 2nd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#kill 3rd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST fd_close $fd1 + +#When any two of the bricks are down lock/unlock should fail +#kill first,second bricks +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#kill 2nd,3rd bricks +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST fd_close $fd1 + +#kill 1st,3rd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +cleanup diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 6c05374d2b3..064320441b7 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3835,7 +3835,7 @@ unwind: static int afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; int child_index = (long)cookie; @@ -4215,15 +4215,27 @@ afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - afr_local_t * local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; int call_count = -1; + int child_index = (long)cookie; local = frame->local; - call_count = afr_frame_return (frame); + if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + AFR_MSG_UNLOCK_FAIL, + "gfid=%s: unlock failed on subvolume %s " + "with lock owner %s", + uuid_utoa (local->fd->inode->gfid), + priv->children[child_index]->name, + lkowner_utoa (&frame->root->lk_owner)); + } + + call_count = afr_frame_return (frame); if (call_count == 0) AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - lock, xdata); + NULL, local->xdata_rsp); return 0; } @@ -4245,7 +4257,7 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this) if (call_count == 0) { AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - &local->cont.lk.ret_flock, NULL); + NULL, local->xdata_rsp); return 0; } @@ -4255,8 +4267,8 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this) for (i = 0; i < priv->child_count; i++) { if (local->cont.lk.locked_nodes[i]) { - STACK_WIND (frame, afr_lk_unlock_cbk, - priv->children[i], + STACK_WIND_COOKIE (frame, afr_lk_unlock_cbk, + (void *) (long) i, priv->children[i], priv->children[i]->fops->lk, local->fd, F_SETLK, &local->cont.lk.user_flock, NULL); @@ -4272,12 +4284,12 @@ afr_lk_unlock (call_frame_t *frame, xlator_t *this) int32_t afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) { afr_local_t *local = NULL; afr_private_t *priv = NULL; int child_index = -1; -/* int ret = 0; */ local = frame->local; @@ -4285,9 +4297,10 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, child_index = (long) cookie; - if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) { + afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); + if (op_ret < 0 && op_errno == EAGAIN) { local->op_ret = -1; - local->op_errno = op_errno; + local->op_errno = EAGAIN; afr_lk_unlock (frame, this); return 0; @@ -4307,15 +4320,20 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, priv->children[child_index], priv->children[child_index]->fops->lk, local->fd, local->cont.lk.cmd, - &local->cont.lk.user_flock, xdata); - } else if (local->op_ret == -1) { - /* all nodes have gone down */ + &local->cont.lk.user_flock, + local->xdata_req); + } else if (priv->quorum_count && + !afr_has_quorum (local->cont.lk.locked_nodes, this)) { + local->op_ret = -1; + local->op_errno = afr_final_errno (local, priv); - AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN, - &local->cont.lk.ret_flock, NULL); + afr_lk_unlock (frame, this); } else { + if (local->op_ret < 0) + local->op_errno = afr_final_errno (local, priv); + AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno, - &local->cont.lk.ret_flock, NULL); + &local->cont.lk.ret_flock, local->xdata_rsp); } return 0; @@ -4354,11 +4372,13 @@ afr_lk (call_frame_t *frame, xlator_t *this, local->cont.lk.cmd = cmd; local->cont.lk.user_flock = *flock; local->cont.lk.ret_flock = *flock; + if (xdata) + local->xdata_req = dict_ref (xdata); STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0, priv->children[i], priv->children[i]->fops->lk, - fd, cmd, flock, xdata); + fd, cmd, flock, local->xdata_req); return 0; out: diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 3a47ba47241..cf736ed3d2e 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -875,11 +875,6 @@ typedef struct afr_granular_esh_args { mismatch */ } afr_granular_esh_args_t; -/* did a call fail due to a child failing? */ -#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \ - ((op_errno == ENOTCONN) || \ - (op_errno == EBADFD))) - int afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this, unsigned char *readable, int *event_p, int type); -- cgit