diff options
-rwxr-xr-x | tests/bugs/replicate/bug-1696599-io-hang.t | 47 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 1 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 19 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 4 |
5 files changed, 61 insertions, 18 deletions
diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t new file mode 100755 index 00000000000..869cdb94bda --- /dev/null +++ b/tests/bugs/replicate/bug-1696599-io-hang.t @@ -0,0 +1,47 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../fileio.rc + +#Tests that local structures in afr are removed from granted/blocked list of +#locks when inodelk fails on all bricks + +cleanup; + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3} +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.client-io-threads off +TEST $CLI volume set $V0 delay-gen locks +TEST $CLI volume set $V0 delay-gen.delay-duration 5000000 +TEST $CLI volume set $V0 delay-gen.delay-percentage 100 +TEST $CLI volume set $V0 delay-gen.enable finodelk + +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' + +TEST $GFS -s $H0 --volfile-id $V0 $M0 +TEST touch $M0/file +#Trigger write and stop bricks so inodelks fail on all bricks leading to +#lock failure condition +echo abc >> $M0/file & + +TEST $CLI volume stop $V0 +TEST $CLI volume reset $V0 delay-gen +wait +TEST $CLI volume start $V0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 +#Test that only one write succeeded, this tests that delay-gen worked as +#expected +echo abc >> $M0/file +EXPECT "abc" cat $M0/file + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 56f4833b36c..8e8d7eb7473 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5773,6 +5773,10 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) afr_private_t *priv = NULL; priv = this->private; + INIT_LIST_HEAD(&local->transaction.wait_list); + INIT_LIST_HEAD(&local->transaction.owner_list); + INIT_LIST_HEAD(&local->ta_waitq); + INIT_LIST_HEAD(&local->ta_onwireq); ret = afr_internal_lock_init(&local->internal_lock, priv->child_count); if (ret < 0) goto out; @@ -5810,10 +5814,6 @@ afr_transaction_local_init(afr_local_t *local, xlator_t *this) goto out; ret = 0; - INIT_LIST_HEAD(&local->transaction.wait_list); - INIT_LIST_HEAD(&local->transaction.owner_list); - INIT_LIST_HEAD(&local->ta_waitq); - INIT_LIST_HEAD(&local->ta_onwireq); out: return ret; } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 4091671fa3f..bc8eabe0f43 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -397,7 +397,6 @@ afr_unlock_now(call_frame_t *frame, xlator_t *this) int_lock->lk_call_count = call_count; if (!call_count) { - GF_ASSERT(!local->transaction.do_eager_unlock); gf_msg_trace(this->name, 0, "No internal locks unlocked"); int_lock->lock_cbk(frame, this); goto out; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 229820ba26a..15f3a7efa91 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -372,6 +372,8 @@ afr_transaction_done(call_frame_t *frame, xlator_t *this) } local->transaction.unwind(frame, this); + GF_ASSERT(list_empty(&local->transaction.owner_list)); + GF_ASSERT(list_empty(&local->transaction.wait_list)); AFR_STACK_DESTROY(frame); return 0; @@ -393,7 +395,7 @@ afr_lock_fail_shared(afr_local_t *local, struct list_head *list) } static void -afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) +afr_handle_lock_acquire_failure(afr_local_t *local) { struct list_head shared; afr_lock_t *lock = NULL; @@ -414,13 +416,8 @@ afr_handle_lock_acquire_failure(afr_local_t *local, gf_boolean_t locked) afr_lock_fail_shared(local, &shared); local->transaction.do_eager_unlock = _gf_true; out: - if (locked) { - local->internal_lock.lock_cbk = afr_transaction_done; - afr_unlock(local->transaction.frame, local->transaction.frame->this); - } else { - afr_transaction_done(local->transaction.frame, - local->transaction.frame->this); - } + local->internal_lock.lock_cbk = afr_transaction_done; + afr_unlock(local->transaction.frame, local->transaction.frame->this); } call_frame_t * @@ -619,7 +616,7 @@ afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this) failure_count = AFR_COUNT(local->transaction.failed_subvols, priv->child_count); if (failure_count == priv->child_count) { - afr_handle_lock_acquire_failure(local, _gf_true); + afr_handle_lock_acquire_failure(local); return 0; } else { lock = &local->inode_ctx->lock[local->transaction.type]; @@ -2092,7 +2089,7 @@ err: local->op_ret = -1; local->op_errno = op_errno; - afr_handle_lock_acquire_failure(local, _gf_true); + afr_handle_lock_acquire_failure(local); if (xdata_req) dict_unref(xdata_req); @@ -2361,7 +2358,7 @@ afr_internal_lock_finish(call_frame_t *frame, xlator_t *this) } else { lock = &local->inode_ctx->lock[local->transaction.type]; if (local->internal_lock.lock_op_ret < 0) { - afr_handle_lock_acquire_failure(local, _gf_false); + afr_handle_lock_acquire_failure(local); } else { lock->event_generation = local->event_generation; afr_changelog_pre_op(frame, this); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index b76788b693d..6f91c9172f1 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1092,8 +1092,8 @@ afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd); #define AFR_FRAME_INIT(frame, op_errno) \ ({ \ frame->local = mem_get0(THIS->local_pool); \ - if (afr_local_init(frame->local, THIS->private, &op_errno)) { \ - afr_local_cleanup(frame->local, THIS); \ + if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \ + afr_local_cleanup(frame->local, frame->this); \ mem_put(frame->local); \ frame->local = NULL; \ }; \ |