diff options
author | Brian Foster <bfoster@redhat.com> | 2013-01-10 10:49:17 -0500 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-01-18 09:32:21 -0800 |
commit | 679cb2399fc1f8e97f2b29654ec422f267b03783 (patch) | |
tree | adb9bb89d6c512ad57d8f0c07f2ad18426512d52 | |
parent | 9036bd1a7bab68351c38d65cd6a1c8af150467bb (diff) |
afr: conditionally prioritize EIO errors over ENOENT
The most important errno logic historically only prioritized ESTALE
over ENOENT. Commit c8c0942d added EIO prioritization over ENOENT
to ensure that split-brain was reported when it occurs in
conjunction with bricks missing the file entry. The unintended side
effect of this change is that (non split-brain) EIO errors reported
from the bricks themselves are now reported to the client when the
expectation is that afr should squash said errors in favor of
marking the file inconsistent.
The high-level problem is that EIO is overloaded with different
meanings from different contexts. This commit adds an eio parameter
to the errno priority logic to conditionally flag when EIO is of
higher priority and should be propagated to the client.
BUG: 892730
Change-Id: Ib692a8a1f1737ef190d57894f392ec53ffb33aab
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-on: http://review.gluster.org/4376
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rwxr-xr-x | tests/bugs/bug-892730.t | 76 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 |
4 files changed, 88 insertions, 7 deletions
diff --git a/tests/bugs/bug-892730.t b/tests/bugs/bug-892730.t new file mode 100755 index 000000000..0a677069e --- /dev/null +++ b/tests/bugs/bug-892730.t @@ -0,0 +1,76 @@ +#!/bin/bash +# +# Bug 892730 - Verify that afr handles EIO errors from the brick properly. +# +# The associated bug describes a problem where EIO errors returned from the +# local filesystem of a brick that is part of a replica volume are exposed to +# the user. This test simulates such failures and verifies that the volume +# operates as expected. +# +######## + +. $(dirname $0)/../include.rc + +cleanup; + +TEST mkdir -p $B0/test{1,2} + +# The graph is a two brick replica with error-gen enabled on the second brick +# and configured to return EIO lookup errors 100% of the time. This simulates +# a brick with a crashed or shut down local filesystem. Note that the order in +# which errors occur is a factor in reproducing the original bug (error-gen +# must be enabled in the second brick for this test to be effective). + +cat > $B0/test.vol <<EOF +volume test-posix-0 + type storage/posix + option directory $B0/test1 +end-volume + +volume test-locks-0 + type features/locks + subvolumes test-posix-0 +end-volume + +volume test-posix-1 + type storage/posix + option directory $B0/test2 +end-volume + +volume test-error-1 + type debug/error-gen + option failure 100 + option enable lookup + option error-no EIO + subvolumes test-posix-1 +end-volume + +volume test-locks-1 + type features/locks + subvolumes test-error-1 +end-volume + +volume test-replicate-0 + type cluster/replicate + option background-self-heal-count 0 + subvolumes test-locks-0 test-locks-1 +end-volume +EOF + +TEST glusterd + +TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0 + +# We should be able to create and remove a file without interference from the +# "broken" brick. + +TEST touch $M0/file +TEST rm $M0/file + +TEST umount $M0 + +rm -f $B0/test.vol +rm -rf $B0/test1 $B0/test2 + +cleanup; + diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index ab1d9018c..97303d106 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1639,7 +1639,7 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { local->op_ret = -1; local->op_errno = afr_most_important_error(local->op_errno, - op_errno); + op_errno, _gf_true); goto out; } else { @@ -1993,11 +1993,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) * The hierarchy is ESTALE > EIO > ENOENT > others */ int32_t -afr_most_important_error(int32_t old_errno, int32_t new_errno) +afr_most_important_error(int32_t old_errno, int32_t new_errno, + gf_boolean_t eio) { if (old_errno == ESTALE || new_errno == ESTALE) return ESTALE; - if (old_errno == EIO || new_errno == EIO) + if (eio && (old_errno == EIO || new_errno == EIO)) return EIO; if (old_errno == ENOENT || new_errno == ENOENT) return ENOENT; @@ -2022,7 +2023,8 @@ afr_resultant_errno_get (int32_t *children, child = i; } op_errno = afr_most_important_error(op_errno, - child_errno[child]); + child_errno[child], + _gf_false); } return op_errno; } @@ -2034,7 +2036,8 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno) if (op_errno == ENOENT) local->enoent_count++; - local->op_errno = afr_most_important_error(local->op_errno, op_errno); + local->op_errno = afr_most_important_error(local->op_errno, op_errno, + _gf_false); if (local->op_errno == ESTALE) { local->op_ret = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index e7026081b..f59a02557 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -112,7 +112,8 @@ void afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno) { sh->op_ret = -1; - sh->op_errno = afr_most_important_error(sh->op_errno, op_errno); + sh->op_errno = afr_most_important_error(sh->op_errno, op_errno, + _gf_false); } void diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 93bd92f25..85b4b6831 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -954,7 +954,8 @@ afr_children_rm_child (int32_t *children, int32_t child, void afr_reset_children (int32_t *children, int32_t child_count); int32_t -afr_most_important_error(int32_t old_errno, int32_t new_errno); +afr_most_important_error(int32_t old_errno, int32_t new_errno, + gf_boolean_t eio); int afr_errno_count (int32_t *children, int *child_errno, unsigned int child_count, int32_t op_errno); |