diff options
-rwxr-xr-x | tests/bugs/bug-892730.t | 76 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 |
4 files changed, 88 insertions, 7 deletions
diff --git a/tests/bugs/bug-892730.t b/tests/bugs/bug-892730.t new file mode 100755 index 000000000..0a677069e --- /dev/null +++ b/tests/bugs/bug-892730.t @@ -0,0 +1,76 @@ +#!/bin/bash +# +# Bug 892730 - Verify that afr handles EIO errors from the brick properly. +# +# The associated bug describes a problem where EIO errors returned from the +# local filesystem of a brick that is part of a replica volume are exposed to +# the user. This test simulates such failures and verifies that the volume +# operates as expected. +# +######## + +. $(dirname $0)/../include.rc + +cleanup; + +TEST mkdir -p $B0/test{1,2} + +# The graph is a two brick replica with error-gen enabled on the second brick +# and configured to return EIO lookup errors 100% of the time. This simulates +# a brick with a crashed or shut down local filesystem. Note that the order in +# which errors occur is a factor in reproducing the original bug (error-gen +# must be enabled in the second brick for this test to be effective). + +cat > $B0/test.vol <<EOF +volume test-posix-0 + type storage/posix + option directory $B0/test1 +end-volume + +volume test-locks-0 + type features/locks + subvolumes test-posix-0 +end-volume + +volume test-posix-1 + type storage/posix + option directory $B0/test2 +end-volume + +volume test-error-1 + type debug/error-gen + option failure 100 + option enable lookup + option error-no EIO + subvolumes test-posix-1 +end-volume + +volume test-locks-1 + type features/locks + subvolumes test-error-1 +end-volume + +volume test-replicate-0 + type cluster/replicate + option background-self-heal-count 0 + subvolumes test-locks-0 test-locks-1 +end-volume +EOF + +TEST glusterd + +TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0 + +# We should be able to create and remove a file without interference from the +# "broken" brick. + +TEST touch $M0/file +TEST rm $M0/file + +TEST umount $M0 + +rm -f $B0/test.vol +rm -rf $B0/test1 $B0/test2 + +cleanup; + diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index ab1d9018c..97303d106 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1639,7 +1639,7 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { local->op_ret = -1; local->op_errno = afr_most_important_error(local->op_errno, - op_errno); + op_errno, _gf_true); goto out; } else { @@ -1993,11 +1993,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) * The hierarchy is ESTALE > EIO > ENOENT > others */ int32_t -afr_most_important_error(int32_t old_errno, int32_t new_errno) +afr_most_important_error(int32_t old_errno, int32_t new_errno, + gf_boolean_t eio) { if (old_errno == ESTALE || new_errno == ESTALE) return ESTALE; - if (old_errno == EIO || new_errno == EIO) + if (eio && (old_errno == EIO || new_errno == EIO)) return EIO; if (old_errno == ENOENT || new_errno == ENOENT) return ENOENT; @@ -2022,7 +2023,8 @@ afr_resultant_errno_get (int32_t *children, child = i; } op_errno = afr_most_important_error(op_errno, - child_errno[child]); + child_errno[child], + _gf_false); } return op_errno; } @@ -2034,7 +2036,8 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno) if (op_errno == ENOENT) local->enoent_count++; - local->op_errno = afr_most_important_error(local->op_errno, op_errno); + local->op_errno = afr_most_important_error(local->op_errno, op_errno, + _gf_false); if (local->op_errno == ESTALE) { local->op_ret = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index e7026081b..f59a02557 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -112,7 +112,8 @@ void afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno) { sh->op_ret = -1; - sh->op_errno = afr_most_important_error(sh->op_errno, op_errno); + sh->op_errno = afr_most_important_error(sh->op_errno, op_errno, + _gf_false); } void diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 93bd92f25..85b4b6831 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -954,7 +954,8 @@ afr_children_rm_child (int32_t *children, int32_t child, void afr_reset_children (int32_t *children, int32_t child_count); int32_t -afr_most_important_error(int32_t old_errno, int32_t new_errno); +afr_most_important_error(int32_t old_errno, int32_t new_errno, + gf_boolean_t eio); int afr_errno_count (int32_t *children, int *child_errno, unsigned int child_count, int32_t op_errno); |