diff options
| Mode | File | Lines changed |
|---|---|---|
| -rwxr-xr-x | tests/bugs/bug-892730.t | 76 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 13 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 3 |
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 |
4 files changed, 88 insertions, 7 deletions
diff --git a/tests/bugs/bug-892730.t b/tests/bugs/bug-892730.t new file mode 100755 index 00000000000..0a677069eb3 --- /dev/null +++ b/tests/bugs/bug-892730.t @@ -0,0 +1,76 @@ +#!/bin/bash +# +# Bug 892730 - Verify that afr handles EIO errors from the brick properly. +# +# The associated bug describes a problem where EIO errors returned from the +# local filesystem of a brick that is part of a replica volume are exposed to +# the user. This test simulates such failures and verifies that the volume +# operates as expected. +# +######## + +. $(dirname $0)/../include.rc + +cleanup; + +TEST mkdir -p $B0/test{1,2} + +# The graph is a two brick replica with error-gen enabled on the second brick +# and configured to return EIO lookup errors 100% of the time. This simulates +# a brick with a crashed or shut down local filesystem. Note that the order in +# which errors occur is a factor in reproducing the original bug (error-gen +# must be enabled in the second brick for this test to be effective). + +cat > $B0/test.vol <<EOF +volume test-posix-0 +    type storage/posix +    option directory $B0/test1 +end-volume + +volume test-locks-0 +    type features/locks +    subvolumes test-posix-0 +end-volume + +volume test-posix-1 +    type storage/posix +    option directory $B0/test2 +end-volume + +volume test-error-1 +    type debug/error-gen +    option failure 100 +    option enable lookup +    option error-no EIO +    subvolumes test-posix-1 +end-volume + +volume test-locks-1 +    type features/locks +    subvolumes test-error-1 +end-volume + +volume test-replicate-0 +    type cluster/replicate +    option background-self-heal-count 0 +    subvolumes test-locks-0 test-locks-1 +end-volume +EOF + +TEST glusterd + +TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0 + +# We should be able to create and remove a file without interference from the +# "broken" brick. 
+ +TEST touch $M0/file +TEST rm $M0/file + +TEST umount $M0 + +rm -f $B0/test.vol +rm -rf $B0/test1 $B0/test2 + +cleanup; + diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index ab1d9018c47..97303d1065b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1639,7 +1639,7 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,          if (op_ret == -1) {                  local->op_ret = -1;  		local->op_errno = afr_most_important_error(local->op_errno, -							   op_errno); +							   op_errno, _gf_true);                  goto out;          } else { @@ -1993,11 +1993,12 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)   * The hierarchy is ESTALE > EIO > ENOENT > others   */  int32_t -afr_most_important_error(int32_t old_errno, int32_t new_errno) +afr_most_important_error(int32_t old_errno, int32_t new_errno, +			 gf_boolean_t eio)  {  	if (old_errno == ESTALE || new_errno == ESTALE)  		return ESTALE; -	if (old_errno == EIO || new_errno == EIO) +	if (eio && (old_errno == EIO || new_errno == EIO))  		return EIO;  	if (old_errno == ENOENT || new_errno == ENOENT)  		return ENOENT; @@ -2022,7 +2023,8 @@ afr_resultant_errno_get (int32_t *children,                          child = i;                  }  		op_errno = afr_most_important_error(op_errno, -						    child_errno[child]); +						    child_errno[child], +						    _gf_false);          }          return op_errno;  } @@ -2034,7 +2036,8 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret,  int32_t op_errno)          if (op_errno == ENOENT)                  local->enoent_count++; -	local->op_errno = afr_most_important_error(local->op_errno, op_errno); +	local->op_errno = afr_most_important_error(local->op_errno, op_errno, +						   _gf_false);          if (local->op_errno == ESTALE) {                  local->op_ret = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c 
b/xlators/cluster/afr/src/afr-self-heal-common.c index e7026081bda..f59a02557c1 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -112,7 +112,8 @@ void  afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)  {          sh->op_ret = -1; -	sh->op_errno = afr_most_important_error(sh->op_errno, op_errno); +	sh->op_errno = afr_most_important_error(sh->op_errno, op_errno, +						_gf_false);  }  void diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 93bd92f25e7..85b4b6831b8 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -954,7 +954,8 @@ afr_children_rm_child (int32_t *children, int32_t child,  void  afr_reset_children (int32_t *children, int32_t child_count);  int32_t -afr_most_important_error(int32_t old_errno, int32_t new_errno); +afr_most_important_error(int32_t old_errno, int32_t new_errno, +			 gf_boolean_t eio);  int  afr_errno_count (int32_t *children, int *child_errno,                   unsigned int child_count, int32_t op_errno);  | 
