summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2014-06-23 21:35:29 +0530
committerNiels de Vos <ndevos@redhat.com>2014-06-24 09:26:13 -0700
commit5d2603c75bfd78c8b33903a20e844430276e7539 (patch)
tree3ca00f09ca0835de64167674351c386190e0a25d
parentb8f6798c173301f9fe0b12b53c67366fb8eaed59 (diff)
cluster/afr: Fix resolution issues with afr
Problem with afr: Lets say there is a directory hierarchy a/b/c/d on the mount and the user is cd'ed into the directory. Bring down one of the bricks of replica and remove all directories/files to simulate disk replacement on that brick. Now this brick is brought back up. Creates on the cd'ed directory fail with ESTALE. Basically before sending a create of 'f' inside 'd', fuse sends a lookup to make sure the file is not present. On one of the bricks 'd' is present and 'f' is not so it sends ENOENT as response. On the new brick 'd' itself is not present. So it sends ESTALE. In afr ESTALE is considered to be special errno on witnessing which lookup has to fail. And ESTALE is given more priority than ENOENT. Due to these reasons lookup fails with ESTALE rather than ENOENT. Since lookup didn't fail with ENOENT, 'create' can't be issued so the command is failed with ESTALE. Solution: Afr needs to consider ESTALE errno normally and ENOENT needs to be given more priority so that operations like create can proceed even when only one of the brick is up and running. Whenever client xlator identifies that gfid-changed, it sets that information in lookup xdata. Afr uses this information to fail the lookup with ESTALE so that top xlator can send fresh lookup. Change-Id: Ie8e0e327542fd644409eb5dadf451679afa1c0e5 BUG: 1112348 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/8154 Tested-by: Justin Clift <justin@gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Ravishankar N <ravishankar@redhat.com> Reviewed-by: Niels de Vos <ndevos@redhat.com>
-rw-r--r--tests/basic/afr/resolve.t49
-rw-r--r--tests/include.rc2
-rw-r--r--xlators/cluster/afr/src/afr-common.c28
-rw-r--r--xlators/cluster/afr/src/afr.h1
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c2
5 files changed, 71 insertions, 11 deletions
diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t
new file mode 100644
index 00000000000..7dd432996f6
--- /dev/null
+++ b/tests/basic/afr/resolve.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check that operations succeed after changing the disk of the brick while
+#a brick is down
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+TEST cd $M0
+TEST mkdir -p a/b/c/d/e
+TEST cd a/b/c/d/e
+echo abc > g
+
+#Simulate disk replacement
+TEST kill_brick $V0 $H0 $B0/${V0}0
+rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#Test that the lookup returns ENOENT instead of ESTALE
+#If lookup returns ESTALE this command will fail with ESTALE
+TEST touch f
+
+#Test that ESTALE is ignored when there is a good copy
+EXPECT abc cat g
+
+#Simulate file changing only one mount
+#create the file on first mount
+echo ghi > $M0/b
+
+#re-create the file on other mount while one of the bricks is down.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M1/b
+echo jkl > $M1/b
+#Clear the extended attributes on the directory to create a scenario where
+#gfid-mismatch happened. This should result in EIO
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! cat $M0/b
+cleanup
diff --git a/tests/include.rc b/tests/include.rc
index 40dd8ac96b9..3017de9e653 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -7,7 +7,7 @@ B0=${B0:=/d/backends}; # top level of brick directories
H0=${H0:=`hostname --fqdn`}; # hostname
DEBUG=${DEBUG:=0} # turn on debugging?
statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump
-
+CHILD_UP_TIMEOUT=60
CLI="gluster --mode=script";
mkdir -p $B0;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index c4e57625220..2f9913152d4 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2321,18 +2321,18 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ESTALE > EIO > ENOENT > others
+ * The hierarchy is EIO > ENOENT > ESTALE > others
*/
int32_t
afr_most_important_error(int32_t old_errno, int32_t new_errno,
gf_boolean_t eio)
{
- if (old_errno == ESTALE || new_errno == ESTALE)
- return ESTALE;
if (eio && (old_errno == EIO || new_errno == EIO))
return EIO;
if (old_errno == ENOENT || new_errno == ENOENT)
return ENOENT;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
return new_errno;
}
@@ -2361,8 +2361,19 @@ afr_resultant_errno_get (int32_t *children,
}
static void
-afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno,
+ dict_t *xattr)
{
+ if (local->cont.lookup.needs_fresh_lookup)
+ return;
+
+ if (xattr && dict_get (xattr, "gfid-changed")) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ local->cont.lookup.needs_fresh_lookup = _gf_true;
+ return;
+ }
+
if ((local->loc.name == NULL) && (op_errno == ESTALE))
op_errno = ENOENT;
@@ -2371,10 +2382,6 @@ afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
local->op_errno = afr_most_important_error(local->op_errno, op_errno,
_gf_false);
-
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
}
static void
@@ -2491,7 +2498,7 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind
afr_private_t *priv = this->private;
if (local->success_count == 0) {
- if (local->op_errno != ESTALE) {
+ if (!local->cont.lookup.needs_fresh_lookup) {
local->op_ret = op_ret;
local->op_errno = 0;
}
@@ -2528,7 +2535,8 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie,
local = frame->local;
if (op_ret == -1) {
- afr_lookup_handle_error (local, op_ret, op_errno);
+ afr_lookup_handle_error (local, op_ret, op_errno,
+ xattr);
goto unlock;
}
afr_lookup_handle_success (local, this, child_index, op_ret,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 9abb48694a2..af57d8b0eff 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -550,6 +550,7 @@ typedef struct _afr_local {
int32_t **pending_matrix;
gf_boolean_t fresh_lookup;
gf_boolean_t possible_spb;
+ gf_boolean_t needs_fresh_lookup;
} lookup;
struct {
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
index 38bc7b6238f..3bd1206e4d0 100644
--- a/xlators/protocol/client/src/client-rpc-fops.c
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -2755,6 +2755,8 @@ client3_3_lookup_cbk (struct rpc_req *req, struct iovec *iov, int count,
"gfid changed for %s", local->loc.path);
rsp.op_ret = -1;
op_errno = ESTALE;
+ if (xdata)
+ ret = dict_set_int32 (xdata, "gfid-changed", 1);
goto out;
}