summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2014-06-12 20:38:34 +0530
committerVijay Bellur <vbellur@redhat.com>2014-06-14 04:10:06 -0700
commit12b7797bfb143890ad4ca085332ec2f5e1ed08b8 (patch)
tree36fc239722c1df779e7a4efc43f64063bcd39ffd
parent7aa3630b1f5e07227e9cd167cbd717bd7932ae78 (diff)
Fix resolution issues across fuse/server/afr
Problems with fuse/server: Fuse loc touch up sets loc->name even when pargfid is not known. Server lookup does (pargfid, name) based lookup when name is set ignoring the gfid. Because of this server resolver finds that the lookup came on (null-pargfid, name) and fails the lookup with EINVAL. Fix: Don't set loc->name in loc_touchup if the pargfid is not known. Did the same even for server-resolver Problem with afr: Lets say there is a directory hierarchy a/b/c/d on the mount and the user is cd'ed into the directory. Bring down one of the bricks of replica and remove all directories/files to simulate disk replacement on that brick. Now this brick is brought back up. Creates on the cd'ed directory fail with ESTALE. Basically before sending a create of 'f' inside 'd', fuse sends a lookup to make sure the file is not present. On one of the bricks 'd' is present and 'f' is not so it sends ENOENT as response. On the new brick 'd' itself is not present. So it sends ESTALE. In afr ESTALE is considered to be special errno on witnessing which lookup has to fail. And ESTALE is given more priority than ENOENT. Due to these reasons lookup fails with ESTALE rather than ENOENT. Since lookup didn't fail with ENOENT, 'create' can't be issued so the command is failed with ESTALE. Solution: Afr needs to consider ESTALE errno normally and ENOENT needs to be given more priority so that operations like create can proceed even when only one of the brick is up and running. Whenever client xlator identifies that gfid-changed, it sets that information in lookup xdata. Afr uses this information to fail the lookup with ESTALE so that top xlator can send fresh lookup. Change-Id: Ica6ce01baef08620154050a635e6f97d51029ef6 BUG: 1106408 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com> Reviewed-on: http://review.gluster.org/8015 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--tests/basic/afr/resolve.t49
-rw-r--r--xlators/cluster/afr/src/afr-common.c29
-rw-r--r--xlators/cluster/afr/src/afr.h4
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c5
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c4
-rw-r--r--xlators/protocol/server/src/server-resolve.c5
6 files changed, 76 insertions, 20 deletions
diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t
new file mode 100644
index 00000000000..7dd432996f6
--- /dev/null
+++ b/tests/basic/afr/resolve.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check that operations succeed after changing the disk of the brick while
+#a brick is down
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+TEST cd $M0
+TEST mkdir -p a/b/c/d/e
+TEST cd a/b/c/d/e
+echo abc > g
+
+#Simulate disk replacement
+TEST kill_brick $V0 $H0 $B0/${V0}0
+rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#Test that the lookup returns ENOENT instead of ESTALE
+#If lookup returns ESTALE this command will fail with ESTALE
+TEST touch f
+
+#Test that ESTALE is ignored when there is a good copy
+EXPECT abc cat g
+
+#Simulate file changing only one mount
+#create the file on first mount
+echo ghi > $M0/b
+
+#re-create the file on other mount while one of the bricks is down.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M1/b
+echo jkl > $M1/b
+#Clear the extended attributes on the directory to create a scenario where
+#gfid-mismatch happened. This should result in EIO
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! cat $M0/b
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 11aae4617e7..e4fbe794f08 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1157,17 +1157,18 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
afr_inode_read_subvol_get (local->loc.parent, this, readable,
NULL, &event);
- /* First, check if we have an ESTALE from somewhere,
- If so, propagate that so that a revalidate can be
+ /* First, check if we have a gfid-change from somewhere,
+ If so, propagate that so that a fresh lookup can be
issued
*/
+ if (local->cont.lookup.needs_fresh_lookup) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ goto unwind;
+ }
+
op_errno = afr_final_errno (frame->local, this->private);
local->op_errno = op_errno;
- if (op_errno == ESTALE) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- goto unwind;
- }
read_subvol = -1;
for (i = 0; i < priv->child_count; i++) {
@@ -1270,7 +1271,7 @@ unwind:
* others in that they must be given higher priority while
* returning to the user.
*
- * The hierarchy is ESTALE > ENOENT > others
+ * The hierarchy is ENODATA > ENOENT > ESTALE > others
*/
int
@@ -1278,10 +1279,10 @@ afr_higher_errno (int32_t old_errno, int32_t new_errno)
{
if (old_errno == ENODATA || new_errno == ENODATA)
return ENODATA;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
if (old_errno == ESTALE || new_errno == ESTALE)
return ESTALE;
- if (old_errno == ENOENT || new_errno == ENOENT)
- return ENOENT;
return new_errno;
}
@@ -1523,6 +1524,14 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->replies[child_index].valid = 1;
local->replies[child_index].op_ret = op_ret;
local->replies[child_index].op_errno = op_errno;
+ /*
+ * On revalidate lookup if the gfid-changed, afr should unwind the fop
+ * with ESTALE so that a fresh lookup will be sent by the top xlator.
+ * So remember it.
+ */
+ if (xdata && dict_get (xdata, "gfid-changed"))
+ local->cont.lookup.needs_fresh_lookup = _gf_true;
+
if (op_ret != -1) {
local->replies[child_index].poststat = *buf;
local->replies[child_index].postparent = *postparent;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index c5b326cee06..b02ecaf3050 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -418,6 +418,10 @@ typedef struct _afr_local {
struct {
struct {
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
+
+ struct {
unsigned char buf_set;
struct statvfs buf;
} statfs;
diff --git a/xlators/mount/fuse/src/fuse-resolve.c b/xlators/mount/fuse/src/fuse-resolve.c
index fc04d2c8efa..76b1d9a72cc 100644
--- a/xlators/mount/fuse/src/fuse-resolve.c
+++ b/xlators/mount/fuse/src/fuse-resolve.c
@@ -47,11 +47,6 @@ fuse_resolve_loc_touchup (fuse_state_t *state)
} else if (loc->inode) {
ret = inode_path (loc->inode, NULL, &path);
uuid_copy (loc->gfid, loc->inode->gfid);
- if (path) {
- loc->name = strrchr (path, '/');
- if (loc->name)
- loc->name++;
- }
}
if (ret)
gf_log (THIS->name, GF_LOG_TRACE,
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
index dd6f48afb57..64d30f2a99f 100644
--- a/xlators/protocol/client/src/client-rpc-fops.c
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -2737,8 +2737,12 @@ client3_3_lookup_cbk (struct rpc_req *req, struct iovec *iov, int count,
&& (uuid_compare (stbuf.ia_gfid, inode->gfid) != 0)) {
gf_log (frame->this->name, GF_LOG_DEBUG,
"gfid changed for %s", local->loc.path);
+
rsp.op_ret = -1;
op_errno = ESTALE;
+ if (xdata)
+ ret = dict_set_int32 (xdata, "gfid-changed", 1);
+
goto out;
}
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
index 9384e765cca..3b787c61734 100644
--- a/xlators/protocol/server/src/server-resolve.c
+++ b/xlators/protocol/server/src/server-resolve.c
@@ -48,11 +48,6 @@ resolve_loc_touchup (call_frame_t *frame)
loc->name = resolve->bname;
} else if (loc->inode) {
ret = inode_path (loc->inode, NULL, &path);
- if (path) {
- loc->name = strrchr (path, '/');
- if (loc->name)
- loc->name++;
- }
}
if (ret)
gf_log (frame->this->name, GF_LOG_TRACE,