diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2013-11-21 16:17:32 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-11-26 00:34:14 -0800 |
commit | 1d554b179f63a5a56ae447f2a5b0044c49ae2642 (patch) | |
tree | 5dd469565b4f2b42afecbfc88c107090210c1e17 | |
parent | b87d96b97d4a0cdc0883bec8ea8b4730b82fb3ba (diff) |
cluster/afr: Provide HA for pathinfo getxattr
Problem:
afr_[f]getxattr_pathinfo_cbk fails the fop even when it succeeded on
one of the bricks. This happens when the last response to the pathinfo
[f]getxattr is a failure, because the unwind uses only the last
response's op_ret/op_errno.
Fix:
Remember whether any of the pathinfo [f]getxattr calls succeeded, and
unwind to the xlators above with the remembered op_ret/op_errno instead
of the last response's values.
Note:
Winding a fop to a client xlator that is not connected to its server
produces an error log. Prevent that by not winding the fop at all when
the client xlator is down.
Change-Id: I846e8c47423ffcfa2eabffe8924534781a36841a
BUG: 1032927
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/6332
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- | tests/bugs/bug-1032927.t | 32 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 63 |
2 files changed, 81 insertions, 14 deletions
diff --git a/tests/bugs/bug-1032927.t b/tests/bugs/bug-1032927.t new file mode 100644 index 00000000000..2106f3d5bfe --- /dev/null +++ b/tests/bugs/bug-1032927.t @@ -0,0 +1,32 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +#This tests if pathinfo getxattr fails when one of the bricks is down +#Lets hope it doesn't + +cleanup; +function get_pathinfo_in_loop { + failed=0 + for i in {1..1000} + do + getfattr -n trusted.glusterfs.pathinfo $M0 2>/dev/null + if [ $? -ne 0 ]; then failed=1;break; fi + done + return $failed +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +cd $M0 +TEST kill_brick $V0 $H0 $B0/${V0}1 + +#when one of the bricks is down getfattr of pathinfo should not fail +#Lets just do the test for 1000 times to see if we hit the race +TEST get_pathinfo_in_loop + +cleanup diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index e06e3b2f24d..ac64a763626 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1127,6 +1127,14 @@ afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, { callcnt = --local->call_count; + if (op_ret < 0) { + local->op_errno = op_errno; + } else { + local->op_ret = op_ret; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref (xdata); + } + if (!dict || (op_ret < 0)) goto out; @@ -1204,8 +1212,8 @@ out: " key in dict"); unwind: - AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr, - xdata); + AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret, + local->op_errno, nxattr, local->xdata_rsp); if (nxattr) dict_unref (nxattr); @@ -1242,6 +1250,14 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, { callcnt = --local->call_count; + if (op_ret < 0) { + local->op_errno = op_errno; + } else { + 
local->op_ret = op_ret; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref (xdata); + } + if (!dict || (op_ret < 0)) goto out; @@ -1316,8 +1332,8 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, " key in dict"); unwind: - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr, - xdata); + AFR_STACK_UNWIND (getxattr, frame, local->op_ret, + local->op_errno, nxattr, local->xdata_rsp); if (nxattr) dict_unref (nxattr); @@ -1433,18 +1449,27 @@ afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame, afr_local_t *local = NULL; xlator_t **children = NULL; int i = 0; + int call_count = 0; priv = this->private; children = priv->children; local = frame->local; - local->call_count = priv->child_count; + //local->call_count set in afr_local_init + call_count = local->call_count; + + //If up-children count is 0, afr_local_init would have failed already + //and the call would have unwound so not handling it here. for (i = 0; i < priv->child_count; i++) { - STACK_WIND_COOKIE (frame, cbk, - (void *) (long) i, - children[i], children[i]->fops->getxattr, - loc, name, NULL); + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, cbk, + (void *) (long) i, children[i], + children[i]->fops->getxattr, + loc, name, NULL); + if (!--call_count) + break; + } } return; } @@ -1693,18 +1718,28 @@ afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame, afr_local_t *local = NULL; xlator_t **children = NULL; int i = 0; + int call_count = 0; priv = this->private; children = priv->children; local = frame->local; - local->call_count = priv->child_count; + //local->call_count set in afr_local_init + call_count = local->call_count; + + //If up-children count is 0, afr_local_init would have failed already + //and the call would have unwound so not handling it here. 
for (i = 0; i < priv->child_count; i++) { - STACK_WIND_COOKIE (frame, cbk, - (void *) (long) i, - children[i], children[i]->fops->fgetxattr, - fd, name, NULL); + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, cbk, + (void *) (long) i, + children[i], + children[i]->fops->fgetxattr, + fd, name, NULL); + if (!--call_count) + break; + } } return; |