From 1d554b179f63a5a56ae447f2a5b0044c49ae2642 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Thu, 21 Nov 2013 16:17:32 +0530 Subject: cluster/afr: Provide HA for pathinfo getxattr Problem: afr_[f]getxattr_pathinfo_cbks fail the fop even when it succeeded on one of the bricks. This can happen if the last response to pathinfo [f]getxattr is a failure. Fix: Remember if any of the [f]getxattr_pathinfos are successful and send that as the op_ret/op_errno value to the xlators above. Note: Winding fop to a client xlator that is not connected to server produces an error log. Preventing that by not even winding fop when client xlator is DOWN. Change-Id: I846e8c47423ffcfa2eabffe8924534781a36841a BUG: 1032927 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6332 Reviewed-by: Vijay Bellur Tested-by: Gluster Build System --- tests/bugs/bug-1032927.t | 32 ++++++++++++++++ xlators/cluster/afr/src/afr-inode-read.c | 63 +++++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 tests/bugs/bug-1032927.t diff --git a/tests/bugs/bug-1032927.t b/tests/bugs/bug-1032927.t new file mode 100644 index 00000000000..2106f3d5bfe --- /dev/null +++ b/tests/bugs/bug-1032927.t @@ -0,0 +1,32 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +#This tests if pathinfo getxattr fails when one of the bricks is down +#Lets hope it doesn't + +cleanup; +function get_pathinfo_in_loop { + failed=0 + for i in {1..1000} + do + getfattr -n trusted.glusterfs.pathinfo $M0 2>/dev/null + if [ $? -ne 0 ]; then failed=1;break; fi + done + return $failed +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +cd $M0 +TEST kill_brick $V0 $H0 $B0/${V0}1 + +#when one of the bricks is down getfattr of pathinfo should not fail +#Lets just do the test for 1000 times to see if we hit the race +TEST get_pathinfo_in_loop + +cleanup diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index e06e3b2f24d..ac64a763626 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1127,6 +1127,14 @@ afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, { callcnt = --local->call_count; + if (op_ret < 0) { + local->op_errno = op_errno; + } else { + local->op_ret = op_ret; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref (xdata); + } + if (!dict || (op_ret < 0)) goto out; @@ -1204,8 +1212,8 @@ out: " key in dict"); unwind: - AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr, - xdata); + AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret, + local->op_errno, nxattr, local->xdata_rsp); if (nxattr) dict_unref (nxattr); @@ -1242,6 +1250,14 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, { callcnt = --local->call_count; + if (op_ret < 0) { + local->op_errno = op_errno; + } else { + local->op_ret = op_ret; + if (!local->xdata_rsp && xdata) + local->xdata_rsp = dict_ref (xdata); + } + if (!dict || (op_ret < 0)) goto out; @@ -1316,8 +1332,8 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie, " key in dict"); unwind: - AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr, - xdata); + AFR_STACK_UNWIND (getxattr, frame, local->op_ret, + local->op_errno, nxattr, local->xdata_rsp); if (nxattr) dict_unref (nxattr); @@ -1433,18 +1449,27 @@ afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame, afr_local_t *local = NULL; xlator_t **children = NULL; int i = 0; + int call_count = 0; priv = this->private; children = priv->children; local = frame->local; - local->call_count = priv->child_count; + //local->call_count set in afr_local_init + call_count = local->call_count; + + //If up-children count is 0, afr_local_init would have failed already + //and the call would have unwound so not handling it here. for (i = 0; i < priv->child_count; i++) { - STACK_WIND_COOKIE (frame, cbk, - (void *) (long) i, - children[i], children[i]->fops->getxattr, - loc, name, NULL); + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, cbk, + (void *) (long) i, children[i], + children[i]->fops->getxattr, + loc, name, NULL); + if (!--call_count) + break; + } } return; } @@ -1693,18 +1718,28 @@ afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame, afr_local_t *local = NULL; xlator_t **children = NULL; int i = 0; + int call_count = 0; priv = this->private; children = priv->children; local = frame->local; - local->call_count = priv->child_count; + //local->call_count set in afr_local_init + call_count = local->call_count; + + //If up-children count is 0, afr_local_init would have failed already + //and the call would have unwound so not handling it here. for (i = 0; i < priv->child_count; i++) { - STACK_WIND_COOKIE (frame, cbk, - (void *) (long) i, - children[i], children[i]->fops->fgetxattr, - fd, name, NULL); + if (local->child_up[i]) { + STACK_WIND_COOKIE (frame, cbk, + (void *) (long) i, + children[i], + children[i]->fops->fgetxattr, + fd, name, NULL); + if (!--call_count) + break; + } } return; -- cgit