summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Pranith Kumar K <pkarampu@redhat.com> 2013-11-21 16:17:32 +0530
committer Vijay Bellur <vbellur@redhat.com> 2013-11-26 00:34:14 -0800
commit 1d554b179f63a5a56ae447f2a5b0044c49ae2642 (patch)
tree 5dd469565b4f2b42afecbfc88c107090210c1e17
parent b87d96b97d4a0cdc0883bec8ea8b4730b82fb3ba (diff)
cluster/afr: Provide HA for pathinfo getxattr
Problem:
afr_[f]getxattr_pathinfo_cbks fail the fop even when it succeeded on one of the bricks. This can happen if the last response to pathinfo [f]getxattr is a failure.

Fix:
Remember if any of the [f]getxattr_pathinfos are successful and send that as the op_ret/op_errno value to the xlators above.

Note:
Winding fop to a client xlator that is not connected to server produces an error log. Preventing that by not even winding fop when client xlator is DOWN.

Change-Id: I846e8c47423ffcfa2eabffe8924534781a36841a
BUG: 1032927
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/6332
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- tests/bugs/bug-1032927.t 32
-rw-r--r-- xlators/cluster/afr/src/afr-inode-read.c 63
2 files changed, 81 insertions, 14 deletions
diff --git a/tests/bugs/bug-1032927.t b/tests/bugs/bug-1032927.t
new file mode 100644
index 00000000000..2106f3d5bfe
--- /dev/null
+++ b/tests/bugs/bug-1032927.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This tests if pathinfo getxattr fails when one of the bricks is down
+#Lets hope it doesn't
+
+cleanup;
+function get_pathinfo_in_loop {
+ failed=0
+ for i in {1..1000}
+ do
+ getfattr -n trusted.glusterfs.pathinfo $M0 2>/dev/null
+ if [ $? -ne 0 ]; then failed=1;break; fi
+ done
+ return $failed
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#when one of the bricks is down getfattr of pathinfo should not fail
+#Lets just do the test for 1000 times to see if we hit the race
+TEST get_pathinfo_in_loop
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index e06e3b2f24d..ac64a763626 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -1127,6 +1127,14 @@ afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
{
callcnt = --local->call_count;
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+
if (!dict || (op_ret < 0))
goto out;
@@ -1204,8 +1212,8 @@ out:
" key in dict");
unwind:
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, nxattr,
- xdata);
+ AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret,
+ local->op_errno, nxattr, local->xdata_rsp);
if (nxattr)
dict_unref (nxattr);
@@ -1242,6 +1250,14 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
{
callcnt = --local->call_count;
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref (xdata);
+ }
+
if (!dict || (op_ret < 0))
goto out;
@@ -1316,8 +1332,8 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
" key in dict");
unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
- xdata);
+ AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+ local->op_errno, nxattr, local->xdata_rsp);
if (nxattr)
dict_unref (nxattr);
@@ -1433,18 +1449,27 @@ afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
afr_local_t *local = NULL;
xlator_t **children = NULL;
int i = 0;
+ int call_count = 0;
priv = this->private;
children = priv->children;
local = frame->local;
- local->call_count = priv->child_count;
+ //local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ //If up-children count is 0, afr_local_init would have failed already
+ //and the call would have unwound so not handling it here.
for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, cbk,
- (void *) (long) i,
- children[i], children[i]->fops->getxattr,
- loc, name, NULL);
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i, children[i],
+ children[i]->fops->getxattr,
+ loc, name, NULL);
+ if (!--call_count)
+ break;
+ }
}
return;
}
@@ -1693,18 +1718,28 @@ afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
afr_local_t *local = NULL;
xlator_t **children = NULL;
int i = 0;
+ int call_count = 0;
priv = this->private;
children = priv->children;
local = frame->local;
- local->call_count = priv->child_count;
+ //local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ //If up-children count is 0, afr_local_init would have failed already
+ //and the call would have unwound so not handling it here.
for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, cbk,
- (void *) (long) i,
- children[i], children[i]->fops->fgetxattr,
- fd, name, NULL);
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE (frame, cbk,
+ (void *) (long) i,
+ children[i],
+ children[i]->fops->fgetxattr,
+ fd, name, NULL);
+ if (!--call_count)
+ break;
+ }
}
return;