diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2013-11-21 16:17:32 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2013-11-26 00:34:14 -0800 | 
| commit | 1d554b179f63a5a56ae447f2a5b0044c49ae2642 (patch) | |
| tree | 5dd469565b4f2b42afecbfc88c107090210c1e17 | |
| parent | b87d96b97d4a0cdc0883bec8ea8b4730b82fb3ba (diff) | |
cluster/afr: Provide HA for pathinfo getxattr
Problem:
afr_[f]getxattr_pathinfo_cbks fail the fop even when it succeeded on
one of the bricks. This can happen if the last response to pathinfo
[f]getxattr is a failure.
Fix:
Remember whether any of the pathinfo [f]getxattr calls succeeded and, if
so, send that successful op_ret/op_errno value to the xlators above.
Note:
Winding a fop to a client xlator that is not connected to its server
produces an error log. Prevent that by not winding the fop at all when
the client xlator is DOWN.
Change-Id: I846e8c47423ffcfa2eabffe8924534781a36841a
BUG: 1032927
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/6332
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
| -rw-r--r-- | tests/bugs/bug-1032927.t | 32 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 63 | 
2 files changed, 81 insertions, 14 deletions
diff --git a/tests/bugs/bug-1032927.t b/tests/bugs/bug-1032927.t new file mode 100644 index 00000000000..2106f3d5bfe --- /dev/null +++ b/tests/bugs/bug-1032927.t @@ -0,0 +1,32 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +#This tests if pathinfo getxattr fails when one of the bricks is down +#Lets hope it doesn't + +cleanup; +function get_pathinfo_in_loop { +        failed=0 +        for i in {1..1000} +        do +                getfattr -n trusted.glusterfs.pathinfo $M0 2>/dev/null +                if [ $? -ne 0 ]; then failed=1;break; fi +        done +        return $failed +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +cd $M0 +TEST kill_brick $V0 $H0 $B0/${V0}1 + +#when one of the bricks is down getfattr of pathinfo should not fail +#Lets just do the test for 1000 times to see if we hit the race +TEST get_pathinfo_in_loop + +cleanup diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index e06e3b2f24d..ac64a763626 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1127,6 +1127,14 @@ afr_fgetxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,          {                  callcnt = --local->call_count; +                if (op_ret < 0) { +                        local->op_errno = op_errno; +                } else { +                        local->op_ret = op_ret; +                        if (!local->xdata_rsp && xdata) +                                local->xdata_rsp = dict_ref (xdata); +                } +                  if (!dict || (op_ret < 0))                          goto out; @@ -1204,8 +1212,8 @@ out:                                  " key in dict");          unwind: -                AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, 
nxattr, -                                  xdata); +                AFR_STACK_UNWIND (fgetxattr, frame, local->op_ret, +                                  local->op_errno, nxattr, local->xdata_rsp);                  if (nxattr)                          dict_unref (nxattr); @@ -1242,6 +1250,14 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,                  {                          callcnt = --local->call_count; +                        if (op_ret < 0) { +                                local->op_errno = op_errno; +                        } else { +                                local->op_ret = op_ret; +                                if (!local->xdata_rsp && xdata) +                                        local->xdata_rsp = dict_ref (xdata); +                        } +                          if (!dict || (op_ret < 0))                                  goto out; @@ -1316,8 +1332,8 @@ afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,                                  " key in dict");          unwind: -                AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr, -                                  xdata); +                AFR_STACK_UNWIND (getxattr, frame, local->op_ret, +                                  local->op_errno, nxattr, local->xdata_rsp);                  if (nxattr)                          dict_unref (nxattr); @@ -1433,18 +1449,27 @@ afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,          afr_local_t     *local          = NULL;          xlator_t        **children      = NULL;          int             i               = 0; +        int             call_count      = 0;          priv     = this->private;          children = priv->children;          local = frame->local; -        local->call_count = priv->child_count; +        //local->call_count set in afr_local_init +        call_count = local->call_count; + +        //If up-children count is 0, afr_local_init would have failed already +      
  //and the call would have unwound so not handling it here.          for (i = 0; i < priv->child_count; i++) { -                STACK_WIND_COOKIE (frame, cbk, -                                   (void *) (long) i, -                                   children[i], children[i]->fops->getxattr, -                                   loc, name, NULL); +                if (local->child_up[i]) { +                        STACK_WIND_COOKIE (frame, cbk, +                                           (void *) (long) i, children[i], +                                           children[i]->fops->getxattr, +                                           loc, name, NULL); +                        if (!--call_count) +                                break; +                }          }          return;  } @@ -1693,18 +1718,28 @@ afr_fgetxattr_frm_all_children (xlator_t *this, call_frame_t *frame,          afr_local_t     *local          = NULL;          xlator_t        **children      = NULL;          int             i               = 0; +        int             call_count      = 0;          priv     = this->private;          children = priv->children;          local = frame->local; -        local->call_count = priv->child_count; +        //local->call_count set in afr_local_init +        call_count = local->call_count; + +        //If up-children count is 0, afr_local_init would have failed already +        //and the call would have unwound so not handling it here.          
for (i = 0; i < priv->child_count; i++) { -                STACK_WIND_COOKIE (frame, cbk, -                                   (void *) (long) i, -                                   children[i], children[i]->fops->fgetxattr, -                                   fd, name, NULL); +                if (local->child_up[i]) { +                        STACK_WIND_COOKIE (frame, cbk, +                                           (void *) (long) i, +                                           children[i], +                                           children[i]->fops->fgetxattr, +                                           fd, name, NULL); +                        if (!--call_count) +                                break; +                }          }          return;  | 
