diff options
author | Ravishankar N <ravishankar@redhat.com> | 2016-02-05 15:10:06 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-06-27 05:19:55 -0700 |
commit | cae76bad7a31c60a3bf17fa1e5005ec5975bf91e (patch) | |
tree | 87168d124c1abc822668323f628f9284e0cd0871 | |
parent | c6f49213dc04714699691f87bde614c6406c16d5 (diff) |
afr:Don't wind reads for files in metadata split-brain
Backport of http://review.gluster.org/#/c/13389/
Problem: For a read on a file in metadata split-brain:
1. lookup_done resets event_generation to zero.
2. readv is issued, goes to inode refresh due to mismatching event_gen.
3. After refresh is successful, we update event_generation, data and
metadata readable.
4. We then call afr_read_txn_refresh_done() which in turn calls
afr_inode_get_readable() but doesn't check for EIO. So afr_readv_wind
is called with local->readable (which is populated with data_readable),
thus winding the read to a brick.
5. Also, further parallel reads that come directly go to the wind path
because there is no inode_refresh needed.
Fix:
1. For any afr_read_txn(), readable must be the intersection of data and metadata
readable.
2. Check for EIO in afr_read_txn_refresh_done().
Change-Id: I22dd221fdfaf96d7aced2f474e28ed1337d69f0e
BUG: 1349879
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
(cherry picked from commit 7a1c1e2904701496968ed14b6d7479fb706c3188)
Reviewed-on: http://review.gluster.org/14790
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r-- | tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t | 40 | ||||
-rwxr-xr-x | tests/bugs/replicate/bug-977797.t | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 14 |
3 files changed, 47 insertions, 11 deletions
diff --git a/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t b/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t new file mode 100644 index 00000000000..780ddb9250c --- /dev/null +++ b/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t @@ -0,0 +1,40 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +#Test that for files in metadata-split-brain, we do not wind even a single read. +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} + +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST chmod 700 $M0/file +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST chmod 777 $M0/file +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST umount $M0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 + +lines=`cat $M0/file|wc|awk '{print $1}'` +EXPECT 0 echo $lines +TEST umount $M0 +cleanup diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t index 72c616ba68e..ea9a98adc23 100755 --- a/tests/bugs/replicate/bug-977797.t +++ b/tests/bugs/replicate/bug-977797.t @@ -53,7 +53,9 @@ TEST chmod 757 $M0/a/file TEST $CLI volume start $V0 force EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1; -TEST dd if=$M0/a/file of=/dev/null bs=1024k +dd if=$M0/a/file of=/dev/null 
bs=1024k +#read fails, but heal is triggered. +TEST [ $? -ne 0 ] EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-0 "data" diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 32ad6a46d17..74749f029c8 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -83,7 +83,7 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) &event_generation, local->transaction.type); - if (ret == -1 || !event_generation) + if (ret == -EIO || !event_generation) /* Even after refresh, we don't have a good read subvolume. Time to bail */ AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); @@ -218,18 +218,12 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, } local->transaction.type = type; - if (local->op == GF_FOP_FSTAT || local->op == GF_FOP_STAT) { - ret = afr_inode_read_subvol_get (inode, this, data, metadata, - &event_generation); - AFR_INTERSECT (local->readable, data, metadata, - priv->child_count); - } else { - ret = afr_inode_read_subvol_type_get (inode, this, local->readable, - &event_generation, type); - } + ret = afr_inode_read_subvol_get (inode, this, data, metadata, + &event_generation); if (ret == -1) /* very first transaction on this inode */ goto refresh; + AFR_INTERSECT (local->readable, data, metadata, priv->child_count); gf_msg_debug (this->name, 0, "%s: generation now vs cached: %d, " "%d", uuid_utoa (inode->gfid), local->event_generation, |