afr: allow I/O when favorite-child-policy is enabled

Problem: Currently, I/O on a split-brained file fails even when the favorite-child-policy is set until the self-heal is complete. Fix: If a valid 'source' is found using the configured favorite-child-policy, inspect and reset the afr pending xattrs on the 'sinks' (inside appropriate locks), refresh the inode and then proceed with the read or write transaction. The resetting itself happens in the self-heal code and hence can also happen in the client-side background-heal or by the shd's index-heal in addition to the txn code path explained above. When it happens via heal, we also add checks in undo-pending to not reset the sink xattrs again. Change-Id: Ic8c1317720cb26bd114b6fe6af4e58c73b864626 BUG: 1386188 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reported-by: Simon Turcotte-Langevin <simon.turcotte-langevin@ubisoft.com> Reviewed-on: http://review.gluster.org/15673 Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
author: Ravishankar N <ravishankar@redhat.com> 2016-11-26 21:24:01 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2016-11-27 23:51:59 -0800
commit: a07ddd8fcc8dcdcf7ccfa61211d258f13b9f9229 (patch)
tree: 8192cfc5b2553043dc9511b386bf8894c7972746 /tests/bugs
parent: 2214501d6045b45dde469c5a476c86b6cf464c34 (diff)
1 files changed, 82 insertions, 0 deletions
diff --git a/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
new file mode 100644
index 00000000000..d049d95ef9a
--- /dev/null
+++ b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc  # shared test harness, located relative to this script
+. $(dirname $0)/../../volume.rc  # volume helper functions, located relative to this script
+cleanup;  # start from a clean state (cleanup is expected to come from the sourced files)
+
+TEST glusterd
+TEST pidof glusterd  # confirm the daemon is actually running
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}  # two-way replica; both bricks live on $H0
+TEST $CLI volume set $V0 self-heal-daemon off  # this and the next three options switch self-heal off before the split-brain is created
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;  # mount the volume at $M0
+TEST touch $M0/data.txt  # file used for the data split-brain
+TEST touch $M0/mdata.txt  # file used for the metadata split-brain
+
+#Create data and metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0  # brick 0 down: the writes below can only reach brick 1
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024  # data change (1 MiB of random bytes)
+TEST setfattr -n user.value -v value1 $M0/mdata.txt  # metadata change; brick 1 ends up holding value1
+TEST $CLI volume start $V0 force  # bring the killed brick back
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0  # wait until the mount sees child 0 again
+TEST kill_brick $V0 $H0 $B0/${V0}1  # brick 1 down: the writes below can only reach brick 0
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024  # conflicting data change
+TEST setfattr -n user.value -v value2 $M0/mdata.txt  # conflicting metadata change; brick 0 ends up holding value2
+
+TEST $CLI volume start $V0 force  # bring brick 1 back; both bricks are now up with conflicting copies
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1  # wait until the mount sees child 1 again
+
+## Check that the files are still in split-brain:
+  ## reading the data file through the mount fails
+  cat $M0/data.txt > /dev/null
+  EXPECT "1" echo $?
+  ## pending xattrs blame each other. The values are hex strings, so compare them as strings (not with -ne); left unquoted so an empty value makes the check error out and fail.
+  brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/data.txt)
+  brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+  TEST [ $brick0_pending != "000000000000000000000000" ]
+  TEST [ $brick1_pending != "000000000000000000000000" ]
+
+  ## reading the user xattr of the metadata file through the mount fails
+  getfattr -n user.value $M0/mdata.txt
+  EXPECT "1" echo $?
+  brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/mdata.txt)
+  brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/mdata.txt)
+  TEST [ $brick0_pending != "000000000000000000000000" ]
+  TEST [ $brick1_pending != "000000000000000000000000" ]
+
+## Let us use mtime as fav-child policy. So brick0 will be source.
+   # Set dirty (data part) on the sink brick to check if it is reset later along with the pending xattr.
+   TEST setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/${V0}1/data.txt
+   # Set dirty (metadata part) on the sink brick to check if it is reset later along with the pending xattr.
+   TEST setfattr -n trusted.afr.dirty -v 0x000000000000000100000000 $B0/${V0}1/mdata.txt
+
+   TEST $CLI volume set $V0 favorite-child-policy mtime
+
+   # Reading the file should be allowed and sink brick xattrs must be reset (hex values, so compared as strings rather than with -eq).
+   cat $M0/data.txt > /dev/null
+   EXPECT "0" echo $?
+   TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+   TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt)
+   TEST [ $brick1_dirty = "000000000000000000000000" ]
+   TEST [ $brick1_pending = "000000000000000000000000" ]
+
+   # Accessing the metadata file should be allowed and the sink brick xattrs of mdata.txt itself (dirty was planted on it above) must be reset; hex values are compared as strings.
+   EXPECT "value2" echo $(getfattr --only-values -n user.value  $M0/mdata.txt)
+   TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/mdata.txt)
+   TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/mdata.txt)
+   TEST [ $brick1_dirty = "000000000000000000000000" ]
+   TEST [ $brick1_pending = "000000000000000000000000" ]
+
+#Enable shd and heal the files.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0  # shd must see both bricks before healing
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0  # the heal command only launches the heal, so poll instead of checking once; HEAL_TIMEOUT is expected from include.rc
+cleanup;
author	Ravishankar N <ravishankar@redhat.com>	2016-11-26 21:24:01 +0530
committer	Pranith Kumar Karampuri <pkarampu@redhat.com>	2016-11-27 23:51:59 -0800
commit	a07ddd8fcc8dcdcf7ccfa61211d258f13b9f9229 (patch)
tree	8192cfc5b2553043dc9511b386bf8894c7972746 /tests/bugs
parent	2214501d6045b45dde469c5a476c86b6cf464c34 (diff)