From 1ca50941d693f48e73723b12a1466a70dd272ea2 Mon Sep 17 00:00:00 2001
From: Jeff Darcy
Date: Mon, 8 Apr 2013 15:32:49 -0400
Subject: tests: fix spurious regression test failures

Change-Id: I752aeb8e25f43281d2f5cf33d0ff5aeae49687e7
BUG: 764966
Signed-off-by: Jeff Darcy
Reviewed-on: http://review.gluster.org/4794
Reviewed-by: Anand Avati
Tested-by: Anand Avati
---
 tests/bugs/bug-873962.t | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tests/bugs/bug-873962.t b/tests/bugs/bug-873962.t
index 6a85cee0c..b245cc3da 100755
--- a/tests/bugs/bug-873962.t
+++ b/tests/bugs/bug-873962.t
@@ -13,6 +13,26 @@ TEST $CLI volume info;
 B0_hiphenated=`echo $B0 | tr '/' '-'`
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
 
+# If we allow self-heal to happen in the background, we'll get spurious
+# failures - especially at the point labeled "FAIL HERE" but
+# occasionally elsewhere. This behavior is very timing-dependent. It
+# doesn't show up in Jenkins, but it does on JD's and KP's machines, and
+# it got sharply worse because of an unrelated fsync change (6ae6f3d)
+# which changed timing. Putting anything at the FAIL HERE marker tends
+# to make it go away most of the time on affected machines, even if the
+# "anything" is unrelated.
+#
+# What's going on is that the I/O on the first mountpoint is allowed to
+# complete even though self-heal is still in progress and the state on
+# disk does not reflect its result. In fact, the state changes during
+# self-heal create the appearance of split brain when the second I/O
+# comes in, so that fails even though we haven't actually been in split
+# brain since the manual xattr operations. By disallowing background
+# self-heal, we ensure that the second I/O can't happen before self-heal
+# is complete, because it has to follow the first I/O, which now has to
+# follow self-heal.
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
 #Make sure self-heal is not triggered when the bricks are re-started
 TEST $CLI volume set $V0 cluster.self-heal-daemon off
 TEST $CLI volume set $V0 performance.stat-prefetch off
@@ -53,6 +73,7 @@ TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0
 
 #The operations should do self-heal and give correct output
 EXPECT "2" cat $M0/a;
+# FAIL HERE - see the comment about cluster.background-self-heal-count above.
 EXPECT "2" cat $M1/a;
 EXPECT "def" getfattr -n trusted.mdata --only-values $M0/b 2>/dev/null
 EXPECT "def" getfattr -n trusted.mdata --only-values $M1/b 2>/dev/null
--
cgit
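
For reference, the option the test toggles is an ordinary volume option, so the same
workaround can be applied to a real volume with the plain gluster CLI. A minimal
sketch, assuming a running replica volume named "testvol" (the volume name is a
placeholder, not taken from the patch):

    # Force self-heal to run in the foreground, so a read that triggers
    # self-heal does not return until the heal has completed - the same
    # ordering guarantee the test relies on for $V0.
    gluster volume set testvol cluster.background-self-heal-count 0

    # Confirm the setting is listed under "Options Reconfigured".
    gluster volume info testvol

    # Restore the default behaviour once finished.
    gluster volume reset testvol cluster.background-self-heal-count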