Diffstat (limited to 'tests/basic/afr')
43 files changed, 2523 insertions, 332 deletions
diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t
index a904e22e2a5..c847e22977f 100644
--- a/tests/basic/afr/add-brick-self-heal.t
+++ b/tests/basic/afr/add-brick-self-heal.t
@@ -6,10 +6,16 @@ cleanup;
 TEST glusterd
 TEST pidof glusterd
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
 TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
 TEST $CLI volume set $V0 cluster.data-self-heal off
 TEST $CLI volume set $V0 cluster.metadata-self-heal off
 TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
 TEST $CLI volume set $V0 self-heal-daemon off
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
@@ -25,6 +31,7 @@ TEST setfattr -n user.test -v qwerty $M0/file5.txt
 
 # Add brick1
 TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
 
 # New-brick should accuse the old-bricks (Simulating case for data-loss)
 TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
new file mode 100644
index 00000000000..896ba0c9b2c
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#Test that anon-inode entry is not cleaned up as long as there exists at least
+#one valid entry
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/a $M0/b
+
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/a $M0/a-new
+TEST mv $M0/b $M0/b-new
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! ls $M0/a
+TEST ! ls $M0/b
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+#Make sure index heal doesn't happen after enabling heal
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+#Allow time for a scan
+sleep 5
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
+TEST rm -f $M0/a-new
+TEST stat $M0/b-new
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
+
+cleanup
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
new file mode 100644
index 00000000000..f4cf37a2fa0
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+#Tests that afr-anon-inode test cases work fine as expected
+#These are cases where in entry-heal/name-heal we don't know the entry for an
+#inode, so these inodes are kept in a special directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST mkdir -p $M0/d1/b $M0/d2/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/d2/a $M0/d1
+TEST mv $M0/d1/b $M0/d2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
+
+TEST ! ls $M0/$anon_inode_name
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
+
+#Test purging code path by shd
+TEST $CLI volume heal $V0 disable
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
+TEST ln $M0/del-file $M0/del-file-link
+TEST ln $M0/l0/file $M0/l1/file-link1
+TEST ln $M0/l0/file $M0/l2/file-link2
+TEST mkdir -p $M0/del-recursive-dir/d1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/del-file $M0/del-file-nolink
+TEST rm -rf $M0/del-recursive-dir
+TEST mv $M0/d1/a $M0/d2
+TEST mv $M0/l0/file $M0/l0/renamed-file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
+
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
+TEST ! stat $M0/del-file
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST ! stat $M0/del-file-nolink
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
+TEST ! stat $M0/del-recursive-dir
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+TEST ! stat $M0/d1/a
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST ! stat $M0/l0/file
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+TEST ! stat $M0/l1/file-link1
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
+TEST ! stat $M0/l2/file-link2
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
+
+#Simulate only anon-inodes present in all bricks
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
+
+#Test that shd doesn't cleanup anon-inodes when some bricks are down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+$CLI volume heal $V0
+sleep 5 #Allow time for completion of one scan
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
+
+#Test that rename indeed happened instead of rmdir/mkdir
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
+cleanup;
diff --git a/tests/basic/afr/afr-no-fsync.t b/tests/basic/afr/afr-no-fsync.t
new file mode 100644
index 00000000000..0966d9b0a11
--- /dev/null
+++ b/tests/basic/afr/afr-no-fsync.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+#Tests that sequential write workload doesn't lead to FSYNCs
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/a bs=1M count=500
+TEST ! "$CLI volume profile $V0 info incremental | grep FSYNC"
+
+cleanup;
diff --git a/tests/basic/afr/afr-read-hash-mode.t b/tests/basic/afr/afr-read-hash-mode.t
new file mode 100644
index 00000000000..eeff10d8ebd
--- /dev/null
+++ b/tests/basic/afr/afr-read-hash-mode.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function reads_brick_count {
+    $CLI volume profile $V0 info incremental | grep -w READ | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..2}
+
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+
+# Disable all caching
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+TEST dd if=/dev/urandom of=$M0/FILE bs=1M count=8
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# TEST if the option gives the intended behavior. The way we perform this test
+# is by performing reads from the mount and write to /dev/null. If the
+# read-hash-mode is 3, then for a given file, more than 1 brick should serve the
+# read-fops whereas with the default read-hash-mode (i.e. 1), only 1 brick will.
+
+# read-hash-mode=1
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT "1" mount_get_option_value $M0 $V0-replicate-0 read-hash-mode
+TEST $CLI volume profile $V0 start
+TEST dd if=$M0/FILE of=/dev/null bs=1M
+count=`reads_brick_count`
+TEST [ $count -eq 1 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# read-hash-mode=3
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+TEST $CLI volume set $V0 cluster.read-hash-mode 3
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "3" mount_get_option_value $M0 $V0-replicate-0 read-hash-mode
+TEST $CLI volume profile $V0 info clear
+TEST dd if=$M0/FILE of=/dev/null bs=1M
+count=`reads_brick_count`
+TEST [ $count -eq 2 ]
+
+# Check that the arbiter did not serve any reads
+arbiter_reads=$($CLI volume top $V0 read brick $H0:$B0/${V0}2|grep FILE|awk '{print $1}')
+TEST [ -z $arbiter_reads ]
+
+cleanup;
diff --git a/tests/basic/afr/afr-seek.t b/tests/basic/afr/afr-seek.t
new file mode 100644
index 00000000000..c12ee011660
--- /dev/null
+++ b/tests/basic/afr/afr-seek.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SEEK=$(dirname $0)/seek
+build_tester $(dirname $0)/../seek.c -o ${SEEK}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0..2}
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST ${SEEK} create ${M0}/test 0 1 1048576 1
+# Determine underlying filesystem allocation block size
+BSIZE="$(($(${SEEK} scan ${M0}/test hole 0) * 2))"
+
+TEST ${SEEK} create ${M0}/test 0 ${BSIZE} $((${BSIZE} * 4 + 512)) ${BSIZE}
+
+EXPECT "^0$" ${SEEK} scan ${M0}/test data 0
+EXPECT "^$((${BSIZE} / 2))$" ${SEEK} scan ${M0}/test data $((${BSIZE} / 2))
+EXPECT "^$((${BSIZE} - 1))$" ${SEEK} scan ${M0}/test data $((${BSIZE} - 1))
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data ${BSIZE}
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 511))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 6))
+
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole 0
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} / 2))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} - 1))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole ${BSIZE}
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 6))
+
+rm -f ${SEEK}
+cleanup
+
+# Centos6 regression slaves seem to not support SEEK_DATA/SEEK_HOLE
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/afr/afr-up.t b/tests/basic/afr/afr-up.t
new file mode 100644
index 00000000000..428aac875e0
--- /dev/null
+++ b/tests/basic/afr/afr-up.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+#Tests that afr up/down works as expected
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,3,4,5,6}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "1" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+
+#kill two bricks in first replica and check that afr_up_status is 0 for it
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+cleanup;
diff --git a/tests/basic/afr/arbiter-add-brick.t b/tests/basic/afr/arbiter-add-brick.t
index fe919de0ab4..77b93d9a210 100644
--- a/tests/basic/afr/arbiter-add-brick.t
+++ b/tests/basic/afr/arbiter-add-brick.t
@@ -12,6 +12,8 @@ TEST $CLI volume set $V0 performance.stat-prefetch off
 TEST $CLI volume start $V0
 TEST $CLI volume set $V0 self-heal-daemon off
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
 
 TEST mkdir $M0/dir1
 TEST dd if=/dev/urandom of=$M0/file1 bs=1024 count=1
@@ -24,6 +26,7 @@ TEST dd if=/dev/urandom of=$M0/file1 bs=1024 count=1024
 #convert replica 2 to arbiter volume
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
 
 #syntax check for add-brick.
 TEST ! $CLI volume add-brick $V0 replica 2 arbiter 1 $H0:$B0/${V0}2
@@ -31,6 +34,19 @@ TEST ! $CLI volume add-brick $V0 replica 3 arbiter 2 $H0:$B0/${V0}2
 
 TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Trigger name heals from client. If we just rely on index heal, the first index
+#crawl on B0 fails for /, dir2 and /file either due to lock collision or files
+#not being present on the other 2 bricks yet. It is getting healed only in the
+#next crawl after priv->shd.timeout (600 seconds) or by manually launching
+#index heal again.
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST stat $M0/dir1
+TEST stat $M0/dir2
+TEST stat $M0/file1
 
 #Heal files
 TEST $CLI volume set $V0 self-heal-daemon on
diff --git a/tests/basic/afr/arbiter-cli.t b/tests/basic/afr/arbiter-cli.t
index 2806b5a376b..ad79de79d02 100644
--- a/tests/basic/afr/arbiter-cli.t
+++ b/tests/basic/afr/arbiter-cli.t
@@ -16,10 +16,15 @@ TEST ! $CLI volume create $V0 arbiter 3 $H0:$B0/${V0}{0,1,2}
 
 # replica count given after arbiter count.
 TEST ! $CLI volume create $V0 arbiter 1 replica 3 $H0:$B0/${V0}{0,1,2}
-#Incorrect values for replica and arbiter count.
-TEST ! $CLI volume create $V0 replica 2 arbiter 1 $H0:$B0/${V0}{0,1,2}
+# Incorrect values for replica and arbiter count.
 TEST ! $CLI volume create $V0 replica 3 arbiter 2 $H0:$B0/${V0}{0,1,2}
 
-# Only permissible value is replica=3 and arbiter=1.
-TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+# Correct setup
+# Only documented value is replica=2 and arbiter=1.
+TEST $CLI volume create $V0 replica 2 arbiter 1 $H0:$B0/${V0}{0,1,2}
+
+# Earlier documents mentioned 'replica 3 arbiter 1' as the valid option
+# Preserve backward compatibility till Oct, 2019.
+TEST $CLI volume create ${V0}-old replica 3 arbiter 1 $H0:$B0/${V0}-old{0,1,2}
+
 cleanup
diff --git a/tests/basic/afr/arbiter-mount.t b/tests/basic/afr/arbiter-mount.t
index da99096f81f..404d334d2f9 100644
--- a/tests/basic/afr/arbiter-mount.t
+++ b/tests/basic/afr/arbiter-mount.t
@@ -4,6 +4,9 @@
 . $(dirname $0)/../../volume.rc
 . $(dirname $0)/../../afr.rc
 . $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
 cleanup;
 
 #Check that mounting fails when only arbiter brick is up.
diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t
index eba7dc2b3c4..1e9336184b5 100755
--- a/tests/basic/afr/client-side-heal.t
+++ b/tests/basic/afr/client-side-heal.t
@@ -17,6 +17,7 @@ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
 echo "some data" > $M0/datafile
 EXPECT 0 echo $?
 TEST touch $M0/mdatafile
+TEST touch $M0/mdatafile-backend-direct-modify
 TEST mkdir $M0/dir
 
 #Kill a brick and perform I/O to have pending heals.
@@ -29,6 +30,7 @@ EXPECT 0 echo $?
 
 #pending metadata heal
 TEST chmod +x $M0/mdatafile
+TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify
 
 #pending entry heal. Also causes pending metadata/data heals on file{1..5}
 TEST touch $M0/dir/file{1..5}
@@ -40,9 +42,12 @@ TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
 
 #Medatada heal via explicit lookup must not happen
-TEST ls $M0/mdatafile
+TEST getfattr -d -m. -e hex $M0/mdatafile
+TEST ls $M0/mdatafile-backend-direct-modify
 
-#Inode refresh must not trigger data and entry heals.
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+#Inode refresh must not trigger data, metadata and entry heals.
 #To trigger inode refresh for sure, the volume is unmounted and mounted each time.
 #Check that data heal does not happen.
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
@@ -52,7 +57,6 @@ TEST cat $M0/datafile
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
 TEST ls $M0/dir
-
 #No heal must have happened
 EXPECT 8 get_pending_heal_count $V0
 
@@ -61,21 +65,25 @@ TEST $CLI volume set $V0 cluster.data-self-heal on
 TEST $CLI volume set $V0 cluster.metadata-self-heal on
 TEST $CLI volume set $V0 cluster.entry-self-heal on
 
-#Metadata heal is triggered by lookup without need for inode refresh.
-TEST ls $M0/mdatafile
-EXPECT 7 get_pending_heal_count $V0
-
-#Inode refresh must trigger data and entry heals.
+#Inode refresh must trigger data, metadata and entry heals.
 #To trigger inode refresh for sure, the volume is unmounted and mounted each time.
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/mdatafile-backend-direct-modify
+
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+
+TEST getfattr -d -m. -e hex $M0/mdatafile
+EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0
+
 TEST cat $M0/datafile
 EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
 
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
 TEST ls $M0/dir
-EXPECT 5 get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0
 
 TEST cat $M0/dir/file1
 TEST cat $M0/dir/file2
@@ -83,5 +91,5 @@ TEST cat $M0/dir/file3
 TEST cat $M0/dir/file4
 TEST cat $M0/dir/file5
 
-EXPECT 0 get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
 cleanup;
diff --git a/tests/basic/afr/compounded-write-txns.t b/tests/basic/afr/compounded-write-txns.t
deleted file mode 100644
index 7cecd87b01b..00000000000
--- a/tests/basic/afr/compounded-write-txns.t
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../volume.rc
-
-cleanup
-
-TEST glusterd
-TEST pidof glusterd
-TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
-TEST $CLI volume set $V0 write-behind off
-TEST $CLI volume set $V0 client-io-threads off
-TEST $CLI volume start $V0
-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
-
-# Create and generate data into a src file
-
-TEST `printf %1024s |tr " " "1" > /tmp/source`
-TEST `printf %1024s |tr " " "2" >> /tmp/source`
-
-TEST dd if=/tmp/source of=$M0/file bs=1024 count=2 2>/dev/null
-md5sum_file=$(md5sum $M0/file | awk '{print $1}')
-
-TEST $CLI volume set $V0 cluster.use-compound-fops on
-
-TEST dd if=$M0/file of=$M0/file-copy bs=1024 count=2 2>/dev/null
-
-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
-TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
-
-EXPECT "$md5sum_file" echo `md5sum $M0/file-copy | awk '{print $1}'`
-
-EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
-TEST $CLI volume stop $V0
-TEST $CLI volume delete $V0
-
-TEST rm -f /tmp/source
-cleanup
diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t
index 155ffa09ef0..6e0f18b88f8 100644
--- a/tests/basic/afr/durability-off.t
+++ b/tests/basic/afr/durability-off.t
@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
 EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
 
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
 #Test that fsyncs happen when durability is on
 TEST $CLI volume set $V0 cluster.ensure-durability on
 TEST $CLI volume set $V0 performance.strict-write-ordering on
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
new file mode 100644
index 00000000000..7bb6ee14193
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
@@ -0,0 +1,459 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+        stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+        diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+        local f1_path="${1}/${3}"
+        local f2_path="${2}/${3}"
+        local insync=""
+        diff_dirs $f1_path $f2_path
+        if [ $? -eq 0 ];
+        then
+                insync="Y"
+        else
+                insync="N"
+        fi
+        local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+        local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+        local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+        local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+        local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+        local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+        if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+        if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+        if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+        if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+        if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+        if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+        echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+        local zero_xattr="000000000000000000000000"
+        if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+        then
+                echo "Y"
+        else
+                echo "N"
+        fi
+}
+
+function print_pending_heals {
+        local result=":"
+        for i in "$@";
+        do
+                if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+                then
+                        result="$result:$i"
+                fi
+        done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+        if [ $result == ":" ]; then result="~"; fi
+        echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod  source_deletions_heal/block b 4 5
+TEST mknod  source_deletions_heal/char c 1 5
+TEST touch  source_deletions_heal/file
+TEST ln -s  source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir  source_deletions_heal/dir1
+TEST mkdir  source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod  source_deletions_me/block b 4 5
+TEST mknod  source_deletions_me/char c 1 5
+TEST touch  source_deletions_me/file
+TEST ln -s  source_deletions_me/file source_deletions_me/slink
+TEST mkdir  source_deletions_me/dir1
+TEST mkdir  source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod  source_self_accusing/block b 4 5
+TEST mknod  source_self_accusing/char c 1 5
+TEST touch  source_self_accusing/file
+TEST ln -s  source_self_accusing/file source_self_accusing/slink
+TEST mkdir  source_self_accusing/dir1
+TEST mkdir  source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod  source_creations_heal/block b 4 5
+TEST mknod  source_creations_heal/char c 1 5
+TEST touch  source_creations_heal/file
+TEST ln -s  source_creations_heal/file source_creations_heal/slink
+TEST mkdir  source_creations_heal/dir1
+TEST mkdir  source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod  source_creations_me/block b 4 5
+TEST mknod  source_creations_me/char c 1 5
+TEST touch  source_creations_me/file
+TEST ln -s  source_creations_me/file source_creations_me/slink
+TEST mkdir  source_creations_me/dir1
+TEST mkdir  source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal are healed correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+#Anonymous directory shouldn't be created
+TEST mkdir $M0/rename-dir
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/rename-dir $M0/new-name
+TEST $CLI volume start $V0 force
+#'spb' is in split-brain so pending-heal-count will be 2
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
+EXPECT_NOT "$before_rename" echo $after_rename
+cleanup
diff --git a/tests/basic/afr/entry-self-heal.t b/tests/basic/afr/entry-self-heal.t
index 3c900fdcf9a..0c1da7d211e 100644
--- a/tests/basic/afr/entry-self-heal.t
+++ b/tests/basic/afr/entry-self-heal.t
@@ -79,6 +79,9 @@ TEST $CLI volume set $V0 performance.open-behind off
 TEST $CLI volume set $V0 performance.stat-prefetch off
 TEST $CLI volume set $V0 performance.io-cache off
 TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
 TEST $CLI volume start $V0
 
 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
diff --git a/tests/basic/afr/gfid-heal.t b/tests/basic/afr/gfid-heal.t
index fa13c469b24..5e26e3307eb 100644
--- a/tests/basic/afr/gfid-heal.t
+++ b/tests/basic/afr/gfid-heal.t
@@ -8,7 +8,7 @@ cleanup;
 
 function get_gfid_count
 {
-    getfattr -d -m. -e hex $B0/brick{0,1,2,3,4,5}/$1 2>&1 | grep trusted.gfid | wc -l
+    getfattr -d -m. -e hex $B0/brick{0,1,2,3,4,5}/$1 2>&1 | grep trusted.gfid | grep -v gfid2path | wc -l
 }
 
 TEST glusterd
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-cli.t b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t
new file mode 100644
index 00000000000..b739ddc49cc
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+cd $M0
+
+##### Healing from latest mtime ######
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Sink based on mtime" > f1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Source based on mtime" > f1
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that first brick has the latest mtime
+LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain latest-mtime /f1
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5=$(md5sum $B0/${V0}1/f1 | awk '{print $1}')
+TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]
+
+
+##### Healing from bigger file ######
+
+TEST mkdir test
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Bigger file" > test/f2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Small file" > test/f2
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f2)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that second brick has the bigger file
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/test/f2 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain bigger-file /test/f2
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5=$(md5sum $B0/${V0}0/test/f2 | awk '{print $1}')
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ]
+
+
+#Add one more brick, and heal.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+
+##### Healing from source brick ######
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "We will consider these as sinks" > test/f3
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "We will take this as source" > test/f3
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3)
+TEST [ "$gfid_0" != "$gfid_1" ]
+TEST [ "$gfid_1" == "$gfid_2" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#We will try to heal the split-brain with bigger file option.
+#It should fail, since we have same file size in bricks 1 & 2.
+EXPECT "No bigger file for file /test/f3" $CLI volume heal $V0 split-brain bigger-file /test/f3
+
+#Now heal by taking the brick 0 as the source
+SOURCE_MD5=$(md5sum $B0/${V0}0/test/f3 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}0 /test/f3
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_0" == "$gfid_2" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5_1=$(md5sum $B0/${V0}1/test/f3 | awk '{print $1}')
+HEALED_MD5_2=$(md5sum $B0/${V0}2/test/f3 | awk '{print $1}')
+TEST [ "$SOURCE_MD5" == "$HEALED_MD5_1" ]
+TEST [ "$SOURCE_MD5" == "$HEALED_MD5_2" ]
+
+cd -
+cleanup;
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
new file mode 100644
index 00000000000..35e295dc170
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
@@ -0,0 +1,229 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+##### Healing with favorite-child-policy = mtime ######
+#####           and self-heal-daemon            ######
+
+TEST $CLI volume set $V0 favorite-child-policy mtime
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Sink based on mtime" > $M0/f1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Source based on mtime" > $M0/f1
+
+#Gfids of file f1 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that first brick has the latest mtime
+LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | cut -d\  -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}1/f1 | cut -d\  -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing with favorite-child-policy = ctime ######
+#####           and self-heal-daemon            ######
+
+#gfid split-brain resolution should work even when the granular-entry-heal is
+#enabled
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST $CLI volume set $V0 favorite-child-policy ctime
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Sink based on ctime" > $M0/f2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Source based on ctime" > $M0/f2
+
+#Gfids of file f2 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f2)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#We know that second brick has the latest ctime
+LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/f2 | cut -d\  -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}0/f2 | cut -d\  -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ]
+
+
+#Add one more brick, and heal.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing using favorite-child-policy = size #####
+#####           and client side heal            #####
+
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Set the quorum-type to none, and create a gfid split brain
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Second smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Biggest among the three files" > $M0/f3
+
+#Bring back the down bricks.
+TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +#Gfids of file f3 on all the bricks should differ +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f3) +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3) +TEST [ "$gfid_0" != "$gfid_1" ] +TEST [ "$gfid_0" != "$gfid_2" ] +TEST [ "$gfid_1" != "$gfid_2" ] + +#We know that the second brick has the bigger file +BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1) + +TEST ls $M0 #Trigger entry heal via readdir inode refresh +TEST cat $M0/f3 #Trigger data heal via readv inode refresh +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +#gfid split-brain should be resolved +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3) +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3) +TEST [ "$gfid_0" == "$gfid_1" ] +TEST [ "$gfid_2" == "$gfid_1" ] + +HEALED_MD5_1=$(md5sum $B0/${V0}0/f3 | cut -d\ -f1) +HEALED_MD5_2=$(md5sum $B0/${V0}2/f3 | cut -d\ -f1) +TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_1" ] +TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_2" ] + + +##### Healing using favorite-child-policy = majority ##### +##### and client side heal ##### + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +echo "Does not agree with bricks 0 & 1" > $M0/f4 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +echo "Agree on bricks 0 & 1" > $M0/f4 + +#Gfids of file f4 on bricks 0 & 1 should be the same, and on bricks 0 & 2 should differ +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f4) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f4) +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4) +TEST [ "$gfid_0" == "$gfid_1" ] +TEST [ "$gfid_0" != "$gfid_2" ] + +#We know that the first and second bricks agree with each other. Pick either +#of them as the source +MAJORITY_MD5=$(md5sum $B0/${V0}0/f4 | cut -d\ -f1) + +#Bring back the down brick and heal. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +TEST ls $M0 #Trigger entry heal via readdir inode refresh +TEST cat $M0/f4 #Trigger data heal via readv inode refresh +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +#gfid split-brain should be resolved +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4) +TEST [ "$gfid_0" == "$gfid_2" ] + +HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\ -f1) +TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ] + +cleanup; diff --git a/tests/basic/afr/gfid-self-heal.t b/tests/basic/afr/gfid-self-heal.t index b54edbcae85..5a530681186 100644 --- a/tests/basic/afr/gfid-self-heal.t +++ b/tests/basic/afr/gfid-self-heal.t @@ -50,6 +50,10 @@ TEST kill_brick $V0 $H0 $B0/${V0}0 TEST touch $M0/a gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/a) TEST touch $B0/${V0}0/a +# storage/posix treats a file that has no gfid and was changed less than a +# second ago as non-existent, so we wait for a second to force posix to treat +# it as a valid file that merely lacks a gfid. 
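#(Illustrative sketch, not part of the original change: the same pattern
# applies whenever a test fabricates a gfid-less file directly on a brick
# backend, e.g.:
#   touch $B0/${V0}0/some-file   # hypothetical file, no gfid xattr yet
#   sleep 1                      # age it past posix's one-second window
# after which lookups from the mount can see it and assign a gfid.)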
+sleep 1 $CLI volume start $V0 force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 TEST stat $M0/a @@ -62,6 +66,10 @@ TEST kill_brick $V0 $H0 $B0/${V0}0 TEST touch $M0/b TEST mkdir $B0/${V0}0/b TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 +# storage/posix treats a file that has no gfid and was changed less than a +# second ago as non-existent, so we wait for a second to force posix to treat +# it as a valid file that merely lacks a gfid. +sleep 1 $CLI volume start $V0 force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 TEST ! stat $M0/b @@ -71,6 +79,10 @@ TEST "[[ -z \"$gfid_0\" ]]" #Check gfid assigning doesn't happen when there is type mismatch TEST touch $B0/${V0}1/c TEST mkdir $B0/${V0}0/c +# storage/posix treats a file that has no gfid and was changed less than a +# second ago as non-existent, so we wait for a second to force posix to treat +# it as a valid file that merely lacks a gfid. +sleep 1 TEST ! stat $M0/c gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/c) gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/c) @@ -81,6 +93,10 @@ TEST "[[ -z \"$gfid_0\" ]]" # gfid split-brain TEST kill_brick $V0 $H0 $B0/${V0}0 TEST touch $B0/${V0}1/d +# storage/posix treats a file that has no gfid and was changed less than a +# second ago as non-existent, so we wait for a second to force posix to treat +# it as a valid file that merely lacks a gfid. +sleep 1 TEST ! stat $M0/d gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/d) TEST "[[ -z \"$gfid_1\" ]]" diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t index a655180a095..10b6c6398da 100644 --- a/tests/basic/afr/granular-esh/cli.t +++ b/tests/basic/afr/granular-esh/cli.t @@ -11,7 +11,7 @@ TESTS_EXPECTED_IN_LOOP=4 TEST glusterd TEST pidof glusterd -TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} # Test that enabling the option should work on a newly created volume TEST $CLI volume set $V0 cluster.granular-entry-heal on TEST $CLI volume set $V0 cluster.granular-entry-heal off @@ -25,34 +25,6 @@ TEST $CLI volume start $V1 TEST ! $CLI volume heal $V1 granular-entry-heal enable TEST ! $CLI volume heal $V1 granular-entry-heal disable -####################### -###### TIER TEST ###### -####################### -# Execute the same command on a disperse + replicate tiered volume and make -sure the option is set on the replicate leg of the volume -TEST $CLI volume attach-tier $V1 replica 2 $H0:$B0/${V1}{3,4} -TEST $CLI volume heal $V1 granular-entry-heal enable -EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal -TEST $CLI volume heal $V1 granular-entry-heal disable -EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal - -# Kill a disperse brick and make heal be pending on the volume. -TEST kill_brick $V1 $H0 $B0/${V1}0 - -# Now make sure that one offline brick in disperse does not affect enabling the -option on the volume. -TEST $CLI volume heal $V1 granular-entry-heal enable -EXPECT "enable" volume_get_field $V1 cluster.granular-entry-heal -TEST $CLI volume heal $V1 granular-entry-heal disable -EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal - -# Now kill a replicate brick. -TEST kill_brick $V1 $H0 $B0/${V1}3 -# Now make sure that one offline brick in replicate causes the command to be -failed. -TEST ! 
$CLI volume heal $V1 granular-entry-heal enable -EXPECT "disable" volume_get_field $V1 cluster.granular-entry-heal - ###################### ### REPLICATE TEST ### ###################### @@ -136,7 +108,7 @@ TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID TEST $CLI volume reset $V0 # Ensure that granular entry heal is also disabled EXPECT "no" volume_get_field $V0 cluster.granular-entry-heal -EXPECT "on" volume_get_field $V0 cluster.entry-self-heal +EXPECT "off" volume_get_field $V0 cluster.entry-self-heal cleanup #G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1399038 diff --git a/tests/basic/afr/granular-esh/replace-brick.t b/tests/basic/afr/granular-esh/replace-brick.t index 639ed81b95c..5fc7811a8d8 100644 --- a/tests/basic/afr/granular-esh/replace-brick.t +++ b/tests/basic/afr/granular-esh/replace-brick.t @@ -12,6 +12,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 cluster.heal-timeout 5 TEST $CLI volume heal $V0 granular-entry-heal enable TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; diff --git a/tests/basic/afr/halo.t b/tests/basic/afr/halo.t new file mode 100644 index 00000000000..3f61f5a0402 --- /dev/null +++ b/tests/basic/afr/halo.t @@ -0,0 +1,61 @@ +#!/bin/bash +#Tests that halo basic functionality works as expected + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function get_up_child() +{ + if [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[0\]") ]; + then + echo 0 + elif [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[1\]") ] + then + echo 1 + fi +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.halo-enabled yes +TEST $CLI volume set $V0 cluster.halo-max-replicas 1 +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[0\]" +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[1\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]" + +up_id=$(get_up_child) +TEST [[ ! -z "$up_id" ]] + +down_id=$((1-up_id)) + +TEST kill_brick $V0 $H0 $B0/${V0}${up_id} +#As max-replicas is configured to be 1, down_child should be up now +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]" +EXPECT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${up_id}\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${down_id}\]" + +#Bring the brick back up and the state should be restored +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" + +up_id=$(get_up_child) +TEST [[ ! 
-z "$up_id" ]] +down_id=$((1-up_id)) +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]" + +cleanup; diff --git a/tests/basic/afr/lk-quorum.t b/tests/basic/afr/lk-quorum.t new file mode 100644 index 00000000000..3364d8a6a1b --- /dev/null +++ b/tests/basic/afr/lk-quorum.t @@ -0,0 +1,257 @@ +#!/bin/bash + +SCRIPT_TIMEOUT=300 + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../fileio.rc +cleanup; + +TEST glusterd; +TEST pidof glusterd + +#Tests for quorum-type option for replica 2 +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}; +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0; + +TEST touch $M0/a + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#Check locking behavior with quorum 'fixed' and quorum-count 2 +TEST $CLI volume set $V0 cluster.quorum-type fixed +TEST $CLI volume set $V0 cluster.quorum-count 2 +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^fixed$" mount_get_option_value $M0 $V0-replicate-0 quorum-type +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^2$" mount_get_option_value $M0 $V0-replicate-0 quorum-count + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#When any of the bricks is down lock/unlock should fail +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#kill 2nd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! 
flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#Check locking behavior with quorum 'fixed' and quorum-count 1 +TEST $CLI volume set $V0 cluster.quorum-count 1 +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" mount_get_option_value $M0 $V0-replicate-0 quorum-count + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#When any of the bricks is down lock/unlock should succeed +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#kill 2nd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#Check locking behavior with quorum 'auto' +TEST $CLI volume set $V0 cluster.quorum-type auto +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^auto$" mount_get_option_value $M0 $V0-replicate-0 quorum-type + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#When first brick is down lock/unlock should fail +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#When second brick is down lock/unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +cleanup; +TEST glusterd; +TEST pidof glusterd + +#Tests for replica 3 +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0; + +TEST touch $M0/a + +#When all bricks are up, lock and unlock should succeed +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST flock -x $fd1 +TEST fd_close $fd1 + +#When all bricks are down, lock/unlock should fail +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST $CLI volume stop $V0 +TEST ! 
flock -x $fd1 +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST fd_close $fd1 + +#When any of the bricks is down lock/unlock should succeed +#kill first brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +#kill 2nd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#kill 3rd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST fd_close $fd1 + +#When any two of the bricks are down lock/unlock should fail +#kill first,second bricks +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST fd_close $fd1 + +#kill 2nd,3rd bricks +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST fd_close $fd1 + +#kill 1st,3rd brick +TEST fd1=`fd_available` +TEST fd_open $fd1 'w' $M0/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST ! flock -x $fd1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST fd_close $fd1 + +cleanup diff --git a/tests/basic/afr/name-self-heal.t b/tests/basic/afr/name-self-heal.t new file mode 100644 index 00000000000..50fc2ecc6c2 --- /dev/null +++ b/tests/basic/afr/name-self-heal.t @@ -0,0 +1,112 @@ +#!/bin/bash +#Self-heal tests + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc +cleanup; + +#Check that when quorum is not enabled name-heal happens correctly +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume start $V0 +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +TEST touch $M0/a +TEST touch $M0/c +TEST kill_brick $V0 $H0 $B0/brick0 +TEST touch $M0/b +TEST rm -f $M0/a +TEST rm -f $M0/c +TEST touch $M0/c #gfid mismatch case +c_gfid=$(gf_get_gfid_xattr $B0/brick1/c) +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST ! stat $M0/a +TEST ! stat $B0/brick0/a +TEST ! 
stat $B0/brick1/a + +TEST stat $M0/b +TEST stat $B0/brick0/b +TEST stat $B0/brick1/b +TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]] + +TEST stat $M0/c +TEST stat $B0/brick0/c +TEST stat $B0/brick1/c +TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]] + +cleanup; + +#Check that when quorum is enabled name-heal happens as expected +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2} +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume start $V0 +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +TEST touch $M0/a +TEST touch $M0/c +TEST kill_brick $V0 $H0 $B0/brick0 +TEST touch $M0/b +TEST rm -f $M0/a +TEST rm -f $M0/c +TEST touch $M0/c #gfid mismatch case +c_gfid=$(gf_get_gfid_xattr $B0/brick1/c) +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST ! stat $M0/a +TEST ! stat $B0/brick0/a +TEST ! stat $B0/brick1/a +TEST ! stat $B0/brick2/a + +TEST stat $M0/b +TEST ! stat $B0/brick0/b #Name heal shouldn't be triggered +TEST stat $B0/brick1/b +TEST stat $B0/brick2/b + +TEST stat $M0/c +TEST stat $B0/brick0/c +TEST stat $B0/brick1/c +TEST stat $B0/brick2/c +TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]] + +TEST $CLI volume set $V0 cluster.quorum-type none +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0 +TEST stat $M0/b +TEST stat $B0/brick0/b #Name heal should be triggered +TEST stat $B0/brick1/b +TEST stat $B0/brick2/b +TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]] +TEST $CLI volume set $V0 cluster.quorum-type auto +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0 + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +#Missing parent xattrs cases +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0 + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST $CLI volume heal $V0 disable +#In cases where a good parent doesn't have pending xattrs but is missing a +#file, name-heal will be triggered +TEST gf_rm_file_and_gfid_link $B0/brick1 c +TEST stat $M0/c +TEST stat $B0/brick0/c +TEST stat $B0/brick1/c +TEST stat $B0/brick2/c +TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]] +cleanup diff --git a/tests/basic/afr/quorum.t b/tests/basic/afr/quorum.t index 252e25468d7..58116ba49f5 100644 --- a/tests/basic/afr/quorum.t +++ b/tests/basic/afr/quorum.t @@ -31,11 +31,7 @@ TEST $CLI volume set $V0 cluster.quorum-count 2 TEST test_write TEST kill_brick $V0 $H0 $B0/${V0}1 TEST ! test_write -EXPECT "abc" cat $M0/b -TEST $CLI volume set $V0 cluster.quorum-reads on -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads TEST ! cat $M0/b -TEST $CLI volume reset $V0 cluster.quorum-reads TEST $CLI volume set $V0 cluster.quorum-type auto EXPECT auto volume_option $V0 cluster.quorum-type @@ -44,11 +40,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 TEST test_write TEST kill_brick $V0 $H0 $B0/${V0}1 TEST ! 
test_write -EXPECT "abc" cat $M0/b -TEST $CLI volume set $V0 cluster.quorum-reads on -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads TEST ! cat $M0/b -TEST $CLI volume reset $V0 cluster.quorum-reads TEST $CLI volume set $V0 cluster.quorum-type none EXPECT none volume_option $V0 cluster.quorum-type @@ -57,11 +49,6 @@ TEST test_write TEST $CLI volume reset $V0 cluster.quorum-type TEST test_write EXPECT "abc" cat $M0/b -TEST $CLI volume set $V0 cluster.quorum-reads on -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads -EXPECT "abc" cat $M0/b -TEST $CLI volume reset $V0 cluster.quorum-reads - cleanup; TEST glusterd; @@ -86,24 +73,14 @@ TEST $CLI volume set $V0 cluster.quorum-count 3 TEST test_write TEST kill_brick $V0 $H0 $B0/${V0}1 TEST ! test_write -EXPECT "abc" cat $M0/b -TEST $CLI volume set $V0 cluster.quorum-reads on -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads TEST ! cat $M0/b -TEST $CLI volume reset $V0 cluster.quorum-reads - TEST $CLI volume set $V0 cluster.quorum-type auto EXPECT auto volume_option $V0 cluster.quorum-type TEST test_write TEST kill_brick $V0 $H0 $B0/${V0}3 TEST ! test_write -EXPECT "abc" cat $M0/b -TEST $CLI volume set $V0 cluster.quorum-reads on -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-replicate-0 quorum-reads TEST ! cat $M0/b -TEST $CLI volume reset $V0 cluster.quorum-reads - TEST $CLI volume set $V0 cluster.quorum-type none EXPECT none volume_option $V0 cluster.quorum-type diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t new file mode 100644 index 00000000000..256ee2aafce --- /dev/null +++ b/tests/basic/afr/rename-data-loss.t @@ -0,0 +1,72 @@ +#!/bin/bash +#Self-heal tests +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 write-behind off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +cd $M0 +TEST `echo "line1" >> file1` +TEST mkdir dir1 +TEST mkdir dir2 +TEST mkdir -p dir1/dira/dirb +TEST `echo "line1">>dir1/dira/dirb/file1` +TEST mkdir delete_me +TEST `echo "line1" >> delete_me/file1` + +#brick0 has witnessed the second write while brick1 is down. +TEST kill_brick $V0 $H0 $B0/brick1 +TEST `echo "line2" >> file1` +TEST `echo "line2" >> dir1/dira/dirb/file1` +TEST `echo "line2" >> delete_me/file1` + +#Toggle the bricks that are up/down. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/brick0 + +#Rename when the 'source' brick0 for data-selfheals is down. +mv file1 file2 +mv dir1/dira dir2 + +#Delete a dir when brick0 is down. 
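#(Editorial note, not part of the original change: renames and deletes made
# while a brick is down leave stale entries tracked under a hidden per-brick
# anonymous-inode directory until heal completes; it can be inspected on the
# backend with:
#   ls -a $B0/brick0 | grep glusterfs-anonymous-inode
# which is exactly how this test locates the directory further below.)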
+rm -rf delete_me +cd - + +#Bring everything up and trigger heal +TEST $CLI volume set $V0 self-heal-daemon on +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 + +#Remount to avoid reading from caches +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "line2" tail -1 $M0/file2 +EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 +TEST ! stat $M0/delete_me/file1 +TEST ! stat $M0/delete_me + +anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/brick0/$anon_inode_name ]] +TEST [[ -d $B0/brick1/$anon_inode_name ]] +cleanup diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t index a8c01a0f377..0360db71a2f 100644 --- a/tests/basic/afr/replace-brick-self-heal.t +++ b/tests/basic/afr/replace-brick-self-heal.t @@ -10,7 +10,7 @@ TEST $CLI volume start $V0 TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off - +TEST $CLI volume set $V0 cluster.heal-timeout 5 TEST $CLI volume set $V0 self-heal-daemon off TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t index 2d400563c2e..a741eee6e5e 100644 --- a/tests/basic/afr/resolve.t +++ b/tests/basic/afr/resolve.t @@ -23,6 +23,10 @@ echo abc > g TEST kill_brick $V0 $H0 $B0/${V0}0 rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a +#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI +#which will create .glusterfs folder. 
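#(Editorial note, not part of the original change: .glusterfs is the brick's
# gfid-to-path handle store; wiping it along with the data simulates a fresh
# disk, and recreating the bare directory on the next line is enough for the
# brick process to start so that self-heal can repopulate both the data and
# the handles.)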
+mkdir $B0/${V0}0/.glusterfs && chmod 600 $B0/${V0}0/.glusterfs + TEST $CLI volume start $V0 force EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 #Test that the lookup returns ENOENT instead of ESTALE diff --git a/tests/basic/afr/root-squash-self-heal.t b/tests/basic/afr/root-squash-self-heal.t index c4fab0a35b2..6e12098465a 100644 --- a/tests/basic/afr/root-squash-self-heal.t +++ b/tests/basic/afr/root-squash-self-heal.t @@ -11,6 +11,9 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 self-heal-daemon off TEST $CLI volume set $V0 server.root-squash on +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on TEST $CLI volume start $V0 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes --use-readdirp=no $M0 TEST kill_brick $V0 $H0 $B0/${V0}0 diff --git a/tests/basic/afr/self-heal.t b/tests/basic/afr/self-heal.t index e1ac17c2d79..10fb152d046 100644 --- a/tests/basic/afr/self-heal.t +++ b/tests/basic/afr/self-heal.t @@ -10,8 +10,6 @@ AREQUAL_PATH=$(dirname $0)/../../utils AREQUAL_BIN=$AREQUAL_PATH/arequal-checksum CFLAGS="" test "`uname -s`" != "Linux" && { - CFLAGS="$CFLAGS -I$(dirname $0)/../../../contrib/argp-standalone "; - CFLAGS="$CFLAGS -L$(dirname $0)/../../../contrib/argp-standalone -largp "; CFLAGS="$CFLAGS -lintl"; } build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS diff --git a/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t new file mode 100644 index 00000000000..7c249c4bcbd --- /dev/null +++ b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t @@ -0,0 +1,124 @@ +#!/bin/bash + +#Test the client side split-brain resolution +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup; + +GET_MDATA_PATH=$(dirname $0)/../../utils +build_tester $GET_MDATA_PATH/get-mdata-xattr.c + +TEST glusterd +TEST pidof glusterd + +count_files () { + ls $1 | wc -l +} + +#Create replica 2 volume +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.quorum-type fixed +TEST $CLI volume set $V0 cluster.quorum-count 1 +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on + +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST mkdir $M0/data +TEST touch $M0/data/file + + +############ Client side healing using favorite-child-policy = mtime ################# +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +mtime1=$(get_mtime $B0/${V0}0/data/file) +mtime2=$(get_mtime $B0/${V0}1/data/file) +if (( $(echo "$mtime1 > $mtime2" | bc -l) )); then + LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1) +else + LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1) +fi + +#file will be in split-brain +cat $M0/data/file > /dev/null +EXPECT "1" echo $? + +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime +TEST $CLI volume start $V0 force + +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0 +cat $M0/data/file > /dev/null +EXPECT "0" echo $? +M0_MD5=$(md5sum $M0/data/file | cut -d\ -f1) +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0 +TEST [ "$LATEST_MTIME_MD5" == "$M0_MD5" ] + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +B0_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1) +B1_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1) +TEST [ "$LATEST_MTIME_MD5" == "$B0_MD5" ] +TEST [ "$LATEST_MTIME_MD5" == "$B1_MD5" ] + +############ Client side directory conservative merge ################# +TEST $CLI volume reset $V0 cluster.favorite-child-policy +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST touch $M0/data/test +files=$(count_files $M0/data) +EXPECT "2" echo $files +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST touch $M0/data/test1 +files=$(count_files $M0/data) +EXPECT "2" echo $files + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#data dir will be in entry split-brain +ls $M0/data > /dev/null +EXPECT "2" echo $? + +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime + +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0 + + +ls $M0/data > /dev/null +EXPECT "0" echo $? 
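#(Editorial note, not part of the original change: afr_get_split_brain_count is
# an afr.rc helper; conceptually it parses the "Number of entries in
# split-brain" counters printed by:
#   $CLI volume heal $V0 info split-brain
# The exact parsing is an assumption here; the point of the surrounding checks
# is that the two split-brained entries disappear once the mtime policy and
# the client-side access below trigger the heal.)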
+ +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0 +#Entry Split-brain is gone, but data self-heal is pending on the files +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 + +cat $M0/data/test > /dev/null +cat $M0/data/test1 > /dev/null + +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +files=$(count_files $M0/data) +EXPECT "3" echo $files + +TEST force_umount $M0 +TEST rm $GET_MDATA_PATH/get-mdata-xattr + +cleanup diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/basic/afr/split-brain-favorite-child-policy.t deleted file mode 100644 index 0e321c6f095..00000000000 --- a/tests/basic/afr/split-brain-favorite-child-policy.t +++ /dev/null @@ -1,202 +0,0 @@ -#!/bin/bash - -#Test the split-brain resolution CLI commands. -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc - -cleanup; - -TEST glusterd -TEST pidof glusterd - -#Create replica 2 volume -TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} -TEST $CLI volume set $V0 performance.write-behind off -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST $CLI volume set $V0 cluster.entry-self-heal off -TEST $CLI volume set $V0 cluster.data-self-heal off -TEST $CLI volume set $V0 cluster.metadata-self-heal off -TEST $CLI volume start $V0 -TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 -TEST touch $M0/file - -############ Healing using favorite-child-policy = ctime ################# -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 - -#file fill in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -# Umount to prevent further FOPS on the file, then find the brick with latest ctime. -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 -ctime1=`stat -c "%.Z" $B0/${V0}0/file` -ctime2=`stat -c "%.Z" $B0/${V0}1/file` -if (( $(echo "$ctime1 > $ctime2" | bc -l) )); then - LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -else - LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -fi -TEST $CLI volume set $V0 cluster.favorite-child-policy ctime -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST [ "$LATEST_CTIME_MD5" == "$B0_MD5" ] -TEST [ "$LATEST_CTIME_MD5" == "$B1_MD5" ] -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 -cat $M0/file > /dev/null -EXPECT "0" echo $? 
- -############ Healing using favorite-child-policy = mtime ################# -TEST $CLI volume set $V0 cluster.favorite-child-policy none -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 - -#file still in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -#We know that the second brick has latest mtime. -LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST $CLI volume set $V0 cluster.favorite-child-policy mtime -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -cat $M0/file > /dev/null -EXPECT "0" echo $? -HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ] - -############ Healing using favorite-child-policy = size ################# -TEST $CLI volume set $V0 cluster.favorite-child-policy none -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 - -#file fill in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -#We know that the second brick has the bigger size file. -BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST $CLI volume set $V0 cluster.favorite-child-policy size -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -cat $M0/file > /dev/null -EXPECT "0" echo $? 
-HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ] - -############ Healing using favorite-child-policy = majority on replica-3 ################# - -#Convert volume to replica-3 -TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 - -TEST $CLI volume set $V0 cluster.quorum-type none -TEST $CLI volume set $V0 cluster.favorite-child-policy none -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST kill_brick $V0 $H0 $B0/${V0}0 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 -TEST kill_brick $V0 $H0 $B0/${V0}1 -TEST kill_brick $V0 $H0 $B0/${V0}2 -TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240 - -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 -TEST $CLI volume set $V0 cluster.self-heal-daemon on -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -TEST $CLI volume heal $V0 - -#file fill in split-brain -cat $M0/file > /dev/null -EXPECT "1" echo $? - -#We know that the second and third bricks agree with each other. Pick any one of them. -MAJORITY_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) -TEST $CLI volume set $V0 cluster.favorite-child-policy majority -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 -TEST $CLI volume heal $V0 -EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 -cat $M0/file > /dev/null -EXPECT "0" echo $? 
-HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) -TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ] - -TEST force_umount $M0 -cleanup diff --git a/tests/basic/afr/split-brain-heal-info.t b/tests/basic/afr/split-brain-heal-info.t index 66275c57207..2e4742fff08 100644 --- a/tests/basic/afr/split-brain-heal-info.t +++ b/tests/basic/afr/split-brain-heal-info.t @@ -47,9 +47,11 @@ SPB_FILES=$(($SPB_FILES + 1)) #### Simulate entry-split-brain TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0 TEST touch $M0/espb/a volume_start_force $V0 TEST kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1 TEST mkdir $M0/espb/a volume_start_force $V0 SPB_FILES=$(($SPB_FILES + 1)) diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t new file mode 100644 index 00000000000..676788fce3f --- /dev/null +++ b/tests/basic/afr/split-brain-healing-ctime.t @@ -0,0 +1,252 @@ +#!/bin/bash + +#Test the split-brain resolution CLI commands. +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +function get_replicate_subvol_number { + local filename=$1 + #get_backend_paths + if [ -f $B0/${V0}1/$filename ] + then + echo 0 + elif [ -f $B0/${V0}3/$filename ] + then echo 1 + else + echo -1 + fi +} + +cleanup; + +AREQUAL_PATH=$(dirname $0)/../../utils +GET_MDATA_PATH=$(dirname $0)/../../utils +CFLAGS="" +test "`uname -s`" != "Linux" && { + CFLAGS="$CFLAGS -lintl"; +} +build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS +build_tester $GET_MDATA_PATH/get-mdata-xattr.c + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +cd $M0 +for i in {1..10} +do + echo "Initial content">>file$i +done + +replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`) +replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`) + +############ Create data split-brain in the files. ########################### +TEST kill_brick $V0 $H0 $B0/${V0}1 +for file in ${!replica_0_files_list[*]} +do + echo "B1 is down">>${replica_0_files_list[$file]} +done +TEST kill_brick $V0 $H0 $B0/${V0}3 +for file in ${!replica_1_files_list[*]} +do + echo "B3 is down">>${replica_1_files_list[$file]} +done + +SMALLER_FILE_SIZE=$(stat -c %s file1) + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +TEST kill_brick $V0 $H0 $B0/${V0}2 +for file in ${!replica_0_files_list[*]} +do + echo "B2 is down">>${replica_0_files_list[$file]} + echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]} +done +TEST kill_brick $V0 $H0 $B0/${V0}4 +for file in ${!replica_1_files_list[*]} +do + echo "B4 is down">>${replica_1_files_list[$file]} + echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]} +done + +BIGGER_FILE_SIZE=$(stat -c %s file1) +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 + + +############### Accessing the files should now give EIO. ############################### +TEST ! cat file1 +TEST ! 
cat file2 +TEST ! cat file3 +TEST ! cat file4 +TEST ! cat file5 +TEST ! cat file6 +TEST ! cat file7 +TEST ! cat file8 +TEST ! cat file9 +TEST ! cat file10 +################### +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3 + +################ Heal file1 using the bigger-file option ############## +$CLI volume heal $V0 split-brain bigger-file /file1 +EXPECT "0" echo $? +EXPECT $BIGGER_FILE_SIZE stat -c %s file1 + +################ Heal file2 using the bigger-file option and its gfid ############## +subvolume=$(get_replicate_subvol_number file2) +if [ $subvolume == 0 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2) +elif [ $subvolume == 1 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2) +fi +GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" +$CLI volume heal $V0 split-brain bigger-file $GFIDSTR +EXPECT "0" echo $? + +################ Heal file3 using the source-brick option ############## +################ Use the brick having smaller file size as source ####### +subvolume=$(get_replicate_subvol_number file3) +if [ $subvolume == 0 ] +then + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3 +elif [ $subvolume == 1 ] +then + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 +fi +EXPECT "0" echo $? +EXPECT $SMALLER_FILE_SIZE stat -c %s file3 + +################ Heal file4 using the source-brick option and its gfid ############## +################ Use the brick having smaller file size as source ####### +subvolume=$(get_replicate_subvol_number file4) +if [ $subvolume == 0 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4) + GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR +elif [ $subvolume == 1 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4) + GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR +fi +EXPECT "0" echo $? +EXPECT $SMALLER_FILE_SIZE stat -c %s file4 + +# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed +# as part of metadata heal, so the mtime would be the same; the file can't be +# healed using the 'latest-mtime' policy, use the 'source-brick' option instead. +################ Heal file5 using the source-brick option ############## +subvolume=$(get_replicate_subvol_number file5) +if [ $subvolume == 0 ] +then + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5 +elif [ $subvolume == 1 ] +then + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5 +fi +EXPECT "0" echo $? 
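#(Editorial note, not part of the original change: with ctime enabled the
# timestamps live in the trusted.glusterfs.mdata xattr on each brick, which is
# why this test builds get-mdata-xattr.c and compares times via get_mtime
# instead of stat -c %Y. The raw xattr can be inspected with, e.g.:
#   getfattr -n trusted.glusterfs.mdata -e hex $B0/${V0}1/file5
# assuming getfattr is available on the test host.)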
+ +if [ $subvolume == 0 ] +then + mtime1_after_heal=$(get_mtime $B0/${V0}1/file5) + mtime2_after_heal=$(get_mtime $B0/${V0}2/file5) +elif [ $subvolume == 1 ] +then + mtime1_after_heal=$(get_mtime $B0/${V0}3/file5) + mtime2_after_heal=$(get_mtime $B0/${V0}4/file5) +fi + +#TODO: Do the below comparisons with full sub-second resolution + +TEST [ $mtime1_after_heal -eq $mtime2_after_heal ] + +mtime_mount_after_heal=$(stat -c %Y file5) + +TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ] + +################ Heal file6 using the source-brick option and its gfid ############## +subvolume=$(get_replicate_subvol_number file6) +if [ $subvolume == 0 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6) + GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR +elif [ $subvolume == 1 ] +then + GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6) + GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" + $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR +fi +EXPECT "0" echo $? + +if [ $subvolume == 0 ] +then + mtime1_after_heal=$(get_mtime $B0/${V0}1/file6) + mtime2_after_heal=$(get_mtime $B0/${V0}2/file6) +elif [ $subvolume == 1 ] +then + mtime1_after_heal=$(get_mtime $B0/${V0}3/file6) + mtime2_after_heal=$(get_mtime $B0/${V0}4/file6) +fi + +#TODO: Do the below comparisons with full sub-second resolution + +TEST [ $mtime1_after_heal -eq $mtime2_after_heal ] + +mtime_mount_after_heal=$(stat -c %Y file6) + +TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ] + +################ Heal remaining SB'ed files of replica_0 using B1 as source ############## +$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 +EXPECT "0" echo $? + +################ Heal remaining SB'ed files of replica_1 using B3 as source ############## +$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 +EXPECT "0" echo $? + +############### Reading the files should now succeed. ############################### +TEST cat file1 +TEST cat file2 +TEST cat file3 +TEST cat file4 +TEST cat file5 +TEST cat file6 +TEST cat file7 +TEST cat file8 +TEST cat file9 +TEST cat file10 + +################ File contents on the bricks must be the same. ################################ +TEST diff <(arequal-checksum -p $B0/${V0}1 -i .glusterfs) <(arequal-checksum -p $B0/${V0}2 -i .glusterfs) +TEST diff <(arequal-checksum -p $B0/${V0}3 -i .glusterfs) <(arequal-checksum -p $B0/${V0}4 -i .glusterfs) + +############### Trying to heal files not in SB should fail. ############################### +$CLI volume heal $V0 split-brain bigger-file /file1 +EXPECT "1" echo $? +$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3 +EXPECT "1" echo $? 
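#(Editorial note, not part of the original change: these heal invocations are
# deliberately not wrapped in TEST because failure is the expected outcome;
# the pattern
#   $CLI volume heal $V0 split-brain bigger-file /file1
#   EXPECT "1" echo $?
# asserts the non-zero exit status explicitly, mirroring the EXPECT "0"
# checks used for the successful heals above.)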
+ +cd - +TEST rm $AREQUAL_PATH/arequal-checksum +TEST rm $GET_MDATA_PATH/get-mdata-xattr +cleanup diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t index 8b7eaffb268..315e815eb7e 100644 --- a/tests/basic/afr/split-brain-healing.t +++ b/tests/basic/afr/split-brain-healing.t @@ -20,13 +20,14 @@ function get_replicate_subvol_number { cleanup; AREQUAL_PATH=$(dirname $0)/../../utils +GET_MDATA_PATH=$(dirname $0)/../../utils CFLAGS="" test "`uname -s`" != "Linux" && { - CFLAGS="$CFLAGS -I$(dirname $0)/../../../contrib/argp-standalone "; - CFLAGS="$CFLAGS -L$(dirname $0)/../../../contrib/argp-standalone -largp "; CFLAGS="$CFLAGS -lintl"; } build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS +build_tester $GET_MDATA_PATH/get-mdata-xattr.c + TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} @@ -34,6 +35,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon off TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 ctime off TEST $CLI volume start $V0 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 @@ -78,7 +80,6 @@ do done BIGGER_FILE_SIZE=$(stat -c %s file1) - TEST $CLI volume start $V0 force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 @@ -155,13 +156,13 @@ EXPECT $SMALLER_FILE_SIZE stat -c %s file4 subvolume=$(get_replicate_subvol_number file5) if [ $subvolume == 0 ] then - mtime1=$(stat -c %Y $B0/${V0}1/file5) - mtime2=$(stat -c %Y $B0/${V0}2/file5) + mtime1=$(get_mtime $B0/${V0}1/file5) + mtime2=$(get_mtime $B0/${V0}2/file5) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) elif [ $subvolume == 1 ] then - mtime1=$(stat -c %Y $B0/${V0}3/file5) - mtime2=$(stat -c %Y $B0/${V0}4/file5) + mtime1=$(get_mtime $B0/${V0}3/file5) + mtime2=$(get_mtime $B0/${V0}4/file5) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) fi $CLI volume heal $V0 split-brain latest-mtime /file5 @@ -169,12 +170,12 @@ EXPECT "0" echo $? if [ $subvolume == 0 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file5) - mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file5) + mtime1_after_heal=$(get_mtime $B0/${V0}1/file5) + mtime2_after_heal=$(get_mtime $B0/${V0}2/file5) elif [ $subvolume == 1 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file5) - mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file5) + mtime1_after_heal=$(get_mtime $B0/${V0}3/file5) + mtime2_after_heal=$(get_mtime $B0/${V0}4/file5) fi #TODO: To below comparisons on full sub-second resolution @@ -191,14 +192,14 @@ subvolume=$(get_replicate_subvol_number file6) if [ $subvolume == 0 ] then GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6) - mtime1=$(stat -c %Y $B0/${V0}1/file6) - mtime2=$(stat -c %Y $B0/${V0}2/file6) + mtime1=$(get_mtime $B0/${V0}1/file6) + mtime2=$(get_mtime $B0/${V0}2/file6) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) elif [ $subvolume == 1 ] then GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6) - mtime1=$(stat -c %Y $B0/${V0}3/file6) - mtime2=$(stat -c %Y $B0/${V0}4/file6) + mtime1=$(get_mtime $B0/${V0}3/file6) + mtime2=$(get_mtime $B0/${V0}4/file6) LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2)) fi GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)" @@ -207,12 +208,12 @@ EXPECT "0" echo $? 
if [ $subvolume == 0 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}1/file6) - mtime2_after_heal=$(stat -c %Y $B0/${V0}2/file6) + mtime1_after_heal=$(get_mtime $B0/${V0}1/file6) + mtime2_after_heal=$(get_mtime $B0/${V0}2/file6) elif [ $subvolume == 1 ] then - mtime1_after_heal=$(stat -c %Y $B0/${V0}3/file6) - mtime2_after_heal=$(stat -c %Y $B0/${V0}4/file6) + mtime1_after_heal=$(get_mtime $B0/${V0}3/file6) + mtime2_after_heal=$(get_mtime $B0/${V0}4/file6) fi #TODO: To below comparisons on full sub-second resolution @@ -256,4 +257,5 @@ EXPECT "1" echo $? cd - TEST rm $AREQUAL_PATH/arequal-checksum +TEST rm $GET_MDATA_PATH/get-mdata-xattr cleanup diff --git a/tests/basic/afr/split-brain-open.t b/tests/basic/afr/split-brain-open.t new file mode 100644 index 00000000000..9b2f2856047 --- /dev/null +++ b/tests/basic/afr/split-brain-open.t @@ -0,0 +1,38 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 + +#Disable self-heal-daemon +TEST $CLI volume heal $V0 disable + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; + +TEST touch $M0/data-split-brain.txt + +#Create data split-brain +TEST kill_brick $V0 $H0 $B0/${V0}0 + +`echo "brick1_alive" > $M0/data-split-brain.txt` +TEST [ $? == 0 ]; + +TEST $CLI volume start $V0 force +TEST kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +`echo "brick0_alive" > $M0/data-split-brain.txt` +TEST [ $? == 0 ]; + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +echo "all-alive" >> $M0/data-split-brain.txt +TEST [ $? != 0 ]; + +cleanup; diff --git a/tests/basic/afr/split-brain-resolution.t b/tests/basic/afr/split-brain-resolution.t index e75e15aaa97..834237c96ec 100644 --- a/tests/basic/afr/split-brain-resolution.t +++ b/tests/basic/afr/split-brain-resolution.t @@ -11,6 +11,9 @@ function get_split_brain_status { TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on TEST $CLI volume start $V0 #Disable self-heal-daemon @@ -71,6 +74,18 @@ TEST setfattr -n replica.split-brain-choice -v none $M0/data-split-brain.txt TEST ! getfattr -n user.test $M0/metadata-split-brain.txt TEST ! cat $M0/data-split-brain.txt +#Check that after timeout fops result in EIO again. +#Set one minute timeout +TEST setfattr -n replica.split-brain-choice-timeout -v 1 $M0/ +TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/data-split-brain.txt +EXPECT "brick1_alive" cat $M0/data-split-brain.txt +TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt +EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt +#Wait until timeout completes and test that the fops fail again +sleep 62 +TEST ! getfattr -n user.test $M0/metadata-split-brain.txt +TEST ! cat $M0/data-split-brain.txt + #Negative test cases should fail TEST ! setfattr -n replica.split-brain-choice -v $V0-client-4 $M0/data-split-brain.txt TEST ! 
diff --git a/tests/basic/afr/split-brain-resolution.t b/tests/basic/afr/split-brain-resolution.t
index e75e15aaa97..834237c96ec 100644
--- a/tests/basic/afr/split-brain-resolution.t
+++ b/tests/basic/afr/split-brain-resolution.t
@@ -11,6 +11,9 @@ function get_split_brain_status {
TEST glusterd
TEST pidof glusterd
TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
TEST $CLI volume start $V0

#Disable self-heal-daemon
@@ -71,6 +74,18 @@ TEST setfattr -n replica.split-brain-choice -v none $M0/data-split-brain.txt
TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
TEST ! cat $M0/data-split-brain.txt
+#Check that after the timeout, fops result in EIO again.
+#Set a one-minute timeout
+TEST setfattr -n replica.split-brain-choice-timeout -v 1 $M0/
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/data-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+#Wait until the timeout completes and test that the fops fail again
+sleep 62
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+TEST ! cat $M0/data-split-brain.txt
+
#Negative test cases should fail
TEST ! setfattr -n replica.split-brain-choice -v $V0-client-4 $M0/data-split-brain.txt
TEST ! setfattr -n replica.split-brain-heal-finalize -v $V0-client-4 $M0/metadata-split-brain.txt
@@ -85,3 +100,6 @@ EXPECT "brick1_alive" cat $M0/data-split-brain.txt
EXPECT 0 get_pending_heal_count $V0
cleanup;
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/afr/ta-check-locks.t b/tests/basic/afr/ta-check-locks.t
new file mode 100644
index 00000000000..c0102c35b7b
--- /dev/null
+++ b/tests/basic/afr/ta-check-locks.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+#This test checks that all the locks on
+#the ta file are being held and released properly
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+function get_lock_count_on_ta()
+{
+    tapid=`cat $B0/ta.pid`
+    local sfile=$(generate_statedump $tapid)
+    count=$(grep "inodelk-count" $sfile | cut -f2 -d'=' | tail -1)
+    ncount=$(grep "inodelk.inodelk" $sfile | grep "len=1" | wc -l)
+    #Debug output goes to stderr so that callers parse only the count below
+    echo "count = $count : ncount = $ncount" >&2
+    if [ "$count" = "" ]
+    then
+        count=0
+    fi
+
+    if [ "$count" -eq "$ncount" ]
+    then
+        echo "$count"
+    else
+        echo "-1"
+    fi
+}
+
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+TEST ta_start_mount_process $M1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M1 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST ta_create_shd_volfile brick0 brick1 ta
+TEST ta_start_shd_process glustershd
+shd_pid=$(cat $B0/glustershd.pid)
+
+TEST touch $M0/a.txt
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "0" get_lock_count_on_ta
+
+TEST ta_kill_brick brick0
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "1" get_lock_count_on_ta
+
+echo "Hello" >> $M1/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+echo "xyz" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+chmod 0666 $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_lock_count_on_ta
+
+cleanup;
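get_lock_count_on_ta above cross-checks two views of the same statedump: the per-inode inodelk-count summary, and the number of granted one-byte locks, since thin-arbiter inodelks are taken with len=1 on the ta file; the function returns -1 when the two disagree. Roughly, the lines it greps look like this (statedump field layout assumed from typical brick dumps, values invented):

# Illustrative: what the two greps in get_lock_count_on_ta look at.
#   inodelk-count=2
#   inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=1, ...
sfile=$(generate_statedump $(cat $B0/ta.pid))
grep "inodelk-count" $sfile | cut -f2 -d'=' | tail -1    # summary counter
grep "inodelk.inodelk" $sfile | grep "len=1" | wc -l     # granted 1-byte locks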
diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t
new file mode 100644
index 00000000000..3cfc16b9b8a
--- /dev/null
+++ b/tests/basic/afr/ta-read.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# Test read transaction logic for thin-arbiter.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/FILE
+TEST ls $B0/brick0/FILE
+TEST ls $B0/brick1/FILE
+TEST ! ls $B0/ta/FILE
+
+# Kill one brick and write to FILE.
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+echo "brick0 down" >> $M0/FILE
+TEST [ $? -eq 0 ]
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+
+#Umount and mount to remove cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+# Read must be allowed since the good brick is up.
+TEST cat $M0/FILE
+
+#Umount and mount to remove cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+# Toggle the good and bad data brick processes.
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+# Read must now fail.
+TEST ! cat $M0/FILE
+
+# Bring all data bricks up, and kill TA.
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+TA_PID=$(ta_get_pid_by_brick_name ta)
+TEST [ -n "$TA_PID" ]
+TEST ta_kill_brick ta
+TA_PID=$(ta_get_pid_by_brick_name ta)
+TEST [ -z "$TA_PID" ]
+# Read must now succeed.
+TEST cat $M0/FILE
+cleanup;
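The 24-digit hex values asserted in ta-read.t above and in ta-shd.t below are the AFR pending xattr: three big-endian 32-bit counters for pending data, metadata and entry operations. A small decoding sketch (the helper is ours for illustration, not part of the test framework):

# 0x000000010000000000000000 -> data=1 metadata=0 entry=0
function decode_afr_pending {
    local hex=${1#0x}
    printf 'data=%d metadata=%d entry=%d\n' \
        "0x${hex:0:8}" "0x${hex:8:8}" "0x${hex:16:8}"
}
decode_afr_pending 000000010000000000000000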
diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t
new file mode 100644
index 00000000000..96ecfc678e0
--- /dev/null
+++ b/tests/basic/afr/ta-shd.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST ta_create_shd_volfile brick0 brick1 ta
+TEST ta_start_shd_process glustershd
+
+TEST touch $M0/a.txt
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+echo "Hello" >> $M0/a.txt
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
+
+#TODO: After the write txn changes are merged, take statedump of TA process and
+#check whether AFR_TA_DOM_NOTIFY lock is held by the client here. Take the
+#statedump again after line #38 to check AFR_TA_DOM_NOTIFY lock is released by
+#the SHD process.
+
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
+EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
+
+#Kill the previously up brick and try reading from the other brick. Since the
+#heal has happened, the file content should be the same.
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+#Umount and mount to remove cached data.
+TEST umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT "Hello" cat $M0/a.txt
+cleanup;
diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t
new file mode 100644
index 00000000000..096ca9f47cf
--- /dev/null
+++ b/tests/basic/afr/ta-write-on-bad-brick.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/a.txt
+TEST ls $B0/brick0/a.txt
+TEST ls $B0/brick1/a.txt
+TEST ! ls $B0/ta/a.txt
+
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+#Good Data brick is down. TA and bad brick are UP
+
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+# Good Data brick is UP. Bad and TA are down
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST ta_kill_brick ta
+TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+# Good and Bad data bricks are UP. TA is down
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+cleanup;
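ta-write-on-bad-brick.t above walks the write-quorum matrix for a thin-arbiter volume; the outcomes it asserts are, in order:

good brick   bad brick   TA     write (dd)
up           up          up     succeeds
down         up          up     fails    (only the stale copy is reachable)
up           down        down   fails    (no second vote for the transaction)
up           up          down   succeeds (the two data bricks form quorum)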
diff --git a/tests/basic/afr/ta.t b/tests/basic/afr/ta.t
new file mode 100644
index 00000000000..05d48431c95
--- /dev/null
+++ b/tests/basic/afr/ta.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/a.txt
+TEST ls $B0/brick0/a.txt
+TEST ls $B0/brick1/a.txt
+TEST ! ls $B0/ta/a.txt
+
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST touch $M0/b.txt
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1
+EXPECT "000000010000000200000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/b.txt
+#The new-entry mark leads to pending data on the file and on the ta file
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+TEST ! ls $B0/brick0/b.txt
+TEST ls $B0/brick1/b.txt
+
+#Try to create an entry while the good brick is down and the bad brick is up;
+#the create should fail.
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ! touch $M0/d.txt
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+
+TEST ta_kill_brick ta
+# Entry create must fail if only one brick is UP, even if that is a good brick.
+TEST ! touch $M0/c.txt
+TEST ! ls $B0/brick0/c.txt
+TEST ! ls $B0/brick1/c.txt
+
+cleanup;
diff --git a/tests/basic/afr/tarissue.t b/tests/basic/afr/tarissue.t
index f24d0f74f93..83f7463130c 100644
--- a/tests/basic/afr/tarissue.t
+++ b/tests/basic/afr/tarissue.t
@@ -4,6 +4,8 @@
. $(dirname $0)/../../volume.rc
. $(dirname $0)/../../nfs.rc

+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
TESTS_EXPECTED_IN_LOOP=10
cleanup;
@@ -35,6 +37,3 @@ TEST rm -f /tmp/dir1.tar.gz
EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
cleanup;
-
-#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1337791
-#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1337791
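For reference, a hedged example of exercising one of the new scripts locally; this assumes a built glusterfs source tree, and that the run-tests.sh wrapper accepts individual .t files as it does in recent releases:

# From the root of a glusterfs checkout, as root:
prove -vf tests/basic/afr/ta-read.t
# or via the wrapper, which sets up the same environment CI uses:
./run-tests.sh tests/basic/afr/ta-read.t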
