diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2019-05-21 10:58:44 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2019-05-24 15:00:28 +0000 |
commit | f6c0b59725615da10435c40fec0f26dae542de74 (patch) | |
tree | b5ebd98ba71c8da48a09cb26bc987f22d09fee21 /tests | |
parent | 0b83b8af60916dcdb850f15da7e7b406809dd1d5 (diff) |
tests: Fix spurious failures in ta-write-on-bad-brick.t
Problem:
afr_child_up_status_meta works only when a LOOKUP on $M0 succeeds.
There are cases where quorum is not met and the LOOKUP on $M0 fails, which
leads to failures similar to:
grep: /mnt/glusterfs/0/.meta/graphs/active/patchy-replicate-0/private: Transport endpoint is not connected
This happened intermittently, whenever the lookup was not served from
cache (attribute-timeout or md-cache).
Fix:
Determine child-up status from the mount process's statedump instead. Also
change the mount options to include --entry-timeout=0 and --attribute-timeout=0.
updates bz#1193929
Change-Id: Ic0de72c3006d7399a5feb3e4d10d4748949b2ab3
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/basic/afr/ta-read.t | 10 | ||||
-rw-r--r-- | tests/basic/afr/ta-shd.t | 6 | ||||
-rw-r--r-- | tests/basic/afr/ta-write-on-bad-brick.t | 14 | ||||
-rw-r--r-- | tests/thin-arbiter.rc | 22 | ||||
-rw-r--r-- | tests/volume.rc | 1 |
5 files changed, 36 insertions, 17 deletions
diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t index d6f9332d757..3cfc16b9b8a 100644 --- a/tests/basic/afr/ta-read.t +++ b/tests/basic/afr/ta-read.t @@ -25,7 +25,7 @@ TEST ! ls $B0/ta/FILE # Kill one brick and write to FILE. TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0 echo "brick0 down">> $M0/FILE TEST [ $? -eq 0 ] EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE @@ -35,7 +35,7 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST ta_start_mount_process $M0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 # Read must be allowed since good brick is up. TEST cat $M0/FILE @@ -45,15 +45,15 @@ TEST ta_start_mount_process $M0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0 # Toggle good and bad data brick processes. TEST ta_start_brick_process brick0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 # Read must now fail. TEST ! cat $M0/FILE # Bring all data bricks up, and kill TA. 
TEST ta_start_brick_process brick1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 TA_PID=$(ta_get_pid_by_brick_name ta) TEST [ -n $TA_PID ] TEST ta_kill_brick ta diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t index bb2e58b3f77..96ecfc678e0 100644 --- a/tests/basic/afr/ta-shd.t +++ b/tests/basic/afr/ta-shd.t @@ -22,7 +22,7 @@ TEST ta_start_shd_process glustershd TEST touch $M0/a.txt TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0 echo "Hello" >> $M0/a.txt EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2 @@ -33,14 +33,14 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/ #the SHD process. TEST ta_start_brick_process brick0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2 #Kill the previously up brick and try reading from other brick. Since the heal #has happened file content should be same. TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 #Umount and mount to remove cached data. 
TEST umount $M0 TEST ta_start_mount_process $M0 diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t index 18cb65b3a76..096ca9f47cf 100644 --- a/tests/basic/afr/ta-write-on-bad-brick.t +++ b/tests/basic/afr/ta-write-on-bad-brick.t @@ -26,26 +26,26 @@ TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 #Good Data brick is down. TA and bad brick are UP TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0 TEST ta_start_brick_process brick1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5 # Good Data brick is UP. Bad and TA are down TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1 TEST ta_start_brick_process brick0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 TEST ta_kill_brick ta TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5 # Good and Bad data bricks are UP. 
TA is down TEST ta_start_brick_process brick1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0 TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5 cleanup; diff --git a/tests/thin-arbiter.rc b/tests/thin-arbiter.rc index 72202eeb53f..29edeb1222b 100644 --- a/tests/thin-arbiter.rc +++ b/tests/thin-arbiter.rc @@ -173,7 +173,7 @@ function ta_start_mount_process() { mkdir -p $1 identifier=$(echo $1 | tr / .) - if glusterfs -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1 + if glusterfs --entry-timeout=0 --attribute-timeout=0 -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1 then cat $B0/$identifier.pid else @@ -182,6 +182,13 @@ function ta_start_mount_process() fi } +function ta_get_mount_pid() +{ + local mount_path=$1 + identifier=$(echo $mount_path | tr / .) + cat $B0/${identifier}.pid +} + function ta_create_mount_volfile() { local b0=$B0/$1 @@ -607,3 +614,16 @@ function ta_start_shd_process() return 1 fi } + +function ta_mount_child_up_status() +{ + local mount_path=$1 + #brick_id is (brick-num in volume info - 1) + local vol=$2 + local brick_id=$3 + local pid=$(ta_get_mount_pid $mount_path) + local fpath=$(generate_statedump $pid) + up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=') + rm -f $fpath + echo "$up" +} diff --git a/tests/volume.rc b/tests/volume.rc index f652a5b0099..af7690ba5ff 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -138,7 +138,6 @@ function wait_statedump_ready { } function generate_statedump { - local fpath="" pid=$1 #remove old stale statedumps cleanup_statedump $pid |