diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2019-05-21 10:58:44 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2019-05-24 15:00:28 +0000 | 
| commit | f6c0b59725615da10435c40fec0f26dae542de74 (patch) | |
| tree | b5ebd98ba71c8da48a09cb26bc987f22d09fee21 | |
| parent | 0b83b8af60916dcdb850f15da7e7b406809dd1d5 (diff) | |
tests: Fix spurious failures in ta-write-on-bad-brick.t
Problem:
afr_child_up_status_meta works only when LOOKUP on $M0 is successful.
There are cases where quorum is not met and LOOKUP fails on $M0, which
leads to failures similar to:
grep: /mnt/glusterfs/0/.meta/graphs/active/patchy-replicate-0/private: Transport endpoint is not connected
This happened only intermittently, depending on attribute-timeout and on
md-cache not serving the lookup.
Fix:
Find child-up status based on statedump instead. Also change the mount
options to include --entry-timeout=0 and --attribute-timeout=0.
updates bz#1193929
Change-Id: Ic0de72c3006d7399a5feb3e4d10d4748949b2ab3
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
| -rw-r--r-- | tests/basic/afr/ta-read.t | 10 | ||||
| -rw-r--r-- | tests/basic/afr/ta-shd.t | 6 | ||||
| -rw-r--r-- | tests/basic/afr/ta-write-on-bad-brick.t | 14 | ||||
| -rw-r--r-- | tests/thin-arbiter.rc | 22 | ||||
| -rw-r--r-- | tests/volume.rc | 1 | 
5 files changed, 36 insertions, 17 deletions
diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t index d6f9332d757..3cfc16b9b8a 100644 --- a/tests/basic/afr/ta-read.t +++ b/tests/basic/afr/ta-read.t @@ -25,7 +25,7 @@ TEST ! ls $B0/ta/FILE  # Kill one brick and write to FILE.  TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0  echo "brick0 down">> $M0/FILE  TEST [ $? -eq 0 ]  EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE @@ -35,7 +35,7 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/  EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0  TEST ta_start_mount_process $M0  EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1  # Read must be allowed since good brick is up.  TEST  cat $M0/FILE @@ -45,15 +45,15 @@ TEST ta_start_mount_process $M0  EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0  # Toggle good and bad data brick processes.  TEST ta_start_brick_process brick0 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0  TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1  # Read must now fail.  TEST ! cat $M0/FILE  # Bring all data bricks up, and kill TA.  
TEST ta_start_brick_process brick1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1  TA_PID=$(ta_get_pid_by_brick_name ta)  TEST [ -n $TA_PID ]  TEST ta_kill_brick ta diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t index bb2e58b3f77..96ecfc678e0 100644 --- a/tests/basic/afr/ta-shd.t +++ b/tests/basic/afr/ta-shd.t @@ -22,7 +22,7 @@ TEST ta_start_shd_process glustershd  TEST touch $M0/a.txt  TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0  echo "Hello" >> $M0/a.txt  EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt  EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2 @@ -33,14 +33,14 @@ EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/  #the SHD process.  TEST ta_start_brick_process brick0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0  EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt  EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2  #Kill the previously up brick and try reading from other brick. Since the heal  #has happened file content should be same.  TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1  #Umount and mount to remove cached data.  
TEST umount $M0  TEST ta_start_mount_process $M0 diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t index 18cb65b3a76..096ca9f47cf 100644 --- a/tests/basic/afr/ta-write-on-bad-brick.t +++ b/tests/basic/afr/ta-write-on-bad-brick.t @@ -26,26 +26,26 @@ TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5  #Good Data brick is down. TA and bad brick are UP  TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1  TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5  TEST ta_kill_brick brick0 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0"  ta_mount_child_up_status $M0 $V0 0  TEST ta_start_brick_process brick1 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1  TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5  # Good Data brick is UP. Bad and TA are down  TEST ta_kill_brick brick1 -EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0"  ta_mount_child_up_status $M0 $V0 1  TEST ta_start_brick_process brick0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0  TEST ta_kill_brick ta  TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5  # Good and Bad data bricks are UP. 
TA is down  TEST ta_start_brick_process brick1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0  TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5  cleanup; diff --git a/tests/thin-arbiter.rc b/tests/thin-arbiter.rc index 72202eeb53f..29edeb1222b 100644 --- a/tests/thin-arbiter.rc +++ b/tests/thin-arbiter.rc @@ -173,7 +173,7 @@ function ta_start_mount_process()  {          mkdir -p $1          identifier=$(echo $1 | tr / .) -        if glusterfs  -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1 +        if glusterfs --entry-timeout=0 --attribute-timeout=0 -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1          then                  cat $B0/$identifier.pid          else @@ -182,6 +182,13 @@ function ta_start_mount_process()          fi  } +function ta_get_mount_pid() +{ +    local mount_path=$1 +    identifier=$(echo $mount_path | tr / .) 
+    cat $B0/${identifier}.pid +} +  function ta_create_mount_volfile()  {          local b0=$B0/$1 @@ -607,3 +614,16 @@ function ta_start_shd_process()                  return 1          fi  } + +function ta_mount_child_up_status() +{ +        local mount_path=$1 +        #brick_id is (brick-num in volume info - 1) +        local vol=$2 +        local brick_id=$3 +        local pid=$(ta_get_mount_pid $mount_path) +        local fpath=$(generate_statedump $pid) +        up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=') +        rm -f $fpath +        echo "$up" +} diff --git a/tests/volume.rc b/tests/volume.rc index f652a5b0099..af7690ba5ff 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -138,7 +138,6 @@ function wait_statedump_ready {  }  function generate_statedump { -        local fpath=""          pid=$1          #remove old stale statedumps          cleanup_statedump $pid  | 
