diff options
author | Ravishankar N <ravishankar@redhat.com> | 2016-05-02 18:45:44 +0530 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-05-25 11:55:08 -0700 |
commit | 2f29065ae4715c9c4a9d20c4d15311bebd3ddb0e (patch) | |
tree | 163497880075bfcc2b97f92be2d2fb86ed95ab51 /tests/basic | |
parent | f8f16595d8dd8c8a869630bb77b7fd1b42b97e08 (diff) |
afr: Automagic unsplit-brain by [ctime|mtime|size|majority]
Introduce cluster.favorite-child-policy which when enabled with
[ctime|mtime|size|majority], automatically heals files that are in
split-brian.
The majority policy will not pick a source if there is no majority.
The other three policies pick the first brick with a valid reply and
non-zero ctime/mtime/size as source.
Change-Id: I3c099a0404082213860f74f2c9b4d207cfaedb76
BUG: 1328224
Original-author: Richard Wareing <rwareing@fb.com>
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/14026
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anuradha Talur <atalur@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'tests/basic')
-rw-r--r-- | tests/basic/afr/split-brain-favorite-child-policy.t | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/basic/afr/split-brain-favorite-child-policy.t new file mode 100644 index 00000000000..66fcd67a031 --- /dev/null +++ b/tests/basic/afr/split-brain-favorite-child-policy.t @@ -0,0 +1,175 @@ +#!/bin/bash + +#Test the split-brain resolution CLI commands. +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +#Create replica 2 volume +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST touch $M0/file + +############ Healing using favorite-child-policy = ctime ################# +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 + +#file fill in split-brain +cat $M0/file > /dev/null +EXPECT "1" echo $? + +#We know that the first brick has latest ctime. +LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) +TEST $CLI volume set $V0 cluster.favorite-child-policy ctime +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +cat $M0/file > /dev/null +EXPECT "0" echo $? +HEALED_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) +TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ] + +############ Healing using favorite-child-policy = mtime ################# +TEST $CLI volume set $V0 cluster.favorite-child-policy none +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 + +#file still in split-brain +cat $M0/file > /dev/null +EXPECT "1" echo $? + +#We know that the second brick has latest mtime. +LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) +TEST $CLI volume set $V0 cluster.favorite-child-policy mtime +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +cat $M0/file > /dev/null +EXPECT "0" echo $? +HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) +TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ] + +############ Healing using favorite-child-policy = size ################# +TEST $CLI volume set $V0 cluster.favorite-child-policy none +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 + +#file fill in split-brain +cat $M0/file > /dev/null +EXPECT "1" echo $? + +#We know that the second brick has the bigger size file. +BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) +TEST $CLI volume set $V0 cluster.favorite-child-policy size +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +cat $M0/file > /dev/null +EXPECT "0" echo $? +HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) +TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ] + +############ Healing using favorite-child-policy = majority on replica-3 ################# + +#Convert volume to replica-3 +TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +TEST $CLI volume set $V0 cluster.quorum-type none +TEST $CLI volume set $V0 cluster.favorite-child-policy none +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240 + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 + +#file fill in split-brain +cat $M0/file > /dev/null +EXPECT "1" echo $? + +#We know that the second and third bricks agree with each other. Pick any one of them. +MAJORITY_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1) +TEST $CLI volume set $V0 cluster.favorite-child-policy majority +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +cat $M0/file > /dev/null +EXPECT "0" echo $? +HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1) +TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ] + +TEST force_umount $M0 +cleanup |