author | Mohammed Rafi KC <rkavunga@redhat.com> | 2019-05-09 14:07:48 +0530
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2019-06-06 11:56:43 +0000
commit | 373f045497b3e696ab6492b9c8ea105ffe947b91 (patch)
tree | aad2ebe742a5397903266e6f13c35764f7c11743 /tests
parent | 76673e983b7eb849011b3fb4417cf1d5f7147352 (diff)
tests/shd: Add test coverage for shd mux
This patch adds more test cases for shd mux.
The test cases include:
1) Creating multiple volumes to check the attach and detach
   of self-heal daemon requests.
2) Make sure healing happens in all scenarios.
3) After a volume detach, make sure all threads of the detached
   volume are cleaned up.
4) Repeat all of the above tests for an ec volume.
5) Node reboot case.
6) glusterd restart cases.
7) Add-brick/remove-brick.
8) Convert a distributed volume to a disperse volume.
9) Convert a replicated volume to a distributed volume.
Change-Id: I7c317ef9d23a45ffd831157e4890d7c83a8fce7b
fixes: bz#1708929
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
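
To make the multiplexing idea concrete before the diff: the core check that the new tests repeat is that every replicate/disperse volume on a node is served by one and the same glustershd process. The lines below are an illustrative sketch only, not part of the patch; they rely on the get_shd_mux_pid and shd_count helpers this patch adds to tests/volume.rc, and ${V0}_extra is a hypothetical volume name used just for the example.

# Illustrative sketch (not from the patch): create and start one more
# replicate volume, then verify it is muxed into the existing shd.
TEST $CLI volume create ${V0}_extra replica 3 $H0:$B0/${V0}_extra{0,1,2}
TEST $CLI volume start ${V0}_extra

shd_pid=$(get_shd_mux_pid $V0)
# Exactly one glustershd process should be running on the node...
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
# ...and the new volume should report the same self-heal daemon pid.
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_extra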
Diffstat (limited to 'tests')
-rw-r--r-- | tests/basic/glusterd-restart-shd-mux.t | 96
-rw-r--r-- | tests/basic/shd-mux.t | 149
-rw-r--r-- | tests/basic/volume-scale-shd-mux.t | 112
-rw-r--r-- | tests/volume.rc | 15
4 files changed, 372 insertions(+), 0 deletions(-)
diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t
new file mode 100644
index 00000000000..a50af9dfa57
--- /dev/null
+++ b/tests/basic/glusterd-restart-shd-mux.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=20
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 3); do
+    TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+    TEST $CLI volume start ${V0}_afr$i
+    TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+    TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Stop the glusterd
+TEST pkill glusterd
+#Only stopping glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+    afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+    EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+    ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+    EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+#Reboot a node scenario
+TEST pkill gluster
+#Only stopped glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+    afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+    EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+    ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+    EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+for i in $(seq 1 3); do
+    TEST $CLI volume stop ${V0}_afr$i
+    TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/shd-mux.t b/tests/basic/shd-mux.t
new file mode 100644
index 00000000000..e42a34ab1f7
--- /dev/null
+++ b/tests/basic/shd-mux.t
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=16
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+#Create a one more volume
+TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_1
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_1 cluster.eager-lock off
+TEST $CLI volume set ${V0}_1 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_1 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_1
+TEST $CLI volume delete ${V0}_1
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+
+#Now create a ec volume and check mux works
+TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_2
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_2 cluster.eager-lock off
+TEST $CLI volume set ${V0}_2 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_2 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_2
+TEST $CLI volume delete ${V0}_2
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to zero for ec related threads
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+for i in $(seq 1 3); do
+    TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+    TEST $CLI volume start ${V0}_afr$i
+    TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+    TEST $CLI volume start ${V0}_ec$i
+done
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+#Delete the volumes
+for i in $(seq 1 3); do
+    TEST $CLI volume stop ${V0}_afr$i
+    TEST $CLI volume stop ${V0}_ec$i
+    TEST $CLI volume delete ${V0}_afr$i
+    TEST $CLI volume delete ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
new file mode 100644
index 00000000000..dd9cf8314e0
--- /dev/null
+++ b/tests/basic/volume-scale-shd-mux.t
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=6
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+    TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+    TEST $CLI volume start ${V0}_afr$i
+    TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+    TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8};
+#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
+#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "__ec_shd_healer_wait"
+
+
+for i in $(seq 1 2); do
+    TEST $CLI volume stop ${V0}_afr$i
+    TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+shd_pid=$(get_shd_mux_pid $V0)
+TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10
+TEST $CLI volume start ${V0}_distribute1
+
+#Creating a non-replicate/non-ec volume should not have any effect in shd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+EXPECT "^${shd_pid}$" get_shd_mux_pid $V0
+
+TEST mkdir $B0/add/
+#Now convert the distributed volume to replicate
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+#scale down the volume
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "__afr_shd_healer_wait"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST rm -rf $B0/add/
+TEST mkdir $B0/add/
+#Now convert the distributed volume back to replicate and make sure that a new shd is spawned
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3};
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "__afr_shd_healer_wait"
+
+#Now convert the replica volume to distribute again and make sure the shd is now stopped
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+TEST rm -rf $B0/add/
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/volume.rc b/tests/volume.rc
index 7dd829210e3..6477dfb484f 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -929,3 +929,18 @@ function volgen_check_ancestry {
         echo "N"
     fi
 }
+
+function get_shd_mux_pid {
+    local volume=$1
+    pid=`$CLI volume status $volume shd | awk '/Self-heal/{print $8}'`
+    echo $pid
+}
+
+function shd_count {
+    ps aux | grep "glustershd" | grep -v grep | wc -l
+}
+
+function number_healer_threads_shd {
+    local pid=$(get_shd_mux_pid $1)
+    pstack $pid | grep $2 | wc -l
+}