diff options
author | Mohit Agrawal <moagrawal@redhat.com> | 2018-11-23 09:39:43 +0530 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2018-12-13 04:46:50 +0000 |
commit | fb917bf10b4783d5c669e81a5be1f902ca48cb84 (patch) | |
tree | 81a61c54de77f598c1c23648d5e38a3f0fff54aa /tests | |
parent | 52d3f82db2f032eae1b60ffa2f283109858ce3df (diff) |
[geo-rep]: Worker still ACTIVE after killing bricks
Problem: In the changelog xlator, after destroying the listener it calls
unlink to delete the changelog socket file, but the socket file
reference is not cleaned up from process memory
Solution: 1) To clean up the reference completely from process memory,
serialize transport cleanup for changelog and then
unlink the socket file
2) The brick xlator will notify GF_EVENT_PARENT_DOWN to the next
xlator only after cleaning up all xprts
Test: To test this, run the steps below
1) Set up a volume and enable brick mux
2) Kill any one brick with gf_attach
3) Check lsof for the changelog socket specific to the killed brick;
it should be cleaned up completely
fixes: bz#1600145
Change-Id: Iba06cbf77d8a87b34a60fce50f6d8c0d427fa491
Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/00-geo-rep/bug-1600145.t | 109 | ||||
-rw-r--r-- | tests/bugs/ec/bug-1236065.t | 1 |
2 files changed, 110 insertions, 0 deletions
diff --git a/tests/00-geo-rep/bug-1600145.t b/tests/00-geo-rep/bug-1600145.t new file mode 100644 index 00000000000..1d38bf92682 --- /dev/null +++ b/tests/00-geo-rep/bug-1600145.t @@ -0,0 +1,109 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../geo-rep.rc +. $(dirname $0)/../env.rc + +### Basic Tests with Distribute Replicate volumes + +##Cleanup and start glusterd +cleanup; +SCRIPT_TIMEOUT=600 +TEST glusterd; +TEST pidof glusterd + +##Variables +GEOREP_CLI="$CLI volume geo-replication" +master=$GMV0 +SH0="127.0.0.1" +slave=${SH0}::${GSV0} +num_active=2 +num_passive=2 +master_mnt=$M0 +slave_mnt=$M1 + +############################################################ +#SETUP VOLUMES AND GEO-REPLICATION +############################################################ + +##create_and_start_master_volume +TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2}; +gluster v set all cluster.brick-multiplex on +TEST $CLI volume start $GMV0 + +##create_and_start_slave_volume +TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2}; +TEST $CLI volume start $GSV0 + +##Create, start and mount meta_volume +TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3}; +TEST $CLI volume start $META_VOL +TEST mkdir -p $META_MNT +TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT + +############################################################ +#BASIC GEO-REPLICATION TESTS +############################################################ + +#Create geo-rep session +TEST create_georep_session $master $slave + +#Config gluster-command-dir +TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} + +#Config gluster-command-dir +TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} + +#Enable_metavolume +TEST $GEOREP_CLI $master $slave config use_meta_volume true + +#Wait for common secret pem file to be created +EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file + 
+#Verify the keys are distributed + +EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed + +#Count no. of changelog socket +brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'` +n=$(grep -Fc "changelog" /proc/$brick_pid/net/unix) + +#Start_georep +TEST $GEOREP_CLI $master $slave start + +EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active" +EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Passive" + +#Count no. of changelog socket +brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'` +c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix) +let expected=n+2 +TEST [ "$c" -eq "$expected" ] + +#Kill the "Active" brick +brick=$($GEOREP_CLI $master $slave status | grep -F "Active" | awk {'print $3'}) +cat /proc/$brick_pid/net/unix | grep "changelog" +TEST kill_brick $GMV0 $H0 $brick +#Expect geo-rep status to be "Faulty" +EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Faulty" +EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active" + +#Count no. of changelog socket +brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'` +cat /proc/$brick_pid/net/unix | grep "changelog" +ls -lrth /proc/$brick_pid/fd | grep "socket" +c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix) +TEST [ "$c" -eq "$n" ] + +#Stop Geo-rep +TEST $GEOREP_CLI $master $slave stop + +#Delete Geo-rep +TEST $GEOREP_CLI $master $slave delete + +#Cleanup authorized keys +sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys +sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys + +cleanup; diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t index 9395aa33e8c..76d25d739fa 100644 --- a/tests/bugs/ec/bug-1236065.t +++ b/tests/bugs/ec/bug-1236065.t @@ -2,6 +2,7 @@ . $(dirname $0)/../../include.rc . $(dirname $0)/../../volume.rc +SCRIPT_TIMEOUT=400 cleanup |