diff options
| -rw-r--r-- | geo-replication/syncdaemon/resource.py | 22 | ||||
| -rw-r--r-- | tests/00-geo-rep/georep-stderr-hang.t | 128 | ||||
| -rw-r--r-- | tests/geo-rep.rc | 17 | 
3 files changed, 163 insertions, 4 deletions
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 522279bb7e1..b16db607967 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -1540,15 +1540,29 @@ class SSH(object):          p0.stdin.close()          p0.stdout.close()  # Allow p0 to receive a SIGPIPE if p1 exits. -        # wait for tar to terminate, collecting any errors, further -        # waiting for transfer to complete -        _, stderr1 = p1.communicate()          # stdin and stdout of p0 is already closed, Reset to None and          # wait for child process to complete          p0.stdin = None          p0.stdout = None -        p0.communicate() + +        def wait_for_tar(p0): +            _, stderr = p0.communicate() +            if log_err: +                for errline in stderr.strip().split("\n")[:-1]: +                    if "No such file or directory" not in errline: +                        logging.error(lf("SYNC Error", +                                         sync_engine="Tarssh", +                                         error=errline)) + +        t = syncdutils.Thread(target=wait_for_tar, args=(p0, )) +        # wait for tar to terminate, collecting any errors, further +        # waiting for transfer to complete +        t.start() + +        # wait for ssh process +        _, stderr1 = p1.communicate() +        t.join()          if log_err:              for errline in stderr1.strip().split("\n")[:-1]: diff --git a/tests/00-geo-rep/georep-stderr-hang.t b/tests/00-geo-rep/georep-stderr-hang.t new file mode 100644 index 00000000000..496f0e6577d --- /dev/null +++ b/tests/00-geo-rep/georep-stderr-hang.t @@ -0,0 +1,128 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../geo-rep.rc +. $(dirname $0)/../env.rc + +SCRIPT_TIMEOUT=500 + +AREQUAL_PATH=$(dirname $0)/../utils +test "`uname -s`" != "Linux" && { +    CFLAGS="$CFLAGS -lintl"; +} +build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS + +### Basic Tests with Distribute Replicate volumes + +##Cleanup and start glusterd +cleanup; +TEST glusterd; +TEST pidof glusterd + + +##Variables +GEOREP_CLI="$CLI volume geo-replication" +master=$GMV0 +SH0="127.0.0.1" +slave=${SH0}::${GSV0} +num_active=2 +num_passive=2 +master_mnt=$M0 +slave_mnt=$M1 + +############################################################ +#SETUP VOLUMES AND GEO-REPLICATION +############################################################ + +##create_and_start_master_volume +TEST $CLI volume create $GMV0 $H0:$B0/${GMV0}1; +TEST $CLI volume start $GMV0 + +##create_and_start_slave_volume +TEST $CLI volume create $GSV0 $H0:$B0/${GSV0}1; +TEST $CLI volume start $GSV0 +TEST $CLI volume set $GSV0 performance.stat-prefetch off +TEST $CLI volume set $GSV0 performance.quick-read off +TEST $CLI volume set $GSV0 performance.readdir-ahead off +TEST $CLI volume set $GSV0 performance.read-ahead off + +##Mount master +TEST glusterfs -s $H0 --volfile-id $GMV0 $M0 + +##Mount slave +TEST glusterfs -s $H0 --volfile-id $GSV0 $M1 + +############################################################ +#BASIC GEO-REPLICATION TESTS +############################################################ + +TEST create_georep_session $master $slave +EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Created" + +#Config gluster-command-dir +TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR} + +#Config gluster-command-dir +TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR} + +#Set changelog roll-over time to 45 secs +TEST $CLI volume set $GMV0 changelog.rollover-time 45 + +#Wait for common secret pem file to be created +EXPECT_WITHIN $GEO_REP_TIMEOUT  0 check_common_secret_file + +#Verify the keys are distributed +EXPECT_WITHIN $GEO_REP_TIMEOUT  0 check_keys_distributed + +#Set sync-jobs to 1 +TEST $GEOREP_CLI $master $slave config sync-jobs 1 + +#Start_georep +TEST $GEOREP_CLI $master $slave start + +touch $M0 +EXPECT_WITHIN $GEO_REP_TIMEOUT  1 check_status_num_rows "Active" +EXPECT_WITHIN $GEO_REP_TIMEOUT  1 check_status_num_rows "Changelog Crawl" + +#Check History Crawl. +TEST $GEOREP_CLI $master $slave stop +TEST create_data_hang "rsync_hang" +TEST create_data "history_rsync" +TEST $GEOREP_CLI $master $slave start +EXPECT_WITHIN $GEO_REP_TIMEOUT  1 check_status_num_rows "Active" + +#Verify arequal for whole volume +EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} + +#Stop Geo-rep +TEST $GEOREP_CLI $master $slave stop + +#Config tarssh as sync-engine +TEST $GEOREP_CLI $master $slave config sync-method tarssh + +#Create tarssh hang data +TEST create_data_hang "tarssh_hang" +TEST create_data "history_tar" + +TEST $GEOREP_CLI $master $slave start +EXPECT_WITHIN $GEO_REP_TIMEOUT  1 check_status_num_rows "Active" + +#Verify arequal for whole volume +EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt} + +#Stop Geo-rep +TEST $GEOREP_CLI $master $slave stop + +#Delete Geo-rep +TEST $GEOREP_CLI $master $slave delete + +#Cleanup are-equal binary +TEST rm $AREQUAL_PATH/arequal-checksum + +#Cleanup authorized keys +sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys +sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys + +cleanup; +#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000 diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc index 2035b9fe106..e4f014eb6f8 100644 --- a/tests/geo-rep.rc +++ b/tests/geo-rep.rc @@ -101,6 +101,23 @@ function create_data()      chown 1000:1000 ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ  } +function create_data_hang() +{ +    prefix=$1 +    mkdir ${master_mnt}/${prefix} +    cd ${master_mnt}/${prefix} +    # ~1k files is required with 1 sync-job and hang happens if +    # stderr buffer of tar/ssh executed with Popen is full (i.e., 64k). +    # 64k is hit when ~800 files were  not found while syncing data +    # from master. So around 1k files is required to hit the condition. +    for i in {1..1000} +    do +        echo "test data" > file$i +        mv -f file$i file +    done +    cd - +} +  function chown_file_ok()  {      local file_owner=$(stat --format "%u:%g" "$1")  | 
