1 files changed, 227 insertions, 42 deletions
diff --git a/tests/include.rc b/tests/include.rc
index 492e35a7b6c..0dc7d830449 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -1,14 +1,19 @@
+
+checkpoint_time="$(date +%s%N)"
+
 M0=${M0:=/mnt/glusterfs/0};   # 0th mount point for FUSE
 M1=${M1:=/mnt/glusterfs/1};   # 1st mount point for FUSE
 M2=${M2:=/mnt/glusterfs/2};   # 2nd mount point for FUSE
+M3=${M3:=/mnt/glusterfs/3};   # 3rd mount point for FUSE
 N0=${N0:=/mnt/nfs/0};         # 0th mount point for NFS
 N1=${N1:=/mnt/nfs/1};         # 1st mount point for NFS
 V0=${V0:=patchy};             # volume name to use in tests
 V1=${V1:=patchy1};            # volume name to use in tests
 GMV0=${GMV0:=master};	      # master volume name to use in geo-rep tests
 GSV0=${GSV0:=slave};	      # slave volume name to use in geo-rep tests
+GSV1=${GSV1:=slave1};	      # slave volume name to use in geo-rep tests
 B0=${B0:=/d/backends};        # top level of brick directories
-WORKDIRS="$B0 $M0 $M1 $M2 $N0 $N1" 
+WORKDIRS="$B0 $M0 $M1 $M2 $M3 $N0 $N1"
 
 ROOT_GFID="00000000-0000-0000-0000-000000000001"
 DOT_SHARD_GFID="be318638-e8a0-4c6d-977d-7a937aa84806"
@@ -19,13 +24,20 @@ META_MNT=${META_MNT:=/var/run/gluster/shared_storage}; # Mount point of shared g
 CC=cc
 OSTYPE=$(uname -s)
 
-ENV_RC=$(dirname $0)/../env.rc
-if [ ! -f $ENV_RC ]; then
-   ENV_RC=$(dirname $0)/../../env.rc
-   if [ ! -f $ENV_RC ]; then
-      ENV_RC=$(dirname $0)/../../../env.rc
-   fi
-fi
+env_dir=$(dirname "$0")   # start next to the test script, then walk upwards
+while true; do
+        ENV_RC="${env_dir}/env.rc"
+        if [ -f "${ENV_RC}" ]; then
+                break
+        fi
+        new_dir=$(dirname "$env_dir")
+        # Once dirname no longer changes the path there is no parent to try.
+        if [ x"$new_dir" = x"$env_dir" ]; then
+                ENV_RC="/not/found"
+                break
+        fi
+        env_dir=$new_dir
+done
 
 if [ ! -f $ENV_RC ]; then
    echo "Aborting." | tee /dev/stderr
@@ -62,13 +74,16 @@ esac
 
 DEBUG=${DEBUG:=0}             # turn on debugging?
 
-PROCESS_UP_TIMEOUT=20
+PROCESS_DOWN_TIMEOUT=5
+PROCESS_UP_TIMEOUT=45
 NFS_EXPORT_TIMEOUT=20
 CHILD_UP_TIMEOUT=20
 PROBE_TIMEOUT=60
-REBALANCE_TIMEOUT=360
+PEER_SYNC_TIMEOUT=20
+REBALANCE_TIMEOUT=600
 REOPEN_TIMEOUT=20
 HEAL_TIMEOUT=80
+IO_HEAL_TIMEOUT=120
 MARKER_UPDATE_TIMEOUT=20
 JANITOR_TIMEOUT=60
 UMOUNT_TIMEOUT=5
@@ -76,13 +91,57 @@ CONFIG_UPDATE_TIMEOUT=5
 AUTH_REFRESH_INTERVAL=10
 GRAPH_SWITCH_TIMEOUT=10
 UNLINK_TIMEOUT=5
+MDC_TIMEOUT=5
+IO_WAIT_TIMEOUT=5
+DISK_FAIL_TIMEOUT=80
 
 LOGDIR=$(gluster --print-logdir)
 
 statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump
 
 CLI="gluster --mode=script --wignore";
-GFS="glusterfs --attribute-timeout=0 --entry-timeout=0";
+CLI_NO_FORCE="gluster --mode=script";
+
+# CLI_IGNORE_PARTITION makes sure that the warning related to bricks being on
+# root partition is ignored while running the command in a "no force" mode
+CLI_IGNORE_PARTITION="gluster --mode=script --wignore-partition"
+
+# wait_delay <seconds> <interval> <command...>: run the command until it
+# succeeds (returns 0) or <seconds> have elapsed (returns 1).
+function wait_delay() {
+        local delay="$1"        # timeout, in whole seconds
+        local interval="$2"     # argument passed to sleep between attempts
+        shift 2
+        local deadline="$(($(date +%s%N) + ${delay}000000000))"
+
+        # "$@" (not $*) so that each argument reaches the command intact.
+        until "$@"; do
+                if [[ $(date +%s%N) -ge ${deadline} ]]; then
+                        return 1
+                fi
+                sleep "${interval}"
+        done
+        return 0
+}
+
+# Run glusterfs "$@"; on success, wait (at most 100 probes, 0.1s apart) until
+# a file can be created under the mount point, taken to be the last argument.
+# Paths are quoted so a mount point containing whitespace still works.
+_GFS () {
+	glusterfs "$@"
+	local mount_ret=$?
+	[ "$mount_ret" -eq 0 ] || return "$mount_ret"   # mount failed: pass status on
+	local mount_point=${!#}
+	local i=0
+	until touch "$mount_point/xy_zzy" 2> /dev/null; do
+		i=$((i+1))
+		[ "$i" -lt 100 ] || break
+		sleep 0.1
+	done
+	rm -f "$mount_point/xy_zzy"
+	return "$mount_ret"   # always 0 here, even if the probe never succeeded
+}
+GFS="_GFS --attribute-timeout=0 --entry-timeout=0";
 
 mkdir -p $WORKDIRS
 
@@ -153,6 +212,7 @@ function test_header()
         dbg "=========================";
         dbg "TEST $t (line $TESTLINE): $*";
         saved_cmd="$*"
+        start_time="$(date +%s%N)"
 }
 
 
@@ -161,16 +221,20 @@ function test_footer()
         RET=$?
         local lineno=$1
         local err=$2
-
+        local end_time
+        local elapsed1
+        local elapsed2
+
+        end_time="$(date +%s%N)"
+        elapsed1="$(((start_time - checkpoint_time) / 1000000))"
+        elapsed2="$(((end_time - start_time) / 1000000))"
+        checkpoint_time="$end_time"
         if [ $RET -eq 0 ]; then
-                echo "ok $t, LINENUM:$lineno";
+                printf "ok %3d [%7d/%7d] <%4d> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd";
         else
-                echo "not ok $t $err, LINENUM:$lineno";
-                # With DEBUG, this was already printed out, so skip it.
-                if [ x"$DEBUG" = x"0" ]; then
-                        echo "FAILED COMMAND: $saved_cmd"
-                fi
+                printf "not ok %3d [%7d/%7d] <%4d> '%s' -> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd" "$err"
                 if [ "$EXIT_EARLY" = "1" ]; then
+			cleanup
                         exit $RET
                 fi
         fi
@@ -226,7 +290,7 @@ function test_expect_not_footer()
                 err="Got \"$a\" when not expecting it"
         fi
 
-        ! [[ "$a" =~ "$e" ]];
+        ! [[ "$a" =~ $e ]];
         test_footer "$lineno" "$err";
 }
 
@@ -282,6 +346,10 @@ function _TEST()
         test_footer "$TESTLINE";
 }
 
+# This function should be used carefully.
+# The expected regex given to this function should be
+# anchored with ^ and $ so that it matches the command's
+# output exactly.
 function _EXPECT_WITHIN()
 {
         TESTLINE=$1
@@ -297,20 +365,23 @@ function _EXPECT_WITHIN()
         a="";
         shift;
 
-        local endtime=$(( ${timeout}+`date +%s` ))
+        local endtime="$(( ${timeout}000000000 + $(date +%s%N) ))"
+
+        # We *want* this to be globally visible.
+        EW_RETRIES=0
 
-        while [ `date +%s` -lt $endtime ]; do
+        while [[ "$(date +%s%N)" < "$endtime" ]]; do
                 a=$("$@" | tail -1 ; exit ${PIPESTATUS[0]})
                 ## Check command success
                 if [ $? -ne 0 ]; then
                         break;
                 fi
-
                 ## Check match success
-                if [[ "$a" =~ "$e" ]]; then
+                if [[ "$a" =~ $e ]]; then
                         break;
                 fi
-                sleep 1;
+                sleep 0.25;
+                EW_RETRIES=$((EW_RETRIES+1))
         done
 
         if [ "x$e" = "x" ] ; then
@@ -420,8 +491,103 @@ stat -c %s /dev/null > /dev/null 2>&1 || {
   }
 }
 
+# signal_pids <signal> <pid...>: send <signal> to every listed PID.
+# Errors from kill are discarded; with no PIDs nothing is sent.
+function signal_pids() {
+        local sig="$1"
+        shift
+        local targets=($*)
+
+        [[ ${#targets[@]} -eq 0 ]] || kill -${sig} ${targets[@]} 2>/dev/null || true
+}
+
+# Print, space-separated on one line, those of the given PIDs that still
+# exist according to "kill -0".
+function check_pids() {
+        local candidates=($*)
+        local alive=()
+        local p
+        for p in "${candidates[@]}"; do
+                if kill -0 "${p}" 2>/dev/null; then alive+=(${p}); fi
+        done
+        echo "${alive[@]}"
+}
+
+# NOTE: despite its name, this succeeds (returns 0) only when NONE of the
+# given PIDs is still running, and fails (returns 1) while any of them is.
+function pids_alive() {
+        local remaining
+
+        remaining="$(check_pids $*)"
+        [[ -z "${remaining}" ]] || return 1
+        return 0
+}
+
+# Send SIGTERM to the given PIDs, escalating to SIGKILL for any survivors.
+# Returns 0 if all exited after TERM, 1 if KILL was needed, 2 if some remain.
+function terminate_pids() {
+        local pids=($*)
+
+        signal_pids TERM ${pids[@]}
+        if wait_delay ${PROCESS_DOWN_TIMEOUT} 0.1 pids_alive ${pids[@]}; then
+                return 0
+        fi
+
+        # Only the processes that ignored SIGTERM are killed and waited on.
+        pids=($(check_pids ${pids[@]}))
+        signal_pids KILL ${pids[@]}
+        if wait_delay 1 0.1 pids_alive ${pids[@]}; then
+                return 1
+        fi
+        return 2
+}
+
+# Print, on one line, the PIDs reported by pgrep for each of the given
+# process-name patterns.
+function process_pids() {
+        local pattern
+        local found=()
+
+        for pattern in $*; do found+=($(pgrep ${pattern})); done
+
+        echo "${found[@]}"
+}
+
+## Lock files should be removed automatically once a "useradd" or "groupadd"
+## command finishes, but sometimes (due to bugs) some of them are left behind
+## after the command has run. When that happens, the next useradd may fail
+## with “cannot lock /etc/passwd” or “unable to lock group file”.
+## To avoid such errors, remove_lock_files() deletes any of the passwd,
+## group, shadow and gshadow lock files that still exist under /etc.
+## It takes no arguments.
+
+function remove_lock_files()
+{
+        if [ -f /etc/passwd.lock ];
+        then
+                rm -rf /etc/passwd.lock;
+        fi
+
+        if [ -f /etc/group.lock ];
+        then
+                rm -rf /etc/group.lock;
+        fi
+
+        if [ -f /etc/shadow.lock ];
+        then
+                rm -rf /etc/shadow.lock;
+        fi
+
+        if [ -f /etc/gshadow.lock ];
+        then
+                rm -rf /etc/gshadow.lock;
+        fi
+}
+
+
 function cleanup()
 {
+        local end_time
 
         # Prepare flags for umount
         case `uname -s` in
@@ -439,6 +605,9 @@ function cleanup()
                 ;;
         esac
 
+        # Clean up lock files.
+        remove_lock_files
+
         # Clean up all client mounts
         for m in `mount | grep fuse.glusterfs | awk '{print $3}'`; do
                 umount $flag $m
@@ -457,9 +626,10 @@ function cleanup()
         umount $flag /tmp/mnt* 2>/dev/null
 
 
-        # Send SIGKILL to all gluster processes that are still running
-        killall -9 glusterfs glusterfsd glusterd 2>/dev/null || true;
-        test x"$OSTYPE" = x"NetBSD" && pkill -9 perfused rpc.statd || true
+        # Send SIGTERM to all gluster processes and rpc.statd that are still running
+        terminate_pids $(process_pids glusterfs glusterfsd glusterd rpc.statd)
+
+        test x"$OSTYPE" = x"NetBSD" && pkill -9 perfused || true
 
         # unregister nfs and related services from portmapper/rpcbind
         ## nfs
@@ -535,6 +705,7 @@ function cleanup()
         # Complete cleanup time
         rm -rf "$B0/*" "/etc/glusterd/*";
         rm -rf $WORKDIRS
+        find $GLUSTERD_PIDFILEDIR -name "*.pid" | xargs rm -rf
         leftover=""
         for d in $WORKDIRS ; do
                 if test -d $d ; then
@@ -554,26 +725,14 @@ function cleanup()
                 return 1;
         fi >&2
 
-        # tar logs at the start and end of every test
-        if [ -n "$LOGDIR" -a -z "$STOP_WASTING_SPACE" ]
-        then
-                tarname=$(basename $0 .t)
-                tar -rf ${LOGDIR}/${tarname}.tar ${LOGDIR}/* \
-                        --exclude="*.tar" \
-                        && \
-                find $LOGDIR/* -maxdepth 0 -name '*.tar' -prune \
-                                        -o -exec rm -rf '{}' ';'
-        else
-                echo "LOGDIR is not set"
-        fi
-
         mkdir -p $WORKDIRS
 	# This is usually the last thing a test script calls, so our return
 	# value becomes their exit value.  While it's not great for the mkdir
 	# above to fail, promoting that into a failure of the whole test (and
 	# thus of an entire regression-test run) seems a bit excessive.  Make
 	# sure we return good status anyway.
-	return 0
+
+        return 0
 }
 
 function force_terminate () {
@@ -808,9 +967,28 @@ useradd --help 2>/dev/null | grep -q -- '--no-create-home' || {
   }
 }
 
+# Interactive variant of _TEST: prompt on stdin before running a command.
+# "y" runs it via _TEST, "q" exits the script with status 0, and any other
+# reply prints "skipping" and runs nothing.
+DBG_TEST () {
+        # Local, so the reply cannot clobber a variable of the test script.
+        local answer
+
+        read -r -p "execute \"$*\"? " answer
+        case $answer in
+        'y') _TEST "$@" ;;
+        'q') exit 0 ;;
+        *)   echo "skipping" ;;
+        esac
+}
+
 alias EXPECT='_EXPECT $LINENO'
 alias EXPECT_NOT='_EXPECT_NOT $LINENO'
-alias TEST='_TEST $LINENO'
+if [ -n "$GF_INTERACTIVE" ]; then
+	alias TEST='DBG_TEST $LINENO'
+else
+	alias TEST='_TEST $LINENO'
+fi
 alias EXPECT_WITHIN='_EXPECT_WITHIN $LINENO'
 alias EXPECT_KEYWORD='_EXPECT_KEYWORD $LINENO'
 alias TEST_IN_LOOP='_TEST_IN_LOOP $LINENO'
@@ -1163,3 +1341,10 @@ function STAT_INO()
                 echo 0
         fi
 }
+
+# Print only the MD5 hex digest (first md5sum field) of the file named by $1.
+function get_md5_sum() {
+    local file=$1 md5_sum    # md5_sum is local so callers' globals are untouched
+    md5_sum=$(md5sum "$file" | awk '{print $1}')
+    printf '%s\n' "$md5_sum"
+}