Diffstat (limited to 'tests/bugs/core')
-rw-r--r--  tests/bugs/core/brick-mux-fd-cleanup.t                          |  78
-rwxr-xr-x  tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t                |   9
-rw-r--r--  tests/bugs/core/bug-1432542-mpx-restart-crash.t                 |  27
-rw-r--r--  tests/bugs/core/bug-1650403.t                                   | 113
-rw-r--r--  tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t  |  33
-rw-r--r--  tests/bugs/core/bug-834465.c                                    |  85
-rwxr-xr-x  tests/bugs/core/bug-927616.t                                    |   2
-rwxr-xr-x  tests/bugs/core/io-stats-1322825.t                              |  12
-rw-r--r--  tests/bugs/core/multiplex-limit-issue-151.t                     |  57
9 files changed, 301 insertions, 115 deletions
diff --git a/tests/bugs/core/brick-mux-fd-cleanup.t b/tests/bugs/core/brick-mux-fd-cleanup.t
new file mode 100644
index 00000000000..de11c177b8a
--- /dev/null
+++ b/tests/bugs/core/brick-mux-fd-cleanup.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This .t tests that the fds from client are closed on brick when gluster volume
+#stop is executed in brick-mux setup.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+function keep_fd_open {
+#This function has to be run as background job because opening the fd in
+#foreground and running commands is leading to flush calls on these fds
+#which is making it very difficult to create the race where fds will be left
+#open even after the brick dies.
+    exec 5>$M1/a
+    exec 6>$M1/b
+    while [ -f $M0/a ]; do sleep 1; done
+}
+
+function count_open_files {
+    local brick_pid="$1"
+    local pattern="$2"
+    ls -l /proc/$brick_pid/fd | grep -i "$pattern" | wc -l
+}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{2,3}
+#Have same configuration on both bricks so that they are multiplexed
+#Delay flush fop for a second
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume heal $V1 disable
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.enable flush
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+TEST $CLI volume set $V1 delay-gen posix
+TEST $CLI volume set $V1 delay-gen.enable flush
+TEST $CLI volume set $V1 delay-gen.delay-percentage 100
+TEST $CLI volume set $V1 delay-gen.delay-duration 1000000
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0
+TEST $GFS -s $H0 --volfile-id=$V1 --direct-io-mode=enable $M1
+
+TEST touch $M0/a
+keep_fd_open &
+TEST $CLI volume profile $V1 start
+brick_pid=$(get_brick_pid $V1 $H0 $B0/${V1}2)
+TEST count_open_files $brick_pid "$B0/${V1}2/a"
+TEST count_open_files $brick_pid "$B0/${V1}2/b"
+TEST count_open_files $brick_pid "$B0/${V1}3/a"
+TEST count_open_files $brick_pid "$B0/${V1}3/b"
+
+#If any other flush fops are introduced into the system other than the one at
+#cleanup it interferes with the race, so test for it
+EXPECT "^0$" echo "$($CLI volume profile $V1 info incremental | grep -i flush | wc -l)"
+#Stop the volume
+TEST $CLI volume stop $V1
+
+#Wait for cleanup resources or volume V1
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/b"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/b"
+
+TEST rm -f $M0/a #Exit keep_fd_open()
+wait
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup
diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
index 6351ba22511..a1b9a851bf7 100755
--- a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
+++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
@@ -3,6 +3,8 @@
 . $(dirname $0)/../../volume.rc
 cleanup;
 
+FILE_COUNT=500
+
 TEST glusterd
 TEST pidof glusterd
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
@@ -11,15 +13,14 @@ TEST $CLI volume set $V0 cluster.shd-wait-qlength 100
 TEST $CLI volume start $V0
 TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
 
-touch $M0/file{1..200}
-
+for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done
 TEST kill_brick $V0 $H0 $B0/${V0}1
-for i in {1..200}; do echo hello>$M0/file$i; done
+for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
-EXPECT "200" get_pending_heal_count $V0
+EXPECT "$FILE_COUNT" get_pending_heal_count $V0
 
 TEST $CLI volume set $V0 self-heal-daemon on
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
 
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
index 2179630202d..2793d7008e1 100644
--- a/tests/bugs/core/bug-1432542-mpx-restart-crash.t
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -1,12 +1,14 @@
 #!/bin/bash
 
-SCRIPT_TIMEOUT=300
+SCRIPT_TIMEOUT=800
 
 . $(dirname $0)/../../include.rc
 . $(dirname $0)/../../volume.rc
 . $(dirname $0)/../../traps.rc
 
-NUM_VOLS=20
+cleanup;
+
+NUM_VOLS=15
 MOUNT_BASE=$(dirname $M0)
 
 # GlusterD reports that bricks are started when in fact their attach requests
@@ -40,7 +42,7 @@ create_volume () {
     local vol_name=$(printf "%s-vol%02d" $V0 $1)
 
     local brick_base=$(get_brick_base $1)
-    local cmd="$CLI volume create $vol_name replica 2"
+    local cmd="$CLI volume create $vol_name replica 3"
     local b
     for b in $(seq 0 5); do
         local this_brick=${brick_base}/brick$b
@@ -50,7 +52,7 @@ create_volume () {
     TEST $cmd
     TEST $CLI volume start $vol_name
     # check for 6 bricks and 1 shd daemon to be up and running
-    EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks $vol_name
+    EXPECT_WITHIN 120 7 count_up_bricks $vol_name
     local mount_point=$(get_mount_point $1)
     mkdir -p $mount_point
     TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
@@ -77,12 +79,27 @@ TEST $CLI volume set all cluster.brick-multiplex on
 # Our infrastructure can't handle an arithmetic expression here. The formula
 # is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
 # NUM_VOLS-1 and there are 5 such statements in each iteration.
-TESTS_EXPECTED_IN_LOOP=95
+TESTS_EXPECTED_IN_LOOP=84
 
 for i in $(seq 1 $NUM_VOLS); do
+    starttime="$(date +%s)";
+
     create_volume $i
     TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+    # Unmounting to reduce memory footprint on regression hosts
+    mnt_point=$(get_mount_point $i)
+    EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+    endtime=$(expr $(date +%s) - $starttime)
+
+    echo "Memory Used after $i volumes : $(pmap -x $(pgrep glusterfsd) | grep total)"
+    echo "Thread Count after $i volumes: $(ps -T -p $(pgrep glusterfsd) | wc -l)"
+    echo "Time taken : ${endtime} seconds"
 done
+echo "=========="
+echo "List of all the threads in the Brick process"
+ps -T -p $(pgrep glusterfsd)
+echo "=========="
+
 # Kill glusterd, and wait a bit for all traces to disappear.
 TEST killall -9 glusterd
 sleep 5
diff --git a/tests/bugs/core/bug-1650403.t b/tests/bugs/core/bug-1650403.t
new file mode 100644
index 00000000000..43d09bc8bd9
--- /dev/null
+++ b/tests/bugs/core/bug-1650403.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=500
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+NUM_VOLS=5
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried.  That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail.  Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate.  After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures.  Arguably,
+# those are spurious because we will eventually catch up.  We're just not
+# ready *yet*.  More to the point, even if the errors aren't spurious that's
+# not what we're testing right now.  Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+    printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+    printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+function count_up_bricks {
+    vol=$1;
+    $CLI --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+    local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+    local brick_base=$(get_brick_base $1)
+    local cmd="$CLI volume create $vol_name replica 3"
+    local b
+    for b in $(seq 0 5); do
+        local this_brick=${brick_base}/brick$b
+        mkdir -p $this_brick
+        cmd="$cmd $H0:$this_brick"
+    done
+    TEST $cmd
+    TEST $CLI volume start $vol_name
+    # check for 6 bricks and 1 shd daemon to be up and running
+    EXPECT_WITHIN 120 7 count_up_bricks $vol_name
+    local mount_point=$(get_mount_point $1)
+    mkdir -p $mount_point
+    TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+    local v
+    for v in $(seq 1 $NUM_VOLS); do
+        local mount_point=$(get_mount_point $v)
+        force_umount $mount_point
+        rm -rf $mount_point
+        local vol_name=$(printf "%s-vol%02d" $V0 $v)
+        $CLI volume stop $vol_name
+        $CLI volume delete $vol_name
+        rm -rf $(get_brick_base $1) &
+    done &> /dev/null
+    wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here.  The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=24
+for i in $(seq 1 $NUM_VOLS); do
+    create_volume $i
+    TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+    # Unmounting to reduce memory footprint on regression hosts
+    mnt_point=$(get_mount_point $i)
+    EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+done
+
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ]
+start=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glustershd process"
+for i in $(seq 1 50); do
+    pmap -x $glustershd_pid | grep total
+    for j in $(seq 1 $NUM_VOLS); do
+        vol_name=$(printf "%s-vol%02d" $V0 $j)
+        gluster v set $vol_name cluster.self-heal-daemon off > /dev/null
+        gluster v set $vol_name cluster.self-heal-daemon on > /dev/null
+    done
+done
+
+end=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 10M it means some leak in reconfigure
+# code path
+
+TEST [ $diff -lt 10000 ]
+
+trap - EXIT
+cleanup
diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
new file mode 100644
index 00000000000..1acbaa8dc0b
--- /dev/null
+++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+    pgrep glusterfsd | wc -l
+}
+
+cleanup
+
+#bug-1444596 - validating brick mux
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V1
+# At the time initialize brick daemon it always keeps open
+# standard fd's (0, 1 , 2) so after stop 1 volume fd's should
+# be open
+nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
+TEST [ $((nofds)) -eq 3 ]
+
+cleanup
diff --git a/tests/bugs/core/bug-834465.c b/tests/bugs/core/bug-834465.c
index 61d3deac077..33dd270b112 100644
--- a/tests/bugs/core/bug-834465.c
+++ b/tests/bugs/core/bug-834465.c
@@ -7,55 +7,54 @@
 #include <fcntl.h>
 
 int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
 {
-        int fd = -1;
-        char *filename = NULL;
-        struct flock lock = {0, };
-        int i = 0;
-        int ret = -1;
-
-        if (argc != 2) {
-                fprintf (stderr, "Usage: %s <filename> ", argv[0]);
-                goto out;
+    int fd = -1;
+    char *filename = NULL;
+    struct flock lock = {
+        0,
+    };
+    int i = 0;
+    int ret = -1;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <filename> ", argv[0]);
+        goto out;
+    }
+
+    filename = argv[1];
+
+    fd = open(filename, O_RDWR | O_CREAT, 0);
+    if (fd < 0) {
+        fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+        goto out;
+    }
+
+    lock.l_type = F_WRLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_start = 1;
+    lock.l_len = 1;
+
+    while (i < 100) {
+        lock.l_type = F_WRLCK;
+        ret = fcntl(fd, F_SETLK, &lock);
+        if (ret < 0) {
+            fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+            goto out;
         }
 
-        filename = argv[1];
-
-        fd = open (filename, O_RDWR | O_CREAT, 0);
-        if (fd < 0) {
-                fprintf (stderr, "open (%s) failed (%s)\n", filename,
-                         strerror (errno));
-                goto out;
+        lock.l_type = F_UNLCK;
+        ret = fcntl(fd, F_SETLK, &lock);
+        if (ret < 0) {
+            fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+            goto out;
         }
 
-        lock.l_type = F_WRLCK;
-        lock.l_whence = SEEK_SET;
-        lock.l_start = 1;
-        lock.l_len = 1;
-
-        while (i < 100) {
-                lock.l_type = F_WRLCK;
-                ret = fcntl (fd, F_SETLK, &lock);
-                if (ret < 0) {
-                        fprintf (stderr, "fcntl setlk failed (%s)\n",
-                                 strerror (errno));
-                        goto out;
-                }
-
-                lock.l_type = F_UNLCK;
-                ret = fcntl (fd, F_SETLK, &lock);
-                if (ret < 0) {
-                        fprintf (stderr, "fcntl setlk failed (%s)\n",
-                                 strerror (errno));
-                        goto out;
-                }
-
-                i++;
-        }
+        i++;
+    }
 
-        ret = 0;
+    ret = 0;
 out:
-        return ret;
+    return ret;
 }
 
diff --git a/tests/bugs/core/bug-927616.t b/tests/bugs/core/bug-927616.t
index 6bb64743183..18257131ac7 100755
--- a/tests/bugs/core/bug-927616.t
+++ b/tests/bugs/core/bug-927616.t
@@ -3,6 +3,8 @@
 . $(dirname $0)/../../include.rc
 . $(dirname $0)/../../nfs.rc
 
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
 cleanup;
 
 TEST glusterd
diff --git a/tests/bugs/core/io-stats-1322825.t b/tests/bugs/core/io-stats-1322825.t
index d232ecb2420..53f2d040daa 100755
--- a/tests/bugs/core/io-stats-1322825.t
+++ b/tests/bugs/core/io-stats-1322825.t
@@ -23,7 +23,7 @@ TEST $CLI volume profile $V0 start
 TEST mkdir $M0/dir1
 
 # Generate the stat dump across the io-stat instances
-TEST setfattr -n trusted.io-stats-dump -v /tmp/io-stats-1322825 $M0
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0
 
 # Check if $M0 is clean w.r.t xattr information
 # TODO: if there are better ways to check we really get no attr error, please
@@ -42,12 +42,12 @@ ret=$(echo $?)
 EXPECT 0 echo $ret
 
 # Check if we have 5 io-stat files in /tmp
-EXPECT 5 ls -1 /tmp/io-stats-1322825*
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
 # Cleanup the 5 generated files
-rm -f /tmp/io-stats-1322825*
+rm -f /var/run/gluster/io-stats-1322825*
 
 # Rinse and repeat above for a directory
-TEST setfattr -n trusted.io-stats-dump -v /tmp/io-stats-1322825 $M0/dir1
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0/dir1
 getfattr -n trusted.io-stats-dump $B0/${V0}1/dir1 2>&1 | grep -qi "no such attribute"
 ret=$(echo $?)
 EXPECT 0 echo $ret
@@ -61,7 +61,7 @@ getfattr -n trusted.io-stats-dump $B0/${V0}4/dir1 2>&1 | grep -qi "no such attri
 ret=$(echo $?)
 EXPECT 0 echo $ret
 
-EXPECT 5 ls -1 /tmp/io-stats-1322825*
-rm -f /tmp/io-stats-1322825*
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
+rm -f /var/run/gluster/io-stats-1322825*
 
 cleanup;
diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t
deleted file mode 100644
index c5bbbdad852..00000000000
--- a/tests/bugs/core/multiplex-limit-issue-151.t
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../traps.rc
-. $(dirname $0)/../../volume.rc
-
-function count_up_bricks {
-    $CLI --xml volume status all | grep '<status>1' | wc -l
-}
-
-function count_brick_processes {
-    pgrep glusterfsd | wc -l
-}
-
-function count_brick_pids {
-    $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
-                                 | grep -v "N/A" | sort | uniq | wc -l
-}
-
-cleanup;
-
-TEST glusterd
-
-TEST $CLI volume set all cluster.brick-multiplex on
-TEST ! $CLI volume set all cluster.max-bricks-per-process -1
-TEST ! $CLI volume set all cluster.max-bricks-per-process foobar
-TEST $CLI volume set all cluster.max-bricks-per-process 3
-
-push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
-push_trapfunc "cleanup"
-
-TEST $CLI volume create $V0 $H0:$B0/brick{0..5}
-TEST $CLI volume start $V0
-
-EXPECT 2 count_brick_processes
-EXPECT 2 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
-
-pkill gluster
-TEST glusterd
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
-
-TEST $CLI volume add-brick $V0 $H0:$B0/brick6
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks
-
-TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start
-TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 force
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
