Diffstat (limited to 'tests/bugs/core')
-rw-r--r--  tests/bugs/core/brick-mux-fd-cleanup.t                          |  78
-rwxr-xr-x  tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t                |   9
-rw-r--r--  tests/bugs/core/bug-1432542-mpx-restart-crash.t                 |  27
-rw-r--r--  tests/bugs/core/bug-1650403.t                                   | 113
-rw-r--r--  tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t  |  33
-rw-r--r--  tests/bugs/core/bug-834465.c                                    |  85
-rwxr-xr-x  tests/bugs/core/bug-927616.t                                    |   2
-rwxr-xr-x  tests/bugs/core/io-stats-1322825.t                              |  12
-rw-r--r--  tests/bugs/core/multiplex-limit-issue-151.t                     |  57
9 files changed, 301 insertions, 115 deletions
diff --git a/tests/bugs/core/brick-mux-fd-cleanup.t b/tests/bugs/core/brick-mux-fd-cleanup.t
new file mode 100644
index 00000000000..de11c177b8a
--- /dev/null
+++ b/tests/bugs/core/brick-mux-fd-cleanup.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This .t tests that the fds from client are closed on brick when gluster volume
+#stop is executed in brick-mux setup.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+function keep_fd_open {
+#This function has to be run as background job because opening the fd in
+#foreground and running commands is leading to flush calls on these fds
+#which is making it very difficult to create the race where fds will be left
+#open even after the brick dies.
+    exec 5>$M1/a
+    exec 6>$M1/b
+    while [ -f $M0/a ]; do sleep 1; done
+}
+
+function count_open_files {
+    local brick_pid="$1"
+    local pattern="$2"
+    ls -l /proc/$brick_pid/fd | grep -i "$pattern" | wc -l
+}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{2,3}
+#Have same configuration on both bricks so that they are multiplexed
+#Delay flush fop for a second
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume heal $V1 disable
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.enable flush
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+TEST $CLI volume set $V1 delay-gen posix
+TEST $CLI volume set $V1 delay-gen.enable flush
+TEST $CLI volume set $V1 delay-gen.delay-percentage 100
+TEST $CLI volume set $V1 delay-gen.delay-duration 1000000
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0
+TEST $GFS -s $H0 --volfile-id=$V1 --direct-io-mode=enable $M1
+
+TEST touch $M0/a
+keep_fd_open &
+TEST $CLI volume profile $V1 start
+brick_pid=$(get_brick_pid $V1 $H0 $B0/${V1}2)
+TEST count_open_files $brick_pid "$B0/${V1}2/a"
+TEST count_open_files $brick_pid "$B0/${V1}2/b"
+TEST count_open_files $brick_pid "$B0/${V1}3/a"
+TEST count_open_files $brick_pid "$B0/${V1}3/b"
+
+#If any other flush fops are introduced into the system other than the one at
+#cleanup it interferes with the race, so test for it
+EXPECT "^0$" echo "$($CLI volume profile $V1 info incremental | grep -i flush | wc -l)"
+#Stop the volume
+TEST $CLI volume stop $V1
+
+#Wait for cleanup resources or volume V1
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/b"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/b"
+
+TEST rm -f $M0/a #Exit keep_fd_open()
+wait
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup
diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
index 6351ba22511..a1b9a851bf7 100755
--- a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
+++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
@@ -3,6 +3,8 @@
 . $(dirname $0)/../../volume.rc
 cleanup;
 
+FILE_COUNT=500
+
 TEST glusterd
 TEST pidof glusterd
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
@@ -11,15 +13,14 @@ TEST $CLI volume set $V0 cluster.shd-wait-qlength 100
 TEST $CLI volume start $V0
 TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
 
-touch $M0/file{1..200}
-
+for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done
 TEST kill_brick $V0 $H0 $B0/${V0}1
-for i in {1..200}; do echo hello>$M0/file$i; done
+for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
-EXPECT "200" get_pending_heal_count $V0
+EXPECT "$FILE_COUNT" get_pending_heal_count $V0
 
 TEST $CLI volume set $V0 self-heal-daemon on
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
 
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
index 2179630202d..2793d7008e1 100644
--- a/tests/bugs/core/bug-1432542-mpx-restart-crash.t
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -1,12 +1,14 @@
 #!/bin/bash
 
-SCRIPT_TIMEOUT=300
+SCRIPT_TIMEOUT=800
 
 . $(dirname $0)/../../include.rc
 . $(dirname $0)/../../volume.rc
 . $(dirname $0)/../../traps.rc
 
-NUM_VOLS=20
+cleanup;
+
+NUM_VOLS=15
 MOUNT_BASE=$(dirname $M0)
 
 # GlusterD reports that bricks are started when in fact their attach requests
@@ -40,7 +42,7 @@ create_volume () {
     local vol_name=$(printf "%s-vol%02d" $V0 $1)
 
     local brick_base=$(get_brick_base $1)
-    local cmd="$CLI volume create $vol_name replica 2"
+    local cmd="$CLI volume create $vol_name replica 3"
     local b
     for b in $(seq 0 5); do
         local this_brick=${brick_base}/brick$b
@@ -50,7 +52,7 @@ create_volume () {
     TEST $cmd
     TEST $CLI volume start $vol_name
     # check for 6 bricks and 1 shd daemon to be up and running
-    EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks $vol_name
+    EXPECT_WITHIN 120 7 count_up_bricks $vol_name
     local mount_point=$(get_mount_point $1)
     mkdir -p $mount_point
     TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
@@ -77,12 +79,27 @@ TEST $CLI volume set all cluster.brick-multiplex on
 # Our infrastructure can't handle an arithmetic expression here. The formula
 # is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
 # NUM_VOLS-1 and there are 5 such statements in each iteration.
-TESTS_EXPECTED_IN_LOOP=95
+TESTS_EXPECTED_IN_LOOP=84
 
 for i in $(seq 1 $NUM_VOLS); do
+    starttime="$(date +%s)";
+
     create_volume $i
     TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+    # Unmounting to reduce memory footprint on regression hosts
+    mnt_point=$(get_mount_point $i)
+    EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+    endtime=$(expr $(date +%s) - $starttime)
+
+    echo "Memory Used after $i volumes : $(pmap -x $(pgrep glusterfsd) | grep total)"
+    echo "Thread Count after $i volumes: $(ps -T -p $(pgrep glusterfsd) | wc -l)"
+    echo "Time taken : ${endtime} seconds"
 done
+echo "=========="
+echo "List of all the threads in the Brick process"
+ps -T -p $(pgrep glusterfsd)
+echo "=========="
+
 # Kill glusterd, and wait a bit for all traces to disappear.
 TEST killall -9 glusterd
 sleep 5
diff --git a/tests/bugs/core/bug-1650403.t b/tests/bugs/core/bug-1650403.t
new file mode 100644
index 00000000000..43d09bc8bd9
--- /dev/null
+++ b/tests/bugs/core/bug-1650403.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=500
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+NUM_VOLS=5
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried.  That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail.  Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate.  After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures.  Arguably,
+# those are spurious because we will eventually catch up.  We're just not
+# ready *yet*.  More to the point, even if the errors aren't spurious that's
+# not what we're testing right now.  Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+    printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+    printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+function count_up_bricks {
+    vol=$1;
+    $CLI --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+    local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+    local brick_base=$(get_brick_base $1)
+    local cmd="$CLI volume create $vol_name replica 3"
+    local b
+    for b in $(seq 0 5); do
+        local this_brick=${brick_base}/brick$b
+        mkdir -p $this_brick
+        cmd="$cmd $H0:$this_brick"
+    done
+    TEST $cmd
+    TEST $CLI volume start $vol_name
+    # check for 6 bricks and 1 shd daemon to be up and running
+    EXPECT_WITHIN 120 7 count_up_bricks $vol_name
+    local mount_point=$(get_mount_point $1)
+    mkdir -p $mount_point
+    TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+    local v
+    for v in $(seq 1 $NUM_VOLS); do
+        local mount_point=$(get_mount_point $v)
+        force_umount $mount_point
+        rm -rf $mount_point
+        local vol_name=$(printf "%s-vol%02d" $V0 $v)
+        $CLI volume stop $vol_name
+        $CLI volume delete $vol_name
+        rm -rf $(get_brick_base $1) &
+    done &> /dev/null
+    wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here.  The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=24
+for i in $(seq 1 $NUM_VOLS); do
+    create_volume $i
+    TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+    # Unmounting to reduce memory footprint on regression hosts
+    mnt_point=$(get_mount_point $i)
+    EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+done
+
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ]
+start=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glustershd process"
+for i in $(seq 1 50); do
+    pmap -x $glustershd_pid | grep total
+    for j in $(seq 1 $NUM_VOLS); do
+        vol_name=$(printf "%s-vol%02d" $V0 $j)
+        gluster v set $vol_name cluster.self-heal-daemon off > /dev/null
+        gluster v set $vol_name cluster.self-heal-daemon on > /dev/null
+    done
+done
+
+end=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 10M it means some leak in reconfigure
+# code path
+
+TEST [ $diff -lt 10000 ]
+
+trap - EXIT
+cleanup
diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
new file mode 100644
index 00000000000..1acbaa8dc0b
--- /dev/null
+++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+    pgrep glusterfsd | wc -l
+}
+
+cleanup
+
+#bug-1444596 - validating brick mux
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V1
+# At the time initialize brick daemon it always keeps open
+# standard fd's (0, 1 , 2) so after stop 1 volume fd's should
+# be open
+nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
+TEST [ $((nofds)) -eq 3 ]
+
+cleanup
diff --git a/tests/bugs/core/bug-834465.c b/tests/bugs/core/bug-834465.c
index 61d3deac077..33dd270b112 100644
--- a/tests/bugs/core/bug-834465.c
+++ b/tests/bugs/core/bug-834465.c
@@ -7,55 +7,54 @@
 #include <fcntl.h>
 
 int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
 {
-        int fd = -1;
-        char *filename = NULL;
-        struct flock lock = {0, };
-        int i = 0;
-        int ret = -1;
-
-        if (argc != 2) {
-                fprintf (stderr, "Usage: %s <filename> ", argv[0]);
-                goto out;
+    int fd = -1;
+    char *filename = NULL;
+    struct flock lock = {
+        0,
+    };
+    int i = 0;
+    int ret = -1;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s <filename> ", argv[0]);
+        goto out;
+    }
+
+    filename = argv[1];
+
+    fd = open(filename, O_RDWR | O_CREAT, 0);
+    if (fd < 0) {
+        fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+        goto out;
+    }
+
+    lock.l_type = F_WRLCK;
+    lock.l_whence = SEEK_SET;
+    lock.l_start = 1;
+    lock.l_len = 1;
+
+    while (i < 100) {
+        lock.l_type = F_WRLCK;
+        ret = fcntl(fd, F_SETLK, &lock);
+        if (ret < 0) {
+            fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+            goto out;
         }
 
-        filename = argv[1];
-
-        fd = open (filename, O_RDWR | O_CREAT, 0);
-        if (fd < 0) {
-                fprintf (stderr, "open (%s) failed (%s)\n", filename,
-                         strerror (errno));
-                goto out;
+        lock.l_type = F_UNLCK;
+        ret = fcntl(fd, F_SETLK, &lock);
+        if (ret < 0) {
+            fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+            goto out;
         }
 
-        lock.l_type = F_WRLCK;
-        lock.l_whence = SEEK_SET;
-        lock.l_start = 1;
-        lock.l_len = 1;
-
-        while (i < 100) {
-                lock.l_type = F_WRLCK;
-                ret = fcntl (fd, F_SETLK, &lock);
-                if (ret < 0) {
-                        fprintf (stderr, "fcntl setlk failed (%s)\n",
-                                 strerror (errno));
-                        goto out;
-                }
-
-                lock.l_type = F_UNLCK;
-                ret = fcntl (fd, F_SETLK, &lock);
-                if (ret < 0) {
-                        fprintf (stderr, "fcntl setlk failed (%s)\n",
-                                 strerror (errno));
-                        goto out;
-                }
-
-                i++;
-        }
+        i++;
+    }
 
-        ret = 0;
+    ret = 0;
 out:
-        return ret;
+    return ret;
 }
 
diff --git a/tests/bugs/core/bug-927616.t b/tests/bugs/core/bug-927616.t
index 6bb64743183..18257131ac7 100755
--- a/tests/bugs/core/bug-927616.t
+++ b/tests/bugs/core/bug-927616.t
@@ -3,6 +3,8 @@
 . $(dirname $0)/../../include.rc
 . $(dirname $0)/../../nfs.rc
 
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
 cleanup;
 
 TEST glusterd
diff --git a/tests/bugs/core/io-stats-1322825.t b/tests/bugs/core/io-stats-1322825.t
index d232ecb2420..53f2d040daa 100755
--- a/tests/bugs/core/io-stats-1322825.t
+++ b/tests/bugs/core/io-stats-1322825.t
@@ -23,7 +23,7 @@ TEST $CLI volume profile $V0 start
 TEST mkdir $M0/dir1
 
 # Generate the stat dump across the io-stat instances
-TEST setfattr -n trusted.io-stats-dump -v /tmp/io-stats-1322825 $M0
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0
 
 # Check if $M0 is clean w.r.t xattr information
 # TODO: if there are better ways to check we really get no attr error, please
@@ -42,12 +42,12 @@ ret=$(echo $?)
 EXPECT 0 echo $ret
 
 # Check if we have 5 io-stat files in /tmp
-EXPECT 5 ls -1 /tmp/io-stats-1322825*
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
 # Cleanup the 5 generated files
-rm -f /tmp/io-stats-1322825*
+rm -f /var/run/gluster/io-stats-1322825*
 
 # Rinse and repeat above for a directory
-TEST setfattr -n trusted.io-stats-dump -v /tmp/io-stats-1322825 $M0/dir1
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0/dir1
 getfattr -n trusted.io-stats-dump $B0/${V0}1/dir1 2>&1 | grep -qi "no such attribute"
 ret=$(echo $?)
 EXPECT 0 echo $ret
@@ -61,7 +61,7 @@ getfattr -n trusted.io-stats-dump $B0/${V0}4/dir1 2>&1 | grep -qi "no such attri
 ret=$(echo $?)
 EXPECT 0 echo $ret
 
-EXPECT 5 ls -1 /tmp/io-stats-1322825*
-rm -f /tmp/io-stats-1322825*
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
+rm -f /var/run/gluster/io-stats-1322825*
 
 cleanup;
diff --git a/tests/bugs/core/multiplex-limit-issue-151.t b/tests/bugs/core/multiplex-limit-issue-151.t
deleted file mode 100644
index c5bbbdad852..00000000000
--- a/tests/bugs/core/multiplex-limit-issue-151.t
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../../include.rc
-. $(dirname $0)/../../traps.rc
-. $(dirname $0)/../../volume.rc
-
-function count_up_bricks {
-    $CLI --xml volume status all | grep '<status>1' | wc -l
-}
-
-function count_brick_processes {
-    pgrep glusterfsd | wc -l
-}
-
-function count_brick_pids {
-    $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
-                                 | grep -v "N/A" | sort | uniq | wc -l
-}
-
-cleanup;
-
-TEST glusterd
-
-TEST $CLI volume set all cluster.brick-multiplex on
-TEST ! $CLI volume set all cluster.max-bricks-per-process -1
-TEST ! $CLI volume set all cluster.max-bricks-per-process foobar
-TEST $CLI volume set all cluster.max-bricks-per-process 3
-
-push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
-push_trapfunc "cleanup"
-
-TEST $CLI volume create $V0 $H0:$B0/brick{0..5}
-TEST $CLI volume start $V0
-
-EXPECT 2 count_brick_processes
-EXPECT 2 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
-
-pkill gluster
-TEST glusterd
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
-
-TEST $CLI volume add-brick $V0 $H0:$B0/brick6
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks
-
-TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start
-TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 force
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
