From 13e7b3b354a252ad4065f7b2f0f805c40a3c5d18 Mon Sep 17 00:00:00 2001
From: Samikshan Bairagya
Date: Tue, 16 May 2017 15:07:21 +0530
Subject: glusterd: Don't spawn new glusterfsds on node reboot with brick-mux

With brick multiplexing enabled, upon a node reboot new bricks were
not being attached to the first spawned brick process even though
there weren't any compatibility issues.

The reason for this is that upon glusterd restart after a node
reboot, since brick services aren't running, glusterd starts the
bricks in a "no-wait" mode. So after a brick process is spawned for
the first brick, there isn't enough time for the corresponding pid
file to get populated with a value before the compatibility check is
made for the next brick.

This commit solves this by iteratively waiting for the pidfile to be
populated in the brick compatibility comparison stage before checking
if the brick process is alive.

Change-Id: Ibd1f8e54c63e4bb04162143c9d70f09918a44aa4
BUG: 1451248
Signed-off-by: Samikshan Bairagya
Reviewed-on: https://review.gluster.org/17307
Reviewed-by: Atin Mukherjee
Smoke: Gluster Build System
NetBSD-regression: NetBSD Build System
CentOS-regression: Gluster Build System
---
 tests/bugs/glusterd/bug-1451248-mux-reboot-node.t | 54 +++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 tests/bugs/glusterd/bug-1451248-mux-reboot-node.t

diff --git a/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
new file mode 100644
index 00000000000..5d8ce6e75e6
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1451248-mux-reboot-node.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+
+function count_up_bricks {
+        $CLI --xml volume status all | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+        pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+        $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+                                     | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+push_trapfunc "$CLI volume set all cluster.brick-multiplex off"
+push_trapfunc "cleanup"
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0..2}
+TEST $CLI volume start $V0
+
+EXPECT 1 count_brick_processes
+EXPECT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+pkill glusterd
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+
+TEST $CLI volume create $V1 $H0:$B0/brick{3..5}
+TEST $CLI volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
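
Note: the patch above adds only the regression test; the glusterd-side change it
describes lives in the brick compatibility check. The sketch below is a minimal
illustration of the pattern the commit message outlines -- iteratively waiting for
a brick's pidfile to be populated before probing whether the process is alive. It
is not the actual glusterd code; the helper names (wait_for_pidfile,
brick_process_alive) and the retry bound are hypothetical.

/*
 * Illustration only -- NOT the actual glusterd patch.  Poll the
 * brick's pidfile until it is populated, then check that the process
 * it names is alive, instead of failing the multiplexing
 * compatibility check just because the freshly spawned brick has not
 * written its pidfile yet.
 */
#include <signal.h>     /* kill() */
#include <stdio.h>      /* fopen(), fscanf() */
#include <unistd.h>     /* sleep() */

#define PIDFILE_RETRIES 15   /* hypothetical: seconds to wait for the pidfile */

/* Poll until the pidfile exists and contains a pid; return it, or -1
 * if nothing shows up within PIDFILE_RETRIES seconds. */
static int
wait_for_pidfile(const char *pidfile)
{
        int tries, pid;

        for (tries = 0; tries < PIDFILE_RETRIES; tries++) {
                FILE *fp = fopen(pidfile, "r");

                if (fp) {
                        pid = -1;
                        if (fscanf(fp, "%d", &pid) == 1 && pid > 0) {
                                fclose(fp);
                                return pid;
                        }
                        fclose(fp);
                }
                sleep(1);   /* pidfile missing or still empty; retry */
        }
        return -1;
}

/* Liveness probe used by the compatibility comparison: returns 1 if
 * the already-spawned brick process can be reused as a multiplexing
 * target, 0 otherwise. */
static int
brick_process_alive(const char *pidfile)
{
        int pid = wait_for_pidfile(pidfile);

        if (pid < 0)
                return 0;
        /* kill(pid, 0) delivers no signal; it only checks that the
         * process exists and can be signalled. */
        return kill((pid_t)pid, 0) == 0;
}

With this pattern, a glusterd restarting in "no-wait" mode gives the first
spawned glusterfsd up to the retry bound to write its pidfile, so subsequent
bricks attach to it instead of each spawning their own process -- which is what
the test above asserts by requiring count_brick_processes to stay at 1.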