author    | Atin Mukherjee <amukherj@redhat.com> | 2016-10-05 14:59:51 +0530
committer | Atin Mukherjee <amukherj@redhat.com> | 2017-01-27 00:04:29 -0500
commit    | a9f660bc9d2d7c87b3306a35a2088532de000015 (patch)
tree      | 699d37b4765da3965addcd01f3b46c307b9a5bee
parent    | a3b4c70afee89536374f6fa032465cc313437956 (diff)
glusterd: daemon restart logic should adhere to server-side quorum
Just like brick processes, other daemon services should follow the same
quorum-check logic to decide whether a particular service needs to come up
when glusterd is restarted or an incoming friend add/update request is
received (handled in the glusterd_restart_bricks () function).
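The control flow this aims for can be sketched as below. This is a minimal,
self-contained illustration, not glusterd code: names such as volume_t,
quorum_met (), start_daemons () and start_bricks () are hypothetical stand-ins
for glusterd's volinfo list, check_quorum_for_brick_start (),
glusterd_svcs_manager () and glusterd_brick_start (). The point it shows is
that daemon services are started at most once, and only from inside the branch
where quorum is satisfied for a started volume.

/* All names below are hypothetical stand-ins, not the real glusterd API. */
#include <stdbool.h>
#include <stdio.h>

typedef struct {
        const char *name;
        bool        started;     /* volume is in the STARTED state      */
        bool        has_quorum;  /* server-side quorum is currently met */
} volume_t;

static bool quorum_met (const volume_t *vol)   { return vol->has_quorum; }
static void start_daemons (void)               { puts ("starting shd/nfs/quotad ..."); }
static void start_bricks (const volume_t *vol) { printf ("starting bricks of %s\n", vol->name); }

/* Daemons are started at most once, and only after at least one started
 * volume passes the quorum check -- mirroring how the patch moves the
 * glusterd_svcs_manager () call inside the quorum-satisfied branch. */
static void restart_volumes (volume_t *vols, int count)
{
        bool daemons_started = false;
        int  i               = 0;

        for (i = 0; i < count; i++) {
                if (!vols[i].started)
                        continue;
                if (!quorum_met (&vols[i]))
                        /* Quorum not met: neither bricks nor daemons start. */
                        continue;
                if (!daemons_started) {
                        daemons_started = true;
                        start_daemons ();
                }
                start_bricks (&vols[i]);
        }
}

int main (void)
{
        volume_t vols[] = {
                { "vol-without-quorum", true, false },
                { "vol-with-quorum",    true, true  },
        };

        restart_volumes (vols, 2);
        return 0;
}

With the pre-patch placement, the equivalent of start_daemons () ran before
any quorum check, so daemons came up regardless of quorum; that is the
behaviour the bug report describes.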
Change-Id: I54a1fbdaa1571cc45eed627181b81463fead47a3
BUG: 1383893
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://review.gluster.org/15626
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Prashanth Pai <ppai@redhat.com>
-rw-r--r-- | tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t | 57
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 13
2 files changed, 64 insertions, 6 deletions
diff --git a/tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t b/tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t
new file mode 100644
index 00000000000..105292ab5bb
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1383893-daemons-to-follow-quorum.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# This test checks that shd or any other daemon brought down (apart from
+# brick processes) is not brought up automatically when glusterd on the other
+# node is (re)started
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function shd_up_status_1 {
+        $CLI_1 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function shd_up_status_2 {
+        $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function get_shd_pid_2 {
+        $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $8}'
+}
+cleanup;
+
+TEST launch_cluster 3
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Let's create the volume
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/${V0}1 $H2:$B2/${V0}2
+
+# Start the volume
+TEST $CLI_1 volume start $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status_1 $V0 $H2 $B2/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
+
+# Bring down shd on the 2nd node
+kill -15 $(get_shd_pid_2)
+
+# Bring down glusterd on the 1st node
+TEST kill_glusterd 1
+
+# Bring back glusterd on the 1st node
+TEST $glusterd_1
+
+# Wait for PROCESS_UP_TIMEOUT and then check that the shd service does not
+# come up on node 2
+sleep $PROCESS_UP_TIMEOUT
+EXPECT "N" shd_up_status_2
+
+cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index a77cc674e63..5f9098f3e9d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -4923,10 +4923,6 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
                 if (volinfo->status != GLUSTERD_STATUS_STARTED)
                         continue;
-                if (start_svcs == _gf_false) {
-                        start_svcs = _gf_true;
-                        glusterd_svcs_manager (NULL);
-                }
                 gf_msg_debug (this->name, 0, "starting the volume %s",
                               volinfo->volname);
 
@@ -4949,6 +4945,11 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                          */
                         continue;
                 } else {
+                        if (start_svcs == _gf_false) {
+                                start_svcs = _gf_true;
+                                glusterd_svcs_manager (NULL);
+                        }
+
                         cds_list_for_each_entry (brickinfo, &volinfo->bricks,
                                                  brick_list) {
                                 glusterd_brick_start (volinfo, brickinfo,
@@ -4961,8 +4962,8 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                 cds_list_for_each_entry (volinfo, &snap->volumes, vol_list) {
                         if (volinfo->status != GLUSTERD_STATUS_STARTED)
                                 continue;
-                        /* Check the quorum, if quorum is not met, don't start the
-                           bricks
+                        /* Check the quorum, if quorum is not met, don't start
+                         * the bricks
                         */
                         ret = check_quorum_for_brick_start (volinfo,
                                                             node_quorum);