From 5d499cc221850fb1f83b625df5a113e0b83d0a99 Mon Sep 17 00:00:00 2001
From: "Kaleb S. KEITHLEY"
Date: Mon, 20 Feb 2017 11:14:53 -0500
Subject: common-ha: unable to start HA, Connection Error

See BZ 1284404.

pcsd behavior has changed, and pcsd will not accept connections until
SSL certificates have fully propagated throughout all the nodes.

The HA developers suggest a 12-second delay between the
`pcs cluster setup ...` and the `pcs cluster start --all` commands.

release-3.9 BZ: 1425110
release-3.9 change: https://review.gluster.org/16690

Change-Id: If94b6991a62f346dbead023c7e7f8282a995728c
BUG: 1425112
Signed-off-by: Kaleb S. KEITHLEY
Reviewed-on: https://review.gluster.org/16691
Smoke: Gluster Build System
CentOS-regression: Gluster Build System
NetBSD-regression: NetBSD Build System
---
 extras/ganesha/scripts/ganesha-ha.sh | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'extras')

diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index ac8c91f194e..0692df8f5dd 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -179,13 +179,16 @@ setup_cluster()
         logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
         exit 1;
     fi
+
+    # BZ 1284404, 1425110, allow time for SSL certs to propagate, until then
+    # pcsd will not accept connections.
+    sleep 12
     pcs cluster start --all
-    if [ $? -ne 0 ]; then
-        logger "pcs cluster start failed"
-        exit 1;
-    fi
+    while [ $? -ne 0 ]; do
+        sleep 2
+        pcs cluster start --all
+    done
 
-    sleep 1
     # wait for the cluster to elect a DC before querying or writing
     # to the CIB. BZ 1334092
     crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
--
cgit