Diffstat (limited to 'extras/ganesha/scripts/ganesha-ha.sh')
| -rw-r--r-- | extras/ganesha/scripts/ganesha-ha.sh | 217 |
1 files changed, 137 insertions, 80 deletions
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index 5d3bf8413b8..9790a719e10 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -24,11 +24,16 @@ GANESHA_HA_SH=$(realpath $0)
 HA_NUM_SERVERS=0
 HA_SERVERS=""
 HA_VOL_NAME="gluster_shared_storage"
-HA_VOL_MNT="/var/run/gluster/shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
 HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
 SERVICE_MAN="DISTRO_NOT_FOUND"
 
-RHEL6_PCS_CNAME_OPTION="--name"
+# rhel, fedora id, version
+ID=""
+VERSION_ID=""
+
+PCS9OR10_PCS_CNAME_OPTION=""
+PCS9OR10_PCS_CLONE_OPTION="clone"
 SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
 
 # UNBLOCK RA uses shared_storage which may become unavailable
@@ -101,9 +106,9 @@ determine_service_manager () {
     then
         SERVICE_MAN="/sbin/service"
     fi
-    if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ]
+    if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]]
     then
-        echo "Service manager not recognized, exiting"
+        logger "Service manager not recognized, exiting"
         exit 1
     fi
 }
@@ -114,7 +119,7 @@ manage_service ()
     local new_node=${2}
     local option=
 
-    if [ "${action}" == "start" ]; then
+    if [[ "${action}" == "start" ]]; then
         option="yes"
     else
         option="no"
@@ -122,7 +127,7 @@ manage_service ()
     ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
 ${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
 
-    if [ "${SERVICE_MAN}" == "/bin/systemctl" ]
+    if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]
     then
         ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
 ${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha"
@@ -140,7 +145,7 @@ check_cluster_exists()
 
     if [ -e /var/run/corosync.pid ]; then
         cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
-        if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then
+        if [[ "${cluster_name}X" == "${name}X" ]]; then
             logger "$name already exists, exiting"
             exit 0
         fi
@@ -155,7 +160,7 @@ determine_servers()
     local tmp_ifs=${IFS}
     local ha_servers=""
 
-    if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then
+    if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then
         ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
         IFS=$' '
         for server in ${ha_servers} ; do
@@ -183,33 +188,6 @@ stop_ganesha_all()
     done
 }
 
-start_cluster()
-{
-    # BZ 1284404, 1425110, allow time for SSL certs to propagate, until then
-    # pcsd will not accept connections.
-    sleep 12
-    pcs cluster start --all
-    while [ $? -ne 0 ]; do
-        sleep 2
-        pcs cluster start --all
-    done
-
-    # wait for the cluster to elect a DC before querying or writing
-    # to the CIB. BZ 1334092
-    crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
-    while [ $? -ne 0 ]; do
-        crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
-    done
-
-    unclean=$(pcs status | grep -u "UNCLEAN")
-    while [[ "${unclean}X" = "UNCLEANX" ]]; do
-        sleep 1
-        unclean=$(pcs status | grep -u "UNCLEAN")
-    done
-    sleep 1
-}
-
-
 setup_cluster()
 {
     local name=${1}
@@ -220,17 +198,41 @@ setup_cluster()
 
     logger "setting up cluster ${name} with the following ${servers}"
 
-    pcs cluster auth ${servers}
-    # pcs cluster setup --name ${name} ${servers}
-    pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers}
+    # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
+    pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
     if [ $? -ne 0 ]; then
-        logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
+        logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out"
         #set up failed stop all ganesha process and clean up symlinks in cluster
-        stop_ganesha_all ${servers}
+        stop_ganesha_all "${servers}"
         exit 1;
     fi
 
-    start_cluster
+    # pcs cluster auth ${servers}
+    pcs cluster auth
+    if [ $? -ne 0 ]; then
+        logger "pcs cluster auth failed"
+    fi
+
+    pcs cluster start --all
+    if [ $? -ne 0 ]; then
+        logger "pcs cluster start failed"
+        exit 1;
+    fi
+
+    sleep 1
+    # wait for the cluster to elect a DC before querying or writing
+    # to the CIB. BZ 1334092
+    crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+    while [ $? -ne 0 ]; do
+        crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+    done
+
+    unclean=$(pcs status | grep -u "UNCLEAN")
+    while [[ "${unclean}X" == "UNCLEANX" ]]; do
+        sleep 1
+        unclean=$(pcs status | grep -u "UNCLEAN")
+    done
+    sleep 1
 
     if [ ${num_servers} -lt 3 ]; then
         quorum_policy="ignore"
@@ -253,7 +255,7 @@ setup_finalize_ha()
     local stopped=""
 
     stopped=$(pcs status | grep -u "Stopped")
-    while [[ "${stopped}X" = "StoppedX" ]]; do
+    while [[ "${stopped}X" == "StoppedX" ]]; do
         sleep 1
         stopped=$(pcs status | grep -u "Stopped")
     done
@@ -267,19 +269,20 @@ refresh_config ()
     local HA_CONFDIR=${2}
     local short_host=$(hostname -s)
 
-    local removed_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
+    local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
                       awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
 
+
     if [ -e ${SECRET_PEM} ]; then
         while [[ ${3} ]]; do
             current_host=`echo ${3} | cut -d "." -f 1`
-            if [ ${short_host} != ${current_host} ]; then
+            if [[ ${short_host} != ${current_host} ]]; then
                 output=$(ssh -oPasswordAuthentication=no \
 -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \
 "dbus-send --print-reply --system --dest=org.ganesha.nfsd \
 /org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
 string:$HA_CONFDIR/exports/export.$VOL.conf \
-string:\"EXPORT(Export_Id=$removed_id)\" 2>&1")
+string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
                 ret=$?
                 logger <<< "${output}"
                 if [ ${ret} -ne 0 ]; then
@@ -300,7 +303,7 @@ string:\"EXPORT(Export_Id=$removed_id)\" 2>&1")
         output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \
 /org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
 string:$HA_CONFDIR/exports/export.$VOL.conf \
-string:"EXPORT(Export_Id=$removed_id)" 2>&1)
+string:"EXPORT(Export_Id=$export_id)" 2>&1)
         ret=$?
         logger <<< "${output}"
         if [ ${ret} -ne 0 ] ; then
@@ -313,6 +316,24 @@ string:"EXPORT(Export_Id=$removed_id)" 2>&1)
 
 teardown_cluster()
 {
+    local name=${1}
+
+    for server in ${HA_SERVERS} ; do
+        if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
+            logger "info: ${server} is not in config, removing"
+
+            pcs cluster stop ${server} --force
+            if [ $? -ne 0 ]; then
+                logger "warning: pcs cluster stop ${server} failed"
+            fi
+
+            pcs cluster node remove ${server}
+            if [ $? -ne 0 ]; then
+                logger "warning: pcs cluster node remove ${server} failed"
+            fi
+        fi
+    done
+
     # BZ 1193433 - pcs doesn't reload cluster.conf after modification
     # after teardown completes, a subsequent setup will appear to have
     # 'remembered' the deleted node. You can work around this by
@@ -326,7 +347,7 @@ teardown_cluster()
         logger "warning pcs cluster stop --all failed"
     fi
 
-    pcs cluster destroy --all
+    pcs cluster destroy
    if [ $? -ne 0 ]; then
        logger "error pcs cluster destroy failed"
        exit 1
@@ -388,7 +409,7 @@ wrap_create_virt_ip_constraints()
     # the result is "node2 node3 node4"; for node2, "node3 node4 node1"
     # and so on.
     while [[ ${1} ]]; do
-        if [ "${1}" = "${primary}" ]; then
+        if [[ ${1} == ${primary} ]]; then
             shift
             while [[ ${1} ]]; do
                 tail=${tail}" "${1}
@@ -419,15 +440,15 @@ setup_create_resources()
     local cibfile=$(mktemp -u)
 
     # fixup /var/lib/nfs
-    logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone"
-    pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
+    logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}"
+    pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}
     if [ $? -ne 0 ]; then
-        logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
+        logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed"
     fi
 
-    pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone
+    pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION}
     if [ $? -ne 0 ]; then
-        logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed"
+        logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed"
     fi
 
     # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace
@@ -435,9 +456,9 @@ setup_create_resources()
     # ganesha-active crm_attribute
     sleep 5
 
-    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true
+    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true
     if [ $? -ne 0 ]; then
-        logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed"
+        logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed"
    fi
 
    pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
@@ -606,7 +627,7 @@ addnode_recreate_resources()
         --after ${add_node}-nfs_block
     if [ $? -ne 0 ]; then
         logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
-        ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
+ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
     fi
 
     pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
@@ -646,6 +667,12 @@ addnode_create_resources()
     local add_vip=${1}; shift
     local cibfile=$(mktemp -u)
 
+    # start HA on the new node
+    pcs cluster start ${add_node}
+    if [ $? -ne 0 ]; then
+        logger "warning: pcs cluster start ${add_node} failed"
+    fi
+
     pcs cluster cib ${cibfile}
    if [ $? -ne 0 ]; then
        logger "warning: pcs cluster cib ${cibfile} failed"
@@ -665,7 +692,6 @@ addnode_create_resources()
         logger "warning: pcs cluster cib-push ${cibfile} failed"
     fi
     rm -f ${cibfile}
-
 }
 
 
@@ -765,7 +791,7 @@ setup_state_volume()
             touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
         fi
         for server in ${HA_SERVERS} ; do
-            if [ ${server} != ${dirname} ]; then
+            if [[ ${server} != ${dirname} ]]; then
                 ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
                 ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
             fi
@@ -776,6 +802,21 @@ setup_state_volume()
 
 }
 
+enable_pacemaker()
+{
+    while [[ ${1} ]]; do
+        if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then
+            ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
+        else
+            ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable"
+        fi
+        shift
+    done
+}
+
+
 addnode_state_volume()
 {
     local newnode=${1}; shift
@@ -829,13 +870,14 @@ addnode_state_volume()
     fi
 
     for server in ${HA_SERVERS} ; do
+        if [[ ${server} != ${dirname} ]]; then
             ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
             ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
 
             ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
             ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
-            fi
+        fi
     done
 }
 
@@ -861,7 +903,7 @@ delnode_state_volume()
     rm -rf ${mnt}/nfs-ganesha/${dirname}
 
     for server in ${HA_SERVERS} ; do
-        if [[ "${server}" != "${dirname}" ]]; then
+        if [[ ${server} != ${dirname} ]]; then
             rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
             rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
         fi
@@ -877,8 +919,9 @@ status()
     local index=1
     local nodes
 
-    # change tabs to spaces, strip leading spaces
-    pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch}
+    # change tabs to spaces, strip leading spaces, including any
+    # new '*' at the beginning of a line introduced in pcs-0.10.x
+    pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch}
 
     nodes[0]=${1}; shift
 
@@ -893,7 +936,7 @@ status()
     done
 
     # print the nodes that are expected to be online
-    grep -E "^Online:" ${scratch}
+    grep -E "Online:" ${scratch}
 
     echo
 
@@ -932,7 +975,7 @@ status()
 
 create_ganesha_conf_file()
 {
-    if [ $1 == "yes" ];
+    if [[ "$1" == "yes" ]];
     then
         if [ -e $GANESHA_CONF ];
         then
@@ -977,7 +1020,16 @@ main()
         exit 0
     fi
 
-    semanage boolean -m gluster_use_execmem --on
+    if (selinuxenabled) ;then
+        semanage boolean -m gluster_use_execmem --on
+    fi
+
+    local osid=""
+
+    osid=$(grep ^ID= /etc/os-release)
+    eval $(echo ${osid} | grep -F ID=)
+    osid=$(grep ^VERSION_ID= /etc/os-release)
+    eval $(echo ${osid} | grep -F VERSION_ID=)
 
     HA_CONFDIR=${1%/}; shift
     local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
@@ -999,7 +1051,19 @@ main()
 
         determine_servers "setup"
 
-        if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then
+        # Fedora 29+ and rhel/centos 8 has PCS-0.10.x
+        # default is pcs-0.10.x options but check for
+        # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
+        if [[ ! ${ID} =~ {rhel,centos} ]]; then
+            if [[ ${VERSION_ID} == 7.* ]]; then
+                PCS9OR10_PCS_CNAME_OPTION="--name"
+                PCS9OR10_PCS_CLONE_OPTION="--clone"
+            fi
+        fi
+
+        if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then
+
+            determine_service_manager
 
             setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
@@ -1009,6 +1073,8 @@ main()
 
             setup_state_volume ${HA_SERVERS}
 
+            enable_pacemaker ${HA_SERVERS}
+
         else
 
             logger "insufficient servers for HA, aborting"
@@ -1022,7 +1088,7 @@ main()
 
         teardown_resources ${HA_SERVERS}
 
-        teardown_cluster
+        teardown_cluster ${HA_NAME}
 
         cleanup_ganesha_config ${HA_CONFDIR}
         ;;
@@ -1048,21 +1114,13 @@ main()
             logger "warning: pcs cluster node add ${node} failed"
         fi
 
-        sleep 2
-        # restart of HA cluster required on RHEL 6 because of BZ1404410
-        pcs cluster stop --all
-        if [ $? -ne 0 ]; then
-            logger "warning: pcs cluster stop failed"
-        fi
-
-        start_cluster
-
         addnode_create_resources ${node} ${vip}
 
         # Subsequent add-node recreates resources for all the nodes
         # that already exist in the cluster. The nodes are picked up
         # from the entries in the ganesha-ha.conf file. Adding the
         # newly added node to the file so that the resources specfic
         # to this node is correctly recreated in the future.
+        clean_node=${node//[-.]/_}
         echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
         NEW_NODES="$HA_CLUSTER_NODES,${node}"
@@ -1072,7 +1130,6 @@ $HA_CONFDIR/ganesha-ha.conf
 
         addnode_state_volume ${node}
 
-
         # addnode_create_resources() already appended ${node} to
         # HA_SERVERS, so only need to increment HA_NUM_SERVERS
         # and set quorum policy
@@ -1134,9 +1191,9 @@ $HA_CONFDIR/ganesha-ha.conf
 
     esac
 
-    semanage boolean -m gluster_use_execmem --off
-
+    if (selinuxenabled) ;then
+        semanage boolean -m gluster_use_execmem --off
+    fi
 }
 
 main $*
-
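For context, the pcs option handling introduced in main() above keys off /etc/os-release: the defaults suit pcs-0.10.x (Fedora 29+, rhel/centos 8), and the old pcs-0.9.x flags are substituted when a rhel/centos 7 host is detected. Below is a minimal standalone sketch of that idea, not code from the patch: the PCS9OR10_* variable names, ID and VERSION_ID follow the patch, while the detect_pcs_options helper and the simplified distro regex are illustrative assumptions.

#!/bin/bash
# Sketch: choose pcs-0.9.x vs pcs-0.10.x command-line options from /etc/os-release.
# detect_pcs_options is a hypothetical helper; the regex below is a simplified
# stand-in for the distro check used in the patch.

PCS9OR10_PCS_CNAME_OPTION=""        # pcs-0.10.x: cluster name is positional
PCS9OR10_PCS_CLONE_OPTION="clone"   # pcs-0.10.x: 'clone' keyword instead of '--clone'

detect_pcs_options()
{
    local ID="" VERSION_ID=""

    # /etc/os-release is a set of shell-sourceable KEY=VALUE lines
    eval "$(grep -E '^(ID|VERSION_ID)=' /etc/os-release)"

    # rhel/centos 7 still ships pcs-0.9.x, which wants the old flags
    if [[ ${ID} =~ rhel|centos ]] && [[ ${VERSION_ID} == 7* ]]; then
        PCS9OR10_PCS_CNAME_OPTION="--name"
        PCS9OR10_PCS_CLONE_OPTION="--clone"
    fi
}

detect_pcs_options
echo "cname option: '${PCS9OR10_PCS_CNAME_OPTION}', clone option: '${PCS9OR10_PCS_CLONE_OPTION}'"

With the two variables set once, the same pcs cluster setup and pcs resource create ... ${PCS9OR10_PCS_CLONE_OPTION} invocations work on either pcs generation, which is the approach the patch takes in setup_cluster() and setup_create_resources().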
