Diffstat (limited to 'extras/ganesha/scripts/ganesha-ha.sh')
| -rw-r--r-- | extras/ganesha/scripts/ganesha-ha.sh | 217 |
1 files changed, 137 insertions, 80 deletions
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index 5d3bf8413b8..9790a719e10 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -24,11 +24,16 @@ GANESHA_HA_SH=$(realpath $0)
 HA_NUM_SERVERS=0
 HA_SERVERS=""
 HA_VOL_NAME="gluster_shared_storage"
-HA_VOL_MNT="/var/run/gluster/shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
 HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
 SERVICE_MAN="DISTRO_NOT_FOUND"
 
-RHEL6_PCS_CNAME_OPTION="--name"
+# rhel, fedora id, version
+ID=""
+VERSION_ID=""
+
+PCS9OR10_PCS_CNAME_OPTION=""
+PCS9OR10_PCS_CLONE_OPTION="clone"
 SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
 
 # UNBLOCK RA uses shared_storage which may become unavailable
@@ -101,9 +106,9 @@ determine_service_manager () {
     then
         SERVICE_MAN="/sbin/service"
     fi
-    if [ "${SERVICE_MAN}" == "DISTRO_NOT_FOUND" ]
+    if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]]
     then
-        echo "Service manager not recognized, exiting"
+        logger "Service manager not recognized, exiting"
         exit 1
     fi
 }
@@ -114,7 +119,7 @@ manage_service ()
     local new_node=${2}
     local option=
 
-    if [ "${action}" == "start" ]; then
+    if [[ "${action}" == "start" ]]; then
         option="yes"
     else
         option="no"
@@ -122,7 +127,7 @@ manage_service ()
     ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
 ${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
 
-    if [ "${SERVICE_MAN}" == "/bin/systemctl" ]
+    if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]
     then
         ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
 ${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha"
@@ -140,7 +145,7 @@ check_cluster_exists()
 
     if [ -e /var/run/corosync.pid ]; then
         cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
-        if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then
+        if [[ "${cluster_name}X" == "${name}X" ]]; then
             logger "$name already exists, exiting"
             exit 0
         fi
@@ -155,7 +160,7 @@ determine_servers()
     local tmp_ifs=${IFS}
     local ha_servers=""
 
-    if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then
+    if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then
         ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
         IFS=$' '
         for server in ${ha_servers} ; do
@@ -183,33 +188,6 @@ stop_ganesha_all()
     done
 }
 
-start_cluster()
-{
-    # BZ 1284404, 1425110, allow time for SSL certs to propagate, until then
-    # pcsd will not accept connections.
-    sleep 12
-    pcs cluster start --all
-    while [ $? -ne 0 ]; do
-        sleep 2
-        pcs cluster start --all
-    done
-
-    # wait for the cluster to elect a DC before querying or writing
-    # to the CIB. BZ 1334092
-    crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
-    while [ $? -ne 0 ]; do
-        crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
-    done
-
-    unclean=$(pcs status | grep -u "UNCLEAN")
-    while [[ "${unclean}X" = "UNCLEANX" ]]; do
-        sleep 1
-        unclean=$(pcs status | grep -u "UNCLEAN")
-    done
-    sleep 1
-}
-
-
 setup_cluster()
 {
     local name=${1}
@@ -220,17 +198,41 @@ setup_cluster()
 
     logger "setting up cluster ${name} with the following ${servers}"
 
-    pcs cluster auth ${servers}
-    # pcs cluster setup --name ${name} ${servers}
-    pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers}
+    # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
+    pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
     if [ $? -ne 0 ]; then
-        logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
+        logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out"
         #set up failed stop all ganesha process and clean up symlinks in cluster
-        stop_ganesha_all ${servers}
+        stop_ganesha_all "${servers}"
         exit 1;
     fi
 
-    start_cluster
+    # pcs cluster auth ${servers}
+    pcs cluster auth
+    if [ $? -ne 0 ]; then
+        logger "pcs cluster auth failed"
+    fi
+
+    pcs cluster start --all
+    if [ $? -ne 0 ]; then
+        logger "pcs cluster start failed"
+        exit 1;
+    fi
+
+    sleep 1
+    # wait for the cluster to elect a DC before querying or writing
+    # to the CIB. BZ 1334092
+    crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+    while [ $? -ne 0 ]; do
+        crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+    done
+
+    unclean=$(pcs status | grep -u "UNCLEAN")
+    while [[ "${unclean}X" == "UNCLEANX" ]]; do
+        sleep 1
+        unclean=$(pcs status | grep -u "UNCLEAN")
+    done
+    sleep 1
 
     if [ ${num_servers} -lt 3 ]; then
         quorum_policy="ignore"
@@ -253,7 +255,7 @@ setup_finalize_ha()
     local stopped=""
 
     stopped=$(pcs status | grep -u "Stopped")
-    while [[ "${stopped}X" = "StoppedX" ]]; do
+    while [[ "${stopped}X" == "StoppedX" ]]; do
         sleep 1
         stopped=$(pcs status | grep -u "Stopped")
     done
@@ -267,19 +269,20 @@ refresh_config ()
     local HA_CONFDIR=${2}
     local short_host=$(hostname -s)
 
-    local removed_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
+    local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
                       awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
 
+
     if [ -e ${SECRET_PEM} ]; then
         while [[ ${3} ]]; do
             current_host=`echo ${3} | cut -d "." -f 1`
-            if [ ${short_host} != ${current_host} ]; then
+            if [[ ${short_host} != ${current_host} ]]; then
                 output=$(ssh -oPasswordAuthentication=no \
 -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \
 "dbus-send --print-reply --system --dest=org.ganesha.nfsd \
 /org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
 string:$HA_CONFDIR/exports/export.$VOL.conf \
-string:\"EXPORT(Export_Id=$removed_id)\" 2>&1")
+string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
                 ret=$?
                 logger <<< "${output}"
                 if [ ${ret} -ne 0 ]; then
@@ -300,7 +303,7 @@ string:\"EXPORT(Export_Id=$removed_id)\" 2>&1")
         output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \
 /org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
 string:$HA_CONFDIR/exports/export.$VOL.conf \
-string:"EXPORT(Export_Id=$removed_id)" 2>&1)
+string:"EXPORT(Export_Id=$export_id)" 2>&1)
         ret=$?
         logger <<< "${output}"
         if [ ${ret} -ne 0 ] ; then
@@ -313,6 +316,24 @@ string:"EXPORT(Export_Id=$removed_id)" 2>&1)
 
 teardown_cluster()
 {
+    local name=${1}
+
+    for server in ${HA_SERVERS} ; do
+        if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
+            logger "info: ${server} is not in config, removing"
+
+            pcs cluster stop ${server} --force
+            if [ $? -ne 0 ]; then
+                logger "warning: pcs cluster stop ${server} failed"
+            fi
+
+            pcs cluster node remove ${server}
+            if [ $? -ne 0 ]; then
+                logger "warning: pcs cluster node remove ${server} failed"
+            fi
+        fi
+    done
+
     # BZ 1193433 - pcs doesn't reload cluster.conf after modification
     # after teardown completes, a subsequent setup will appear to have
     # 'remembered' the deleted node. You can work around this by
@@ -326,7 +347,7 @@ teardown_cluster()
         logger "warning pcs cluster stop --all failed"
     fi
 
-    pcs cluster destroy --all
+    pcs cluster destroy
    if [ $? -ne 0 ]; then
        logger "error pcs cluster destroy failed"
        exit 1
@@ -388,7 +409,7 @@ wrap_create_virt_ip_constraints()
     # the result is "node2 node3 node4"; for node2, "node3 node4 node1"
     # and so on.
     while [[ ${1} ]]; do
-        if [ "${1}" = "${primary}" ]; then
+        if [[ ${1} == ${primary} ]]; then
             shift
             while [[ ${1} ]]; do
                 tail=${tail}" "${1}
@@ -419,15 +440,15 @@ setup_create_resources()
     local cibfile=$(mktemp -u)
 
     # fixup /var/lib/nfs
-    logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone"
-    pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
+    logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}"
+    pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}
     if [ $? -ne 0 ]; then
-        logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
+        logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed"
     fi
 
-    pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone
+    pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION}
     if [ $? -ne 0 ]; then
-        logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed"
+        logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed"
     fi
 
     # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace
@@ -435,9 +456,9 @@ setup_create_resources()
     # ganesha-active crm_attribute
     sleep 5
 
-    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone notify=true
+    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true
     if [ $? -ne 0 ]; then
-        logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed"
+        logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed"
    fi
 
    pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
@@ -606,7 +627,7 @@ addnode_recreate_resources()
         --after ${add_node}-nfs_block
     if [ $? -ne 0 ]; then
         logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
-        ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
+ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
     fi
 
     pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
@@ -646,6 +667,12 @@ addnode_create_resources()
     local add_vip=${1}; shift
     local cibfile=$(mktemp -u)
 
+    # start HA on the new node
+    pcs cluster start ${add_node}
+    if [ $? -ne 0 ]; then
+        logger "warning: pcs cluster start ${add_node} failed"
+    fi
+
     pcs cluster cib ${cibfile}
    if [ $? -ne 0 ]; then
        logger "warning: pcs cluster cib ${cibfile} failed"
@@ -665,7 +692,6 @@ addnode_create_resources()
         logger "warning: pcs cluster cib-push ${cibfile} failed"
     fi
     rm -f ${cibfile}
-
 }
 
 
@@ -765,7 +791,7 @@ setup_state_volume()
             touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
         fi
         for server in ${HA_SERVERS} ; do
-            if [ ${server} != ${dirname} ]; then
+            if [[ ${server} != ${dirname} ]]; then
                 ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
                 ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
             fi
@@ -776,6 +802,21 @@ setup_state_volume()
 
 }
 
+enable_pacemaker()
+{
+    while [[ ${1} ]]; do
+        if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then
+            ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
+        else
+            ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable"
+        fi
+        shift
+    done
+}
+
+
 addnode_state_volume()
 {
     local newnode=${1}; shift
@@ -829,13 +870,14 @@ addnode_state_volume()
     fi
 
     for server in ${HA_SERVERS} ; do
+        if [[ ${server} != ${dirname} ]]; then
             ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
             ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
 
             ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
             ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
-            fi
+        fi
     done
 }
 
@@ -861,7 +903,7 @@ delnode_state_volume()
     rm -rf ${mnt}/nfs-ganesha/${dirname}
 
     for server in ${HA_SERVERS} ; do
-        if [[ "${server}" != "${dirname}" ]]; then
+        if [[ ${server} != ${dirname} ]]; then
             rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
             rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
         fi
@@ -877,8 +919,9 @@ status()
     local index=1
     local nodes
 
-    # change tabs to spaces, strip leading spaces
-    pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*//" > ${scratch}
+    # change tabs to spaces, strip leading spaces, including any
+    # new '*' at the beginning of a line introduced in pcs-0.10.x
+    pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch}
 
     nodes[0]=${1}; shift
 
@@ -893,7 +936,7 @@ status()
     done
 
     # print the nodes that are expected to be online
-    grep -E "^Online:" ${scratch}
+    grep -E "Online:" ${scratch}
 
     echo
 
@@ -932,7 +975,7 @@ status()
 
 create_ganesha_conf_file()
 {
-    if [ $1 == "yes" ];
+    if [[ "$1" == "yes" ]];
     then
         if [ -e $GANESHA_CONF ];
         then
@@ -977,7 +1020,16 @@ main()
         exit 0
     fi
 
-    semanage boolean -m gluster_use_execmem --on
+    if (selinuxenabled) ;then
+        semanage boolean -m gluster_use_execmem --on
+    fi
+
+    local osid=""
+
+    osid=$(grep ^ID= /etc/os-release)
+    eval $(echo ${osid} | grep -F ID=)
+    osid=$(grep ^VERSION_ID= /etc/os-release)
+    eval $(echo ${osid} | grep -F VERSION_ID=)
 
     HA_CONFDIR=${1%/}; shift
     local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
@@ -999,7 +1051,19 @@ main()
 
         determine_servers "setup"
 
-        if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then
+        # Fedora 29+ and rhel/centos 8 has PCS-0.10.x
+        # default is pcs-0.10.x options but check for
+        # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
+        if [[ ! ${ID} =~ {rhel,centos} ]]; then
+            if [[ ${VERSION_ID} == 7.* ]]; then
+                PCS9OR10_PCS_CNAME_OPTION="--name"
+                PCS9OR10_PCS_CLONE_OPTION="--clone"
+            fi
+        fi
+
+        if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then
+
+            determine_service_manager
 
             setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
@@ -1009,6 +1073,8 @@ main()
 
             setup_state_volume ${HA_SERVERS}
 
+            enable_pacemaker ${HA_SERVERS}
+
         else
 
             logger "insufficient servers for HA, aborting"
@@ -1022,7 +1088,7 @@ main()
 
         teardown_resources ${HA_SERVERS}
 
-        teardown_cluster
+        teardown_cluster ${HA_NAME}
 
         cleanup_ganesha_config ${HA_CONFDIR}
         ;;
@@ -1048,21 +1114,13 @@ main()
             logger "warning: pcs cluster node add ${node} failed"
         fi
 
-        sleep 2
-        # restart of HA cluster required on RHEL 6 because of BZ1404410
-        pcs cluster stop --all
-        if [ $? -ne 0 ]; then
-            logger "warning: pcs cluster stop failed"
-        fi
-
-        start_cluster
-
         addnode_create_resources ${node} ${vip}
 
         # Subsequent add-node recreates resources for all the nodes
         # that already exist in the cluster. The nodes are picked up
         # from the entries in the ganesha-ha.conf file. Adding the
         # newly added node to the file so that the resources specfic
         # to this node is correctly recreated in the future.
+        clean_node=${node//[-.]/_}
         echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
         NEW_NODES="$HA_CLUSTER_NODES,${node}"
@@ -1072,7 +1130,6 @@ $HA_CONFDIR/ganesha-ha.conf
 
         addnode_state_volume ${node}
 
-
         # addnode_create_resources() already appended ${node} to
         # HA_SERVERS, so only need to increment HA_NUM_SERVERS
         # and set quorum policy
@@ -1134,9 +1191,9 @@ $HA_CONFDIR/ganesha-ha.conf
 
     esac
 
-    semanage boolean -m gluster_use_execmem --off
-
+    if (selinuxenabled) ;then
+        semanage boolean -m gluster_use_execmem --off
+    fi
 }
 
 main $*
-
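For context, the pcs option handling introduced in main() above keys off /etc/os-release: the defaults suit pcs-0.10.x (Fedora 29+, rhel/centos 8), and the old pcs-0.9.x flags are substituted when a rhel/centos 7 host is detected. Below is a minimal standalone sketch of that idea, not code from the patch: the PCS9OR10_* variable names, ID and VERSION_ID follow the patch, while the detect_pcs_options helper and the simplified distro regex are illustrative assumptions.

#!/bin/bash
# Sketch: choose pcs-0.9.x vs pcs-0.10.x command-line options from /etc/os-release.
# detect_pcs_options is a hypothetical helper; the regex below is a simplified
# stand-in for the distro check used in the patch.

PCS9OR10_PCS_CNAME_OPTION=""        # pcs-0.10.x: cluster name is positional
PCS9OR10_PCS_CLONE_OPTION="clone"   # pcs-0.10.x: 'clone' keyword instead of '--clone'

detect_pcs_options()
{
    local ID="" VERSION_ID=""

    # /etc/os-release is a set of shell-sourceable KEY=VALUE lines
    eval "$(grep -E '^(ID|VERSION_ID)=' /etc/os-release)"

    # rhel/centos 7 still ships pcs-0.9.x, which wants the old flags
    if [[ ${ID} =~ rhel|centos ]] && [[ ${VERSION_ID} == 7* ]]; then
        PCS9OR10_PCS_CNAME_OPTION="--name"
        PCS9OR10_PCS_CLONE_OPTION="--clone"
    fi
}

detect_pcs_options
echo "cname option: '${PCS9OR10_PCS_CNAME_OPTION}', clone option: '${PCS9OR10_PCS_CLONE_OPTION}'"

With the two variables set once, the same pcs cluster setup and pcs resource create ... ${PCS9OR10_PCS_CLONE_OPTION} invocations work on either pcs generation, which is the approach the patch takes in setup_cluster() and setup_create_resources().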
