diff options
Diffstat (limited to 'extras/ganesha/scripts/ganesha-ha.sh')
| -rw-r--r-- | extras/ganesha/scripts/ganesha-ha.sh | 466 |
1 files changed, 356 insertions, 110 deletions
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh index 5162eb5e4da..9790a719e10 100644 --- a/extras/ganesha/scripts/ganesha-ha.sh +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Red Hat Inc. All Rights Reserved +# Copyright 2015-2016 Red Hat Inc. All Rights Reserved # # Pacemaker+Corosync High Availability for NFS-Ganesha # @@ -20,16 +20,26 @@ # ensure that the NFS GRACE DBUS signal is sent after the VIP moves to # the new host. +GANESHA_HA_SH=$(realpath $0) HA_NUM_SERVERS=0 HA_SERVERS="" HA_VOL_NAME="gluster_shared_storage" -HA_VOL_MNT="/var/run/gluster/shared_storage" +HA_VOL_MNT="/run/gluster/shared_storage" HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" SERVICE_MAN="DISTRO_NOT_FOUND" -RHEL6_PCS_CNAME_OPTION="--name" +# rhel, fedora id, version +ID="" +VERSION_ID="" + +PCS9OR10_PCS_CNAME_OPTION="" +PCS9OR10_PCS_CLONE_OPTION="clone" SECRET_PEM="/var/lib/glusterd/nfs/secret.pem" +# UNBLOCK RA uses shared_storage which may become unavailable +# during any of the nodes reboot. Hence increase timeout value. +PORTBLOCK_UNBLOCK_TIMEOUT="60s" + # Try loading the config from any of the distro # specific configuration locations if [ -f /etc/sysconfig/ganesha ] @@ -64,9 +74,9 @@ function find_rhel7_conf done } -if [ -z $CONFFILE ] +if [ -z ${CONFFILE} ] then - find_rhel7_conf $OPTIONS + find_rhel7_conf ${OPTIONS} fi @@ -74,20 +84,21 @@ GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} usage() { - echo "Usage : add|delete|status" - echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ + echo "Usage : add|delete|refresh-config|status" + echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ <NODE-HOSTNAME> <NODE-VIP>" - echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ + echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ <NODE-HOSTNAME>" - echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR>\ - <volume>" + echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \ +<volume>" + echo "Status : ganesha-ha.sh --status <HA_CONFDIR>" } determine_service_manager () { - if [ -e "/usr/bin/systemctl" ]; + if [ -e "/bin/systemctl" ]; then - SERVICE_MAN="/usr/bin/systemctl" + SERVICE_MAN="/bin/systemctl" elif [ -e "/sbin/invoke-rc.d" ]; then SERVICE_MAN="/sbin/invoke-rc.d" @@ -95,9 +106,9 @@ determine_service_manager () { then SERVICE_MAN="/sbin/service" fi - if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ] + if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]] then - echo "Service manager not recognized, exiting" + logger "Service manager not recognized, exiting" exit 1 fi } @@ -108,21 +119,21 @@ manage_service () local new_node=${2} local option= - if [ "$action" == "start" ]; then + if [[ "${action}" == "start" ]]; then option="yes" else option="no" fi ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option" +${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" - if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ] + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]] then ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} root@${new_node} "$SERVICE_MAN ${action} nfs-ganesha" +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" else ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ -${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}" +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}" fi } @@ -134,7 +145,7 @@ check_cluster_exists() if [ -e /var/run/corosync.pid ]; then cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) - if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then + if [[ "${cluster_name}X" == "${name}X" ]]; then logger "$name already exists, exiting" exit 0 fi @@ -149,7 +160,7 @@ determine_servers() local tmp_ifs=${IFS} local ha_servers="" - if [[ "X${cmd}X" != "XsetupX" ]]; then + if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') IFS=$' ' for server in ${ha_servers} ; do @@ -169,6 +180,13 @@ determine_servers() fi } +stop_ganesha_all() +{ + local serverlist=${1} + for node in ${serverlist} ; do + manage_service "stop" ${node} + done +} setup_cluster() { @@ -176,16 +194,25 @@ setup_cluster() local num_servers=${2} local servers=${3} local unclean="" + local quorum_policy="stop" logger "setting up cluster ${name} with the following ${servers}" - pcs cluster auth ${servers} - # pcs cluster setup --name ${name} ${servers} - pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} + # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers} + pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} if [ $? -ne 0 ]; then - logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" + logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" exit 1; fi + + # pcs cluster auth ${servers} + pcs cluster auth + if [ $? -ne 0 ]; then + logger "pcs cluster auth failed" + fi + pcs cluster start --all if [ $? -ne 0 ]; then logger "pcs cluster start failed" @@ -201,17 +228,18 @@ setup_cluster() done unclean=$(pcs status | grep -u "UNCLEAN") - while [[ "${unclean}X" = "UNCLEANX" ]]; do + while [[ "${unclean}X" == "UNCLEANX" ]]; do sleep 1 unclean=$(pcs status | grep -u "UNCLEAN") done sleep 1 if [ ${num_servers} -lt 3 ]; then - pcs property set no-quorum-policy=ignore - if [ $? -ne 0 ]; then - logger "warning: pcs property set no-quorum-policy=ignore failed" - fi + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" fi pcs property set stonith-enabled=false @@ -227,7 +255,7 @@ setup_finalize_ha() local stopped="" stopped=$(pcs status | grep -u "Stopped") - while [[ "${stopped}X" = "StoppedX" ]]; do + while [[ "${stopped}X" == "StoppedX" ]]; do sleep 1 stopped=$(pcs status | grep -u "Stopped") done @@ -241,36 +269,24 @@ refresh_config () local HA_CONFDIR=${2} local short_host=$(hostname -s) - removed_id=`cat $HA_CONFDIR/exports/export.$VOL.conf |\ -grep Export_Id | awk -F"[=,;]" '{print$2}' | tr -d '[[:space:]]'` + local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\ + awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') + if [ -e ${SECRET_PEM} ]; then while [[ ${3} ]]; do current_host=`echo ${3} | cut -d "." -f 1` - if [ ${short_host} != ${current_host} ]; then - output=$(ssh -oPasswordAuthentication=no \ --oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ -"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ -/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.RemoveExport \ -uint16:$removed_id 2>&1") - ret=$? - logger <<< "${output}" - if [ ${ret} -ne 0 ]; then - echo "Error: refresh-config failed on ${current_host}." - exit 1 - fi - sleep 1 + if [[ ${short_host} != ${current_host} ]]; then output=$(ssh -oPasswordAuthentication=no \ -oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ "dbus-send --print-reply --system --dest=org.ganesha.nfsd \ -/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ string:$HA_CONFDIR/exports/export.$VOL.conf \ -string:\"EXPORT(Path=/$VOL)\" 2>&1") +string:\"EXPORT(Export_Id=$export_id)\" 2>&1") ret=$? logger <<< "${output}" if [ ${ret} -ne 0 ]; then - echo "Error: refresh-config failed on ${current_host}." - exit 1 + echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}" else echo "Refresh-config completed on ${current_host}." fi @@ -285,24 +301,13 @@ string:\"EXPORT(Path=/$VOL)\" 2>&1") # Run the same command on the localhost, output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ -/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.RemoveExport \ -uint16:$removed_id 2>&1) - ret=$? - logger <<< "${output}" - if [ ${ret} -ne 0 ]; then - echo "Error: refresh-config failed on localhost." - exit 1 - fi - sleep 1 - output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ -/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ string:$HA_CONFDIR/exports/export.$VOL.conf \ -string:"EXPORT(Path=/$VOL)" 2>&1) +string:"EXPORT(Export_Id=$export_id)" 2>&1) ret=$? logger <<< "${output}" if [ ${ret} -ne 0 ] ; then - echo "Error: refresh-config failed on localhost." - exit 1 + echo "Refresh-config failed on localhost." else echo "Success: refresh-config completed." fi @@ -352,8 +357,10 @@ teardown_cluster() cleanup_ganesha_config () { - rm -rf /etc/cluster/cluster.conf* - rm -rf /var/lib/pacemaker/cib/* + rm -f /etc/corosync/corosync.conf + rm -rf /etc/cluster/cluster.conf* + rm -rf /var/lib/pacemaker/cib/* + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf } do_create_virt_ip_constraints() @@ -402,7 +409,7 @@ wrap_create_virt_ip_constraints() # the result is "node2 node3 node4"; for node2, "node3 node4 node1" # and so on. while [[ ${1} ]]; do - if [ "${1}" = "${primary}" ]; then + if [[ ${1} == ${primary} ]]; then shift while [[ ${1} ]]; do tail=${tail}" "${1} @@ -433,15 +440,15 @@ setup_create_resources() local cibfile=$(mktemp -u) # fixup /var/lib/nfs - logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone" - pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone + logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}" + pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} if [ $? -ne 0 ]; then - logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" + logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed" fi - pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone + pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} if [ $? -ne 0 ]; then - logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed" + logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed" fi # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace @@ -449,9 +456,9 @@ setup_create_resources() # ganesha-active crm_attribute sleep 5 - pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true + pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true if [ $? -ne 0 ]; then - logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed" + logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed" fi pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 @@ -500,7 +507,9 @@ setup_create_resources() pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ - tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} if [ $? -ne 0 ]; then logger "warning pcs resource create ${1}-nfs_unblock failed" fi @@ -569,9 +578,16 @@ recreate_resources() eval tmp_ipaddr=\$${clean_name} ipaddr=${tmp_ipaddr//_/.} - pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group if [ $? -ne 0 ]; then - logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=10s failed" + logger "warning pcs resource create ${1}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" fi pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 @@ -579,6 +595,15 @@ recreate_resources() logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" fi + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_unblock failed" + fi + shift done } @@ -592,15 +617,32 @@ addnode_recreate_resources() recreate_resources ${cibfile} ${HA_SERVERS} - pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=15s + pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${add_node}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \ + --after ${add_node}-nfs_block if [ $? -ne 0 ]; then - logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=10s failed" + logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" fi pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 if [ $? -ne 0 ]; then logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed" fi + pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \ + ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${add_node}-nfs_unblock failed" + fi } @@ -609,9 +651,9 @@ clear_resources() local cibfile=${1}; shift while [[ ${1} ]]; do - pcs -f ${cibfile} resource delete ${1}-cluster_ip-1 + pcs -f ${cibfile} resource delete ${1}-group if [ $? -ne 0 ]; then - logger "warning: pcs -f ${cibfile} resource delete ${1}-cluster_ip-1" + logger "warning: pcs -f ${cibfile} resource delete ${1}-group" fi shift @@ -688,7 +730,7 @@ deletenode_update_haconfig() local clean_name=${name//[-.]/_} ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") - sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${clean_name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf + sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf } @@ -711,7 +753,6 @@ setup_state_volume() dirname=${1}${dname} fi - if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then mkdir ${mnt}/nfs-ganesha/tickle_dir fi @@ -726,9 +767,11 @@ setup_state_volume() fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd fi if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov @@ -738,15 +781,17 @@ setup_state_volume() fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm fi if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak fi if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state fi for server in ${HA_SERVERS} ; do - if [ ${server} != ${dirname} ]; then + if [[ ${server} != ${dirname} ]]; then ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} fi @@ -757,32 +802,180 @@ setup_state_volume() } +enable_pacemaker() +{ + while [[ ${1} ]]; do + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable" + fi + shift + done +} + + +addnode_state_volume() +{ + local newnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${newnode} == *${dname} ]]; then + dirname=${newnode} + else + dirname=${newnode}${dname} + fi + + if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then + mkdir ${mnt}/nfs-ganesha/${dirname} + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + + for server in ${HA_SERVERS} ; do + + if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done + +} + + +delnode_state_volume() +{ + local delnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${delnode} == *${dname} ]]; then + dirname=${delnode} + else + dirname=${delnode}${dname} + fi + + rm -rf ${mnt}/nfs-ganesha/${dirname} + + for server in ${HA_SERVERS} ; do + if [[ ${server} != ${dirname} ]]; then + rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done +} + + status() { - local regex_str="^ ${1}"; shift - local status_file=$(mktemp) + local scratch=$(mktemp) + local regex_str="^${1}-cluster_ip-1" + local healthy=0 + local index=1 + local nodes - while [[ ${1} ]]; do + # change tabs to spaces, strip leading spaces, including any + # new '*' at the beginning of a line introduced in pcs-0.10.x + pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch} + + nodes[0]=${1}; shift - regex_str="${regex_str}|^ ${1}" + # make a regex of the configured nodes + # and initalize the nodes array for later + while [[ ${1} ]]; do + regex_str="${regex_str}|^${1}-cluster_ip-1" + nodes[${index}]=${1} + ((index++)) shift done - pcs status | egrep "^Online:" > ${status_file} + # print the nodes that are expected to be online + grep -E "Online:" ${scratch} + + echo + + # print the VIPs and which node they are on + grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 - echo >> ${status_file} + echo - pcs status | egrep "${regex_str}" | sed -e "s/\t/ /" | cut -d ' ' -f 2,4 >> ${status_file} + # check if the VIP and port block/unblock RAs are on the expected nodes + for n in ${nodes[*]}; do - cat ${status_file} + grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + done + + grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} + result=$? + + if [ ${result} -eq 0 ]; then + echo "Cluster HA Status: BAD" + elif [ ${healthy} -eq 0 ]; then + echo "Cluster HA Status: HEALTHY" + else + echo "Cluster HA Status: FAILOVER" + fi - rm -f ${status_file} + rm -f ${scratch} } create_ganesha_conf_file() { - if [ $1 == "yes" ]; + if [[ "$1" == "yes" ]]; then if [ -e $GANESHA_CONF ]; then @@ -799,11 +992,25 @@ create_ganesha_conf_file() else # Restoring previous file rm -rf $GANESHA_CONF - sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF fi } +set_quorum_policy() +{ + local quorum_policy="stop" + local num_servers=${1} + + if [ ${num_servers} -lt 3 ]; then + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" + fi +} + main() { @@ -812,19 +1019,29 @@ main() usage exit 0 fi - if [[ ${cmd} != *status ]]; then - HA_CONFDIR=${1%/}; shift - local ha_conf=${HA_CONFDIR}/ganesha-ha.conf - local node="" - local vip="" - # ignore any comment lines - cfgline=$(grep ^HA_NAME= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_NAME=) - cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) - eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --on fi + local osid="" + + osid=$(grep ^ID= /etc/os-release) + eval $(echo ${osid} | grep -F ID=) + osid=$(grep ^VERSION_ID= /etc/os-release) + eval $(echo ${osid} | grep -F VERSION_ID=) + + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" + local vip="" + + # ignore any comment lines + cfgline=$(grep ^HA_NAME= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_NAME=) + cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) + case "${cmd}" in setup | --setup) @@ -834,7 +1051,19 @@ main() determine_servers "setup" - if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then + # Fedora 29+ and rhel/centos 8 has PCS-0.10.x + # default is pcs-0.10.x options but check for + # rhel/centos 7 (pcs-0.9.x) and adjust accordingly + if [[ ! ${ID} =~ {rhel,centos} ]]; then + if [[ ${VERSION_ID} == 7.* ]]; then + PCS9OR10_PCS_CNAME_OPTION="--name" + PCS9OR10_PCS_CLONE_OPTION="--clone" + fi + fi + + if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then + + determine_service_manager setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" @@ -844,6 +1073,8 @@ main() setup_state_volume ${HA_SERVERS} + enable_pacemaker ${HA_SERVERS} + else logger "insufficient servers for HA, aborting" @@ -858,6 +1089,8 @@ main() teardown_resources ${HA_SERVERS} teardown_cluster ${HA_NAME} + + cleanup_ganesha_config ${HA_CONFDIR} ;; cleanup | --cleanup) @@ -888,13 +1121,20 @@ main() # newly added node to the file so that the resources specfic # to this node is correctly recreated in the future. clean_node=${node//[-.]/_} - echo "VIP_$clean_node=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf + echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf NEW_NODES="$HA_CLUSTER_NODES,${node}" sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ $HA_CONFDIR/ganesha-ha.conf - HA_SERVERS="${HA_SERVERS} ${node}" + + addnode_state_volume ${node} + + # addnode_create_resources() already appended ${node} to + # HA_SERVERS, so only need to increment HA_NUM_SERVERS + # and set quorum policy + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1) + set_quorum_policy ${HA_NUM_SERVERS} ;; delete | --delete) @@ -913,11 +1153,14 @@ $HA_CONFDIR/ganesha-ha.conf deletenode_update_haconfig ${node} - rm -rf ${HA_VOL_MNT}/nfs-ganesha/${node} + delnode_state_volume ${node} determine_service_manager manage_service "stop" ${node} + + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1) + set_quorum_policy ${HA_NUM_SERVERS} ;; status | --status) @@ -947,7 +1190,10 @@ $HA_CONFDIR/ganesha-ha.conf ;; esac + + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --off + fi } main $* - |
