author     Kaleb S KEITHLEY <kkeithle@redhat.com>    2015-12-14 09:24:57 -0500
committer  Kaleb KEITHLEY <kkeithle@redhat.com>      2016-03-14 21:34:27 -0700
commit     40a24f5ab917863d1549508ae9cf31085955d174 (patch)
tree       04568d9bb2c39b6ad9abc5059d298b3457a65f6a /extras/ganesha/scripts/ganesha-ha.sh
parent     7240923a55eab2fc237d951ec0d89c51676a2aae (diff)
common-ha: reliable grace using pacemaker notify actions
Using *-dead_ip-1 resources to track on which nodes the ganesha.nfsd had died
was found to be unreliable. Running `pcs status` in the ganesha_grace monitor
action was seen to time out during failover, and the HA devs opined that it
is, generally, not a good idea to run `pcs status` in a monitor action in any
event. They suggested using the notify feature, where the resources on all
the nodes are notified when a clone resource agent dies. This change adds a
notify action to the ganesha_grace RA.

The ganesha_mon RA monitors its ganesha.nfsd daemon. While the daemon is
running, it creates two attributes: ganesha-active and grace-active. When the
daemon stops for any reason, the attributes are deleted. Deleting the
ganesha-active attribute triggers the failover of the virtual IP (the IPaddr
RA) to another node where ganesha.nfsd is still running.

The ganesha_grace RA monitors the grace-active attribute. When the
grace-active attribute is deleted, the ganesha_grace RA stops and will not
restart. This prompts pacemaker to invoke the notify action in the
ganesha_grace RAs on the other nodes in the cluster, which send a DBUS
message to their ganesha.nfsd. (N.B. grace-active is a bit of a misnomer:
while the grace-active attribute exists, everything is normal and healthy.
Deleting the attribute triggers putting the surviving ganesha.nfsds into
GRACE.)

To ensure that the remaining/surviving ganesha.nfsds are put into NFS-GRACE
before the IPaddr (virtual IP) fails over, there is a short delay (sleep)
between deleting the grace-active attribute and the ganesha-active attribute.

To summarize:
1. on node 2, ganesha_mon:monitor notices that ganesha.nfsd has died
2. on node 2, ganesha_mon:monitor deletes its grace-active attribute
3. on node 2, ganesha_grace:monitor notices that grace-active is gone and
   returns OCF_ERR_GENERIC (a "new" error). When pacemaker tries to (re)start
   ganesha_grace, its start action returns OCF_NOT_RUNNING (a "known" error),
   so no further restarts are attempted.
4. on nodes 1, 3, etc., ganesha_grace:notify receives a post-stop
   notification indicating that node 2 is gone, and sends a DBUS message to
   its ganesha.nfsd putting it into NFS-GRACE.
5. on node 2, ganesha_mon:monitor waits a short period, then deletes its
   ganesha-active attribute. This triggers the IPaddr (virtual IP) failover
   according to the constraint location rules.

The ganesha_nfsd RA is modified to run for the duration: its start action is
invoked to set up the /var/lib/nfs symlink and its stop action is invoked to
restore it. ganesha-ha.sh is modified accordingly to create it as a clone
resource.

BUG: 1290865
Change-Id: I1ba24f38fa4338b3aeb17c65645e9f439387ff57
Signed-off-by: Kaleb S KEITHLEY <kkeithle@redhat.com>
Reviewed-on: http://review.gluster.org/12964
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-on: http://review.gluster.org/13725
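To make the mechanism concrete, the sketch below shows the rough shape of a
notify-aware grace agent in bash: a monitor action that fails once the
grace-active attribute has been deleted, and a notify action that reacts to a
peer's post-stop notification by asking the local ganesha.nfsd to enter
GRACE. This is an illustration only, not the ganesha_grace RA shipped with
this change; the Pacemaker notify environment variables and the
attrd_updater/crm_node calls are standard interfaces, but the DBUS object
path and method (/org/ganesha/nfsd/admin, org.ganesha.nfsd.admin.grace) are
assumptions inferred from the description above and should be checked against
the actual resource agent.

    #!/bin/bash
    # Minimal sketch of the notify-based grace mechanism -- NOT the shipped
    # ganesha_grace RA. A real OCF agent sources ocf-shellfuncs and also
    # implements start, stop and meta-data.

    OCF_SUCCESS=0
    OCF_ERR_GENERIC=1

    ganesha_grace_notify()
    {
        # Pacemaker sets these for clones created with "meta notify=true".
        local mode="${OCF_RESKEY_CRM_meta_notify_type}"        # "pre" or "post"
        local op="${OCF_RESKEY_CRM_meta_notify_operation}"     # "start" or "stop"
        local dead_nodes="${OCF_RESKEY_CRM_meta_notify_stop_uname}"

        # Only react when a peer's clone instance has just stopped.
        if [ "${mode}" = "post" ] && [ "${op}" = "stop" ]; then
            for node in ${dead_nodes}; do
                # Put the local ganesha.nfsd into GRACE so clients of the
                # failed node can reclaim their locks after its virtual IP
                # moves here. DBUS path/method assumed, see lead-in above.
                dbus-send --print-reply --system \
                    --dest=org.ganesha.nfsd \
                    /org/ganesha/nfsd/admin \
                    org.ganesha.nfsd.admin.grace string:${node}
            done
        fi
        return ${OCF_SUCCESS}
    }

    ganesha_grace_monitor()
    {
        # grace-active is maintained by ganesha_mon while ganesha.nfsd runs;
        # once it is gone, report failure so pacemaker stops this clone
        # instance and fires post-stop notifications on the surviving nodes.
        if ! attrd_updater -Q -n grace-active -N "$(crm_node -n)" > /dev/null 2>&1; then
            return ${OCF_ERR_GENERIC}
        fi
        return ${OCF_SUCCESS}
    }

    case "${1}" in
        notify)  ganesha_grace_notify ;;
        monitor) ganesha_grace_monitor ;;
    esac

The key design point is that the surviving nodes learn of the failure purely
through the clone post-stop notification, so no node ever has to run
`pcs status` from inside a monitor action.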
Diffstat (limited to 'extras/ganesha/scripts/ganesha-ha.sh')
-rw-r--r--    extras/ganesha/scripts/ganesha-ha.sh    207
1 file changed, 42 insertions, 165 deletions
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index 84397477e32..16ede52f8b2 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -170,8 +170,8 @@ setup_cluster()
logger "setting up cluster ${name} with the following ${servers}"
pcs cluster auth ${servers}
-# fedora pcs cluster setup ${name} ${servers}
-# rhel6 pcs cluster setup --name ${name} ${servers}
+ # fedora pcs cluster setup ${name} ${servers}
+ # rhel6 pcs cluster setup --name ${name} ${servers}
pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers}
if [ $? -ne 0 ]; then
logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed"
@@ -204,7 +204,7 @@ setup_cluster()
}
-setup_finalize()
+setup_finalize_ha()
{
local cibfile=${1}
local stopped=""
@@ -215,7 +215,7 @@ setup_finalize()
stopped=$(pcs status | grep -u "Stopped")
done
- pcs status | grep dead_ip-1 | sort > /var/run/ganesha/pcs_status
+ # pcs resource cleanup
}
@@ -292,7 +292,7 @@ string:\"EXPORT(Path=/$VOL)\" 2>&1")
exit 1
fi
-#Run the same command on the localhost,
+ # Run the same command on the localhost,
output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \
/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.RemoveExport \
uint16:$removed_id 2>&1)
@@ -373,13 +373,13 @@ teardown_cluster()
fi
done
-# BZ 1193433 - pcs doesn't reload cluster.conf after modification
-# after teardown completes, a subsequent setup will appear to have
-# 'remembered' the deleted node. You can work around this by
-# issuing another `pcs cluster node remove $node`,
-# `crm_node -f -R $server`, or
-# `cibadmin --delete --xml-text '<node id="$server"
-# uname="$server"/>'
+ # BZ 1193433 - pcs doesn't reload cluster.conf after modification
+ # after teardown completes, a subsequent setup will appear to have
+ # 'remembered' the deleted node. You can work around this by
+ # issuing another `pcs cluster node remove $node`,
+ # `crm_node -f -R $server`, or
+ # `cibadmin --delete --xml-text '<node id="$server"
+ # uname="$server"/>'
pcs cluster stop --all
if [ $? -ne 0 ]; then
@@ -479,28 +479,26 @@ setup_create_resources()
{
local cibfile=$(mktemp -u)
- # mount the HA-state volume and start ganesha.nfsd on all nodes
- pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
+ # fixup /var/lib/nfs
+ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone"
+ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs_start ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
+ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
fi
- sleep 1
- # cloned resources seem to never have their start() invoked when they
- # are created, but stop() is invoked when they are destroyed. Why???.
- # No matter, we don't want this resource agent hanging around anyway
- pcs resource delete nfs_start-clone
+
+ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone
if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_start-clone failed"
+ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed"
fi
- pcs resource create nfs-mon ganesha_mon --clone
+ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs-mon ganesha_mon --clone failed"
+ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed"
fi
- pcs resource create nfs-grace ganesha_grace --clone
+ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs-grace ganesha_grace --clone failed"
+ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1"
fi
pcs cluster cib ${cibfile}
@@ -530,21 +528,6 @@ setup_create_resources()
logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s failed"
fi
- pcs -f ${cibfile} resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy failed"
- fi
-
- pcs -f ${cibfile} constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 failed"
- fi
-
- pcs -f ${cibfile} constraint order ${1}-trigger_ip-1 then nfs-grace-clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order ${1}-trigger_ip-1 then nfs-grace-clone failed"
- fi
-
pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
if [ $? -ne 0 ]; then
logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
@@ -567,6 +550,13 @@ teardown_resources()
{
# local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2)
+ # restore /var/lib/nfs
+ logger "notice: pcs resource delete nfs_setup-clone"
+ pcs resource delete nfs_setup-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs_setup-clone failed"
+ fi
+
# delete -clone resource agents
# in particular delete the ganesha monitor so we don't try to
# trigger anything when we shut down ganesha next.
@@ -580,32 +570,11 @@ teardown_resources()
logger "warning: pcs resource delete nfs-grace-clone failed"
fi
- # unmount the HA-state volume and terminate ganesha.nfsd on all nodes
- pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource create nfs_stop ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed"
- fi
- sleep 1
- # cloned resources seem to never have their start() invoked when they
- # are created, but stop() is invoked when they are destroyed. Why???.
- pcs resource delete nfs_stop-clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_stop-clone failed"
- fi
-
while [[ ${1} ]]; do
pcs resource delete ${1}-cluster_ip-1
if [ $? -ne 0 ]; then
logger "warning: pcs resource delete ${1}-cluster_ip-1 failed"
fi
- pcs resource delete ${1}-trigger_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete ${1}-trigger_ip-1 failed"
- fi
- pcs resource delete ${1}-dead_ip-1
- if [ $? -ne 0 ]; then
- logger "info: pcs resource delete ${1}-dead_ip-1 failed"
- fi
shift
done
@@ -632,21 +601,6 @@ recreate_resources()
logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=10s failed"
fi
- pcs -f ${cibfile} resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource create ${1}-trigger_ip-1 ocf:heartbeat:Dummy failed"
- fi
-
- pcs -f ${cibfile} constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint colocation add ${1}-cluster_ip-1 with ${1}-trigger_ip-1 failed"
- fi
-
- pcs -f ${cibfile} constraint order ${1}-trigger_ip-1 then nfs-grace-clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order ${1}-trigger_ip-1 then nfs-grace-clone failed"
- fi
-
pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
if [ $? -ne 0 ]; then
logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
@@ -670,21 +624,6 @@ addnode_recreate_resources()
logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${add_vip} cidr_netmask=32 op monitor interval=10s failed"
fi
- pcs -f ${cibfile} resource create ${add_node}-trigger_ip-1 ocf:heartbeat:Dummy
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource create ${add_node}-trigger_ip-1 ocf:heartbeat:Dummy failed"
- fi
-
- pcs -f ${cibfile} constraint colocation add ${add_node}-cluster_ip-1 with ${add_node}-trigger_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint colocation add ${add_node}-cluster_ip-1 with ${add_node}-trigger_ip-1 failed"
- fi
-
- pcs -f ${cibfile} constraint order ${add_node}-trigger_ip-1 then nfs-grace-clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs constraint order ${add_node}-trigger_ip-1 then nfs-grace-clone failed"
- fi
-
pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
if [ $? -ne 0 ]; then
logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed"
@@ -702,11 +641,6 @@ clear_resources()
logger "warning: pcs -f ${cibfile} resource delete ${1}-cluster_ip-1"
fi
- pcs -f ${cibfile} resource delete ${1}-trigger_ip-1
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} resource delete ${1}-trigger_ip-1"
- fi
-
shift
done
}
@@ -718,52 +652,19 @@ addnode_create_resources()
local add_vip=${1}; shift
local cibfile=$(mktemp -u)
- # mount the HA-state volume and start ganesha.nfsd on the new node
- pcs cluster cib ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib ${cibfile} failed"
- fi
-
- pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT}
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} resource create nfs_start-${add_node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed"
- fi
-
- pcs -f ${cibfile} constraint location nfs_start-${add_node} prefers ${add_node}=INFINITY
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} constraint location nfs_start-${add_node} prefers ${add_node}=INFINITY failed"
- fi
-
- pcs -f ${cibfile} constraint order nfs_start-${add_node} then nfs-mon-clone
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} constraint order nfs_start-${add_node} then nfs-mon-clone failed"
- fi
-
- pcs cluster cib-push ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib-push ${cibfile} failed"
- fi
- rm -f ${cibfile}
-
# start HA on the new node
pcs cluster start ${add_node}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster start ${add_node} failed"
fi
- pcs resource delete nfs_start-${add_node}
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_start-${add_node} failed"
- fi
-
-
pcs cluster cib ${cibfile}
if [ $? -ne 0 ]; then
logger "warning: pcs cluster cib ${cibfile} failed"
fi
- # delete all the -cluster_ip-1 and -trigger_ip-1 resources,
- # clearing their constraints, then create them again so we can
+ # delete all the -cluster_ip-1 resources, clearing
+ # their constraints, then create them again so we can
# recompute their constraints
clear_resources ${cibfile} ${HA_SERVERS}
addnode_recreate_resources ${cibfile} ${add_node} ${add_vip}
@@ -805,31 +706,6 @@ deletenode_delete_resources()
fi
rm -f ${cibfile}
- pcs cluster cib ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib ${cibfile} failed"
- fi
-
- pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT}
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} resource create nfs_stop-${node} ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} failed"
- fi
-
- pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY
- if [ $? -ne 0 ]; then
- logger "warning: pcs -f ${cibfile} constraint location nfs_stop-${node} prefers ${node}=INFINITY failed"
- fi
-
- pcs cluster cib-push ${cibfile}
- if [ $? -ne 0 ]; then
- logger "warning: pcs cluster cib-push ${cibfile} failed"
- fi
- rm -f ${cibfile}
-
- pcs resource delete nfs_stop-${node}
- if [ $? -ne 0 ]; then
- logger "warning: pcs resource delete nfs_stop-${node} failed"
- fi
}
@@ -973,11 +849,12 @@ main()
setup_create_resources ${HA_SERVERS}
+ setup_finalize_ha
+
setup_state_volume ${HA_SERVERS}
setup_copy_config ${HA_SERVERS}
- setup_finalize
else
logger "insufficient servers for HA, aborting"
@@ -1018,15 +895,15 @@ main()
fi
addnode_create_resources ${node} ${vip}
- #Subsequent add-node recreates resources for all the nodes
- #that already exist in the cluster. The nodes are picked up
- #from the entries in the ganesha-ha.conf file. Adding the
- #newly added node to the file so that the resources specfic
- #to this node is correctly recreated in the future.
+ # Subsequent add-node recreates resources for all the nodes
+ # that already exist in the cluster. The nodes are picked up
+ # from the entries in the ganesha-ha.conf file. Adding the
+ # newly added node to the file so that the resources specfic
+ # to this node is correctly recreated in the future.
clean_node=${node//[-.]/_}
echo "VIP_$clean_node=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
- NEW_NODES="$HA_CLUSTER_NODES,$node"
+ NEW_NODES="$HA_CLUSTER_NODES,${node}"
sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \
$HA_CONFDIR/ganesha-ha.conf
@@ -1053,7 +930,7 @@ $HA_CONFDIR/ganesha-ha.conf
setup_copy_config ${HA_SERVERS}
- rm -rf ${HA_VOL_MNT}/nfs-ganesha/{node}
+ rm -rf ${HA_VOL_MNT}/nfs-ganesha/${node}
determine_service_manager
@@ -1074,7 +951,7 @@ $HA_CONFDIR/ganesha-ha.conf
refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS}
;;
- *)
+ *)
# setup and teardown are not intended to be used by a
# casual user
usage
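For anyone applying this change, a quick way to sanity-check the resulting
cluster is to confirm that the new clone resources, the notify meta
attribute, and the grace-active location rule are in place. This is a
hypothetical verification snippet, not part of the patch; it assumes the
classic pcs and pacemaker command spellings, which vary a little between
versions.

    # run on any node after `ganesha-ha.sh --setup <confdir>` has completed

    # clones created by setup_create_resources(): nfs_setup-clone,
    # nfs-mon-clone and nfs-grace-clone (the latter with `meta notify=true`)
    pcs status resources

    # the score=-INFINITY location rule that keeps nfs-grace-clone off any
    # node whose grace-active attribute is not 1
    pcs constraint --full

    # the per-node grace-active attribute maintained by ganesha_mon while
    # ganesha.nfsd is alive
    attrd_updater --query --all --name grace-active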