diff options
| author | Kaleb S. KEITHLEY <kkeithle@redhat.com> | 2017-02-20 12:00:02 -0500 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-02-20 18:21:27 -0500 | 
| commit | 142ee0c200f1fbb727c6f58daf2352f69e07c029 (patch) | |
| tree | 0987c8cb1ef33a432d3dc2c26a1b99baebed1036 /extras | |
| parent | 6cf59d308371715704b397c851683490a897da8c (diff) | |
common-ha: revert switch to storhaug
we'll try again for 3.11.
Change-Id: Ib4c50414dc3d16a23dc7a49020445129da55843c
BUG: 1410843
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Reviewed-on: https://review.gluster.org/16692
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
Diffstat (limited to 'extras')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | extras/ganesha/Makefile.am | 2 |
| -rw-r--r-- | extras/ganesha/ocf/Makefile.am | 12 |
| -rw-r--r-- | extras/ganesha/ocf/ganesha_grace | 222 |
| -rw-r--r-- | extras/ganesha/ocf/ganesha_mon | 235 |
| -rw-r--r-- | extras/ganesha/ocf/ganesha_nfsd | 168 |
| -rw-r--r-- | extras/ganesha/scripts/Makefile.am | 6 |
| -rw-r--r-- | extras/ganesha/scripts/ganesha-ha.sh | 1158 |
7 files changed, 1800 insertions, 3 deletions
diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am index 542de68156b..9eaa401b6c8 100644 --- a/extras/ganesha/Makefile.am +++ b/extras/ganesha/Makefile.am @@ -1,2 +1,2 @@ -SUBDIRS = scripts config +SUBDIRS = scripts config ocf  CLEANFILES = diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am new file mode 100644 index 00000000000..6aed9548a0f --- /dev/null +++ b/extras/ganesha/ocf/Makefile.am @@ -0,0 +1,12 @@ +EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd + +# The root of the OCF resource agent hierarchy +# Per the OCF standard, it's always "lib", +# not "lib64" (even on 64-bit platforms). +ocfdir = $(prefix)/lib/ocf + +# The provider directory +radir = $(ocfdir)/resource.d/heartbeat + +ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd + diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace new file mode 100644 index 00000000000..cb6dcc4e867 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_grace @@ -0,0 +1,222 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +#                    All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like.  Any license provided herein, whether implied or +# otherwise, applies only to this software file.  Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "$OCF_DEBUG_LIBRARY" ]; then +	. $OCF_DEBUG_LIBRARY +else +	: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +	. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +OCF_RESKEY_grace_active_default="grace-active" +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} + +ganesha_meta_data() { +	cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_grace"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. +</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> +</parameters> + +<actions> +<action name="start"   timeout="40s" /> +<action name="stop"    timeout="40s" /> +<action name="status"  timeout="20s" interval="60s" /> +<action name="monitor" depth="0" timeout="10s" interval="5s" /> +<action name="notify"  timeout="10s" /> +<action name="meta-data"  timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_grace_usage() { +	echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in +	meta-data)	ganesha_meta_data +			exit ${OCF_SUCCESS} +			;; +	usage|help)	ganesha_usage +			exit ${OCF_SUCCESS} +			;; +	*) +			;; +esac + +ganesha_grace_start() +{ +	local rc=${OCF_ERR_GENERIC} +	local host=$(hostname -s) + +	ocf_log debug 
"ganesha_grace_start()" +	# give ganesha_mon RA a chance to set the crm_attr first +	# I mislike the sleep, but it's not clear that looping +	# with a small sleep is necessarily better +	# start has a 40sec timeout, so a 5sec sleep here is okay +        sleep 5 +	attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) +        if [ $? -ne 0 ]; then +		host=$(hostname) +		attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null ) +                if [ $? -ne 0 ]; then +	                ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" +                fi +        fi + +	# Three possibilities: +	# 1. There is no attribute at all and attr_updater returns +	#    a zero length string. This happens when +	#    ganesha_mon::monitor hasn't run at least once to set +	#    the attribute. The assumption here is that the system +	#    is coming up. We pretend, for now, that the node is +	#    healthy, to allow the system to continue coming up. +	#    It will cure itself in a few seconds +	# 2. There is an attribute, and it has the value "1"; this +	#    node is healthy. +	# 3. There is an attribute, but it has no value or the value +	#    "0"; this node is not healthy. 
+ +	# case 1 +	if [[ -z "${attr}" ]]; then +		return ${OCF_SUCCESS} +	fi + +	# case 2 +	if [[ "${attr}" = *"value=1" ]]; then +		return ${OCF_SUCCESS} +	fi + +	# case 3 +	return ${OCF_NOT_RUNNING} +} + +ganesha_grace_stop() +{ + +	ocf_log debug "ganesha_grace_stop()" +	return ${OCF_SUCCESS} +} + +ganesha_grace_notify() +{ +        # since this is a clone RA we should only ever see pre-start +        # or post-stop +	mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" +	case "${mode}" in +	pre-start | post-stop) +		dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} +		if [ $? -ne 0 ]; then +			ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed" +		fi +		;; +	esac + +	return ${OCF_SUCCESS} +} + +ganesha_grace_monitor() +{ +	local host=$(hostname -s) + +	ocf_log debug "monitor" + +	attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) +        if [ $? -ne 0 ]; then +		host=$(hostname) +	        attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) +                if [ $? -ne 0 ]; then +	                ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" +                fi +        fi + +	# if there is no attribute (yet), maybe it's because +	# this RA started before ganesha_mon (nfs-mon) has had +	# chance to create it. 
In which case we'll pretend +	# everything is okay this time around +	if [[ -z "${attr}" ]]; then +		return ${OCF_SUCCESS} +	fi + +	if [[ "${attr}" = *"value=1" ]]; then +		return ${OCF_SUCCESS} +	fi + +	return ${OCF_NOT_RUNNING} +} + +ganesha_grace_validate() +{ +	return ${OCF_SUCCESS} +} + +ganesha_grace_validate + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start)          ganesha_grace_start +		;; +stop)           ganesha_grace_stop +		;; +status|monitor) ganesha_grace_monitor +		;; +notify)         ganesha_grace_notify +		;; +*)              ganesha_grace_usage +		exit ${OCF_ERR_UNIMPLEMENTED} +		;; +esac + +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc + diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon new file mode 100644 index 00000000000..7d2c268d412 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_mon @@ -0,0 +1,235 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +#                    All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like.  Any license provided herein, whether implied or +# otherwise, applies only to this software file.  Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then +	. ${OCF_DEBUG_LIBRARY} +else +	: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +	. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +# Defaults +OCF_RESKEY_ganesha_active_default="ganesha-active" +OCF_RESKEY_grace_active_default="grace-active" +OCF_RESKEY_grace_delay_default="5" + +: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}} +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} +: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}} + +ganesha_meta_data() { +	cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_mon"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. +</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="ganesha_active"> +<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc> +<content type="string" default="ganesha-active" /> +</parameter> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> +<parameter name="grace_delay"> +<longdesc lang="en"> +NFS-Ganesha grace delay. +When changing this, adjust the ganesha_grace RA's monitor interval to match. 
+</longdesc> +<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc> +<content type="string" default="5" /> +</parameter> +</parameters> + +<actions> +<action name="start"   timeout="40s" /> +<action name="stop"    timeout="40s" /> +<action name="status"  timeout="20s" interval="60s" /> +<action name="monitor" depth="0"  timeout="10s" interval="10s" /> +<action name="meta-data"  timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_mon_usage() { +	echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case ${__OCF_ACTION} in +	meta-data)	ganesha_meta_data +			exit ${OCF_SUCCESS} +			;; +	usage|help)	ganesha_usage +			exit ${OCF_SUCCESS} +			;; +	*) +			;; +esac + +ganesha_mon_start() +{ +	ocf_log debug "ganesha_mon_start" +	ganesha_mon_monitor +	return $OCF_SUCCESS +} + +ganesha_mon_stop() +{ +	ocf_log debug "ganesha_mon_stop" +	return $OCF_SUCCESS +} + +ganesha_mon_monitor() +{ +	local host=$(hostname -s) +	local pid_file="/var/run/ganesha.pid" +	local rhel6_pid_file="/var/run/ganesha.nfsd.pid" +	local proc_pid="/proc/" + +	# RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid +	# RHEL7 systemd does not. Would be nice if all distros used the +	# same pid file. +	if [ -e ${rhel6_pid_file} ]; then +		pid_file=${rhel6_pid_file} +	fi +	if [ -e ${pid_file} ]; then +		proc_pid="${proc_pid}$(cat ${pid_file})" +	fi + +	if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then + +		attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 +		if [ $? -ne 0 ]; then +			ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed" +		fi + +		# ganesha_grace (nfs-grace) RA follows grace-active attr +		# w/ constraint location +		attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 +		if [ $? 
-ne 0 ]; then +			ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed" +		fi + +		# ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace) +		# track grace-active crm_attr (attr != crm_attr) +		# we can't just use the attr as there's no way to query +		# its value in RHEL6 pacemaker + +		crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null +		if [ $? -ne 0 ]; then +			host=$(hostname) +			crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null +			if [ $? -ne 0 ]; then +				ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed" +			fi +		fi + +		return ${OCF_SUCCESS} +	fi + +	# VIP fail-over is triggered by clearing the +	# ganesha-active node attribute on this node. +	# +	# Meanwhile the ganesha_grace notify() runs when its +	# nfs-grace resource is disabled on a node; which +	# is triggered by clearing the grace-active attribute +	# on this node. +	# +	# We need to allow time for it to run and put +	# the remaining ganesha.nfsds into grace before +	# initiating the VIP fail-over. + +	attrd_updater -D -n ${OCF_RESKEY_grace_active} +	if [ $? -ne 0 ]; then +		ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed" +	fi + +	host=$(hostname -s) +	crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null +	if [ $? -ne 0 ]; then +		host=$(hostname) +		crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null +		if [ $? -ne 0 ]; then +			ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed" +		fi +	fi + +	sleep ${OCF_RESKEY_grace_delay} + +	attrd_updater -D -n ${OCF_RESKEY_ganesha_active} +	if [ $? 
-ne 0 ]; then +		ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed" +	fi + +	return ${OCF_SUCCESS} +} + +ganesha_mon_validate() +{ +	return ${OCF_SUCCESS} +} + +ganesha_mon_validate + +# Translate each action into the appropriate function call +case ${__OCF_ACTION} in +start)          ganesha_mon_start +		;; +stop)           ganesha_mon_stop +		;; +status|monitor) ganesha_mon_monitor +		;; +*)              ganesha_mon_usage +		exit ${OCF_ERR_UNIMPLEMENTED} +		;; +esac + +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc + diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd new file mode 100644 index 00000000000..29e333ca903 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_nfsd @@ -0,0 +1,168 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +#                    All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like.  Any license provided herein, whether implied or +# otherwise, applies only to this software file.  Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then +	. ${OCF_DEBUG_LIBRARY} +else +	: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +	. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +OCF_RESKEY_ha_vol_mnt_default="/var/run/gluster/shared_storage" +: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} + +ganesha_meta_data() { +	cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_nfsd"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. +</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="ha_vol_mnt"> +<longdesc lang="en">HA State Volume Mount Point</longdesc> +<shortdesc lang="en">HA_State Volume Mount Point</shortdesc> +<content type="string" default="" /> +</parameter> +</parameters> + +<actions> +<action name="start"   timeout="5s" /> +<action name="stop"    timeout="5s" /> +<action name="status" depth="0"  timeout="5s" interval="0" /> +<action name="monitor" depth="0"  timeout="5s" interval="0" /> +<action name="meta-data"  timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_nfsd_usage() { +	echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in +	meta-data)	ganesha_meta_data +			exit ${OCF_SUCCESS} +			;; +	usage|help)	ganesha_usage +			exit ${OCF_SUCCESS} +			;; +	*) +			;; +esac + +ganesha_nfsd_start() +{ +	local long_host=$(hostname) + +	if [[ -d /var/lib/nfs ]]; then +		mv /var/lib/nfs /var/lib/nfs.backup +		if [ $? 
-ne 0 ]; then +			ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed" +		fi +		ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs +		if [ $? -ne 0 ]; then +			ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed" +		fi +	fi + +	return ${OCF_SUCCESS} +} + +ganesha_nfsd_stop() +{ + +	if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then +		rm -f /var/lib/nfs +		if [ $? -ne 0 ]; then +			ocf_log notice "rm -f /var/lib/nfs failed" +		fi +		mv /var/lib/nfs.backup /var/lib/nfs +		if [ $? -ne 0 ]; then +			ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed" +		fi +	fi + +	return ${OCF_SUCCESS} +} + +ganesha_nfsd_monitor() +{ +	# pacemaker checks to see if RA is already running before starting it. +	# if we return success, then it's presumed it's already running and +	# doesn't need to be started, i.e. invoke the start action. +	# return something other than success to make pacemaker invoke the +	# start action +	if [[ -L /var/lib/nfs ]]; then +		return ${OCF_SUCCESS} +	fi +	return ${OCF_NOT_RUNNING} +} + +ganesha_nfsd_validate() +{ +	return ${OCF_SUCCESS} +} + +ganesha_nfsd_validate + +# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start)          ganesha_nfsd_start +		;; +stop)           ganesha_nfsd_stop +		;; +status|monitor) ganesha_nfsd_monitor +		;; +*)              ganesha_nfsd_usage +		exit ${OCF_ERR_UNIMPLEMENTED} +		;; +esac + +rc=$? 
+ +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc + diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am index 9ee8867ebcc..ca46e5bcd98 100644 --- a/extras/ganesha/scripts/Makefile.am +++ b/extras/ganesha/scripts/Makefile.am @@ -1,4 +1,6 @@ -EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh +EXTRA_DIST = ganesha-ha.sh dbus-send.sh create-export-ganesha.sh \ +             generate-epoch.py  scriptsdir = $(libexecdir)/ganesha -scripts_SCRIPTS = create-export-ganesha.sh generate-epoch.py +scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh ganesha-ha.sh \ +                  generate-epoch.py diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh new file mode 100644 index 00000000000..5b26702a07b --- /dev/null +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -0,0 +1,1158 @@ +#!/bin/bash + +# Copyright 2015-2016 Red Hat Inc.  All Rights Reserved +# +# Pacemaker+Corosync High Availability for NFS-Ganesha +# +# setup, teardown, add, delete, refresh-config, and status +# +# Each participating node in the cluster is assigned a virtual IP (VIP) +# which fails over to another node when its associated ganesha.nfsd dies +# for any reason. After the VIP is moved to another node all the +# ganesha.nfsds are sent a signal using DBUS to put them into NFS GRACE. +# +# There are six resource agent types used: ganesha_mon, ganesha_grace, +# ganesha_nfsd, IPaddr, and Dummy. ganesha_mon is used to monitor the +# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put +# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start +# and stop the ganesha.nfsd during setup and teardown. IPaddr manages +# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to +# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to +# the new host. 
+ +HA_NUM_SERVERS=0 +HA_SERVERS="" +HA_VOL_NAME="gluster_shared_storage" +HA_VOL_MNT="/var/run/gluster/shared_storage" +HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" +SERVICE_MAN="DISTRO_NOT_FOUND" + +RHEL6_PCS_CNAME_OPTION="--name" +SECRET_PEM="/var/lib/glusterd/nfs/secret.pem" + +# UNBLOCK RA uses shared_storage which may become unavailable +# during any of the nodes reboot. Hence increase timeout value. +PORTBLOCK_UNBLOCK_TIMEOUT="60s" + +# Try loading the config from any of the distro +# specific configuration locations +if [ -f /etc/sysconfig/ganesha ] +        then +        . /etc/sysconfig/ganesha +fi +if [ -f /etc/conf.d/ganesha ] +        then +        . /etc/conf.d/ganesha +fi +if [ -f /etc/default/ganesha ] +        then +        . /etc/default/ganesha +fi + +GANESHA_CONF= + +function find_rhel7_conf +{ + while [[ $# > 0 ]] +        do +                key="$1" +                case $key in +                        -f) +                         CONFFILE="$2" +                         break; +                         ;; +                         *) +                         ;; +                 esac +                 shift +         done +} + +if [ -z $CONFFILE ] +        then +        find_rhel7_conf $OPTIONS + +fi + +GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} + +usage() { + +        echo "Usage      : add|delete|refresh-config|status" +        echo "Add-node   : ganesha-ha.sh --add <HA_CONF_DIR> \ +<NODE-HOSTNAME>  <NODE-VIP>" +        echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ +<NODE-HOSTNAME>" +        echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \ +<volume>" +        echo "Status : ganesha-ha.sh --status <HA_CONFDIR>" +} + +determine_service_manager () { + +        if [ -e "/usr/bin/systemctl" ]; +        then +                SERVICE_MAN="/usr/bin/systemctl" +        elif [ -e "/sbin/invoke-rc.d" ]; +        then +                SERVICE_MAN="/sbin/invoke-rc.d" +        elif [ -e "/sbin/service" ]; +        then 
+                SERVICE_MAN="/sbin/service" +        fi +        if [ "$SERVICE_MAN" == "DISTRO_NOT_FOUND" ] +        then +                echo "Service manager not recognized, exiting" +                exit 1 +        fi +} + +manage_service () +{ +        local action=${1} +        local new_node=${2} +        local option= + +        if [ "$action" == "start" ]; then +                option="yes" +        else +                option="no" +        fi +        ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "/usr/libexec/ganesha/ganesha-ha.sh --setup-ganesha-conf-files $HA_CONFDIR $option" + +        if [ "$SERVICE_MAN" == "/usr/bin/systemctl" ] +        then +                ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "$SERVICE_MAN  ${action} nfs-ganesha" +        else +                ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "$SERVICE_MAN nfs-ganesha ${action}" +        fi +} + + +check_cluster_exists() +{ +    local name=${1} +    local cluster_name="" + +    if [ -e /var/run/corosync.pid ]; then +        cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) +        if [ ${cluster_name} -a ${cluster_name} = ${name} ]; then +            logger "$name already exists, exiting" +            exit 0 +        fi +    fi +} + + +determine_servers() +{ +    local cmd=${1} +    local num_servers=0 +    local tmp_ifs=${IFS} +    local ha_servers="" + +    if [ "X${cmd}X" != "XsetupX" -a "X${cmd}X" != "XstatusX" ]; then +        ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') +        IFS=$' ' +        for server in ${ha_servers} ; do +            num_servers=$(expr ${num_servers} + 1) +        done +        IFS=${tmp_ifs} +        HA_NUM_SERVERS=${num_servers} +        HA_SERVERS="${ha_servers}" +    else +        IFS=$',' +        for server in 
${HA_CLUSTER_NODES} ; do +            num_servers=$(expr ${num_servers} + 1) +        done +        IFS=${tmp_ifs} +        HA_NUM_SERVERS=${num_servers} +        HA_SERVERS="${HA_CLUSTER_NODES//,/ }" +    fi +} + + +setup_cluster() +{ +    local name=${1} +    local num_servers=${2} +    local servers=${3} +    local unclean="" +    local quorum_policy="stop" + +    logger "setting up cluster ${name} with the following ${servers}" + +    pcs cluster auth ${servers} +    # pcs cluster setup --name ${name} ${servers} +    pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} --transport udpu ${servers} +    if [ $? -ne 0 ]; then +        logger "pcs cluster setup ${RHEL6_PCS_CNAME_OPTION} ${name} ${servers} failed" +        exit 1; +    fi + +    # BZ 1284404, 1425110, allow time for SSL certs to propagate, until then +    # pcsd will not accept connections. +    sleep 12 +    pcs cluster start --all +    while [ $? -ne 0 ]; do +        sleep 2 +        pcs cluster start --all +    done + +    # wait for the cluster to elect a DC before querying or writing +    # to the CIB. BZ 1334092 +    crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 +    while [ $? -ne 0 ]; do +        crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 +    done + +    unclean=$(pcs status | grep -u "UNCLEAN") +    while [[ "${unclean}X" = "UNCLEANX" ]]; do +         sleep 1 +         unclean=$(pcs status | grep -u "UNCLEAN") +    done +    sleep 1 + +    if [ ${num_servers} -lt 3 ]; then +        quorum_policy="ignore" +    fi +    pcs property set no-quorum-policy=${quorum_policy} +    if [ $? -ne 0 ]; then +        logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" +    fi + +    pcs property set stonith-enabled=false +    if [ $? 
-ne 0 ]; then +        logger "warning: pcs property set stonith-enabled=false failed" +    fi +} + + +setup_finalize_ha() +{ +    local cibfile=${1} +    local stopped="" + +    stopped=$(pcs status | grep -u "Stopped") +    while [[ "${stopped}X" = "StoppedX" ]]; do +         sleep 1 +         stopped=$(pcs status | grep -u "Stopped") +    done +} + + +refresh_config () +{ +        local short_host=$(hostname -s) +        local VOL=${1} +        local HA_CONFDIR=${2} +        local short_host=$(hostname -s) + +        local removed_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\ +                          awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') + +        if [ -e ${SECRET_PEM} ]; then +        while [[ ${3} ]]; do +            current_host=`echo ${3} | cut -d "." -f 1` +            if [ ${short_host} != ${current_host} ]; then +                output=$(ssh -oPasswordAuthentication=no \ +-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ +"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.RemoveExport \ +uint16:$removed_id 2>&1") +                ret=$? +                logger <<< "${output}" +                if [ ${ret} -ne 0 ]; then +                       echo "Error: refresh-config failed on ${current_host}." +                       exit 1 +                fi +                sleep 1 +                output=$(ssh -oPasswordAuthentication=no \ +-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ +"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:\"EXPORT(Export_Id=$removed_id)\" 2>&1") +                ret=$? +                logger <<< "${output}" +                if [ ${ret} -ne 0 ]; then +                        echo "Error: refresh-config failed on ${current_host}." 
+                        exit 1 +                else +                        echo "Refresh-config completed on ${current_host}." +                fi + +          fi +          shift +        done +    else +        echo "Error: refresh-config failed. Passwordless ssh is not enabled." +        exit 1 +    fi + +    # Run the same command on the localhost, +        output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.RemoveExport \ +uint16:$removed_id 2>&1) +        ret=$? +        logger <<< "${output}" +        if [ ${ret} -ne 0 ]; then +                echo "Error: refresh-config failed on localhost." +                exit 1 +        fi +        sleep 1 +        output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:"EXPORT(Export_Id=$removed_id)" 2>&1) +        ret=$? +        logger <<< "${output}" +        if [ ${ret} -ne 0 ] ; then +                echo "Error: refresh-config failed on localhost." +                exit 1 +        else +                echo "Success: refresh-config completed." +        fi +} + + +teardown_cluster() +{ +    local name=${1} + +    for server in ${HA_SERVERS} ; do +        if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then +            logger "info: ${server} is not in config, removing" + +            pcs cluster stop ${server} --force +            if [ $? -ne 0 ]; then +                logger "warning: pcs cluster stop ${server} failed" +            fi + +            pcs cluster node remove ${server} +            if [ $? -ne 0 ]; then +                logger "warning: pcs cluster node remove ${server} failed" +            fi +        fi +    done + +    # BZ 1193433 - pcs doesn't reload cluster.conf after modification +    # after teardown completes, a subsequent setup will appear to have +    # 'remembered' the deleted node. 
You can work around this by +    # issuing another `pcs cluster node remove $node`, +    # `crm_node -f -R $server`, or +    # `cibadmin --delete --xml-text '<node id="$server" +    # uname="$server"/>' + +    pcs cluster stop --all +    if [ $? -ne 0 ]; then +        logger "warning pcs cluster stop --all failed" +    fi + +    pcs cluster destroy +    if [ $? -ne 0 ]; then +        logger "error pcs cluster destroy failed" +        exit 1 +    fi +} + + +cleanup_ganesha_config () +{ +    rm -f /etc/corosync/corosync.conf +    rm -rf /etc/cluster/cluster.conf* +    rm -rf /var/lib/pacemaker/cib/* +} + +do_create_virt_ip_constraints() +{ +    local cibfile=${1}; shift +    local primary=${1}; shift +    local weight="1000" + +    # first a constraint location rule that says the VIP must be where +    # there's a ganesha.nfsd running +    pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 +    if [ $? -ne 0 ]; then +        logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed" +    fi + +    # then a set of constraint location prefers to set the prefered order +    # for where a VIP should move +    while [[ ${1} ]]; do +        pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight} +        if [ $? -ne 0 ]; then +            logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed" +        fi +        weight=$(expr ${weight} + 1000) +        shift +    done +    # and finally set the highest preference for the VIP to its home node +    # default weight when created is/was 100. +    # on Fedora setting appears to be additive, so to get the desired +    # value we adjust the weight +    # weight=$(expr ${weight} - 100) +    pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight} +    if [ $? 
-ne 0 ]; then +        logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed" +    fi +} + + +wrap_create_virt_ip_constraints() +{ +    local cibfile=${1}; shift +    local primary=${1}; shift +    local head="" +    local tail="" + +    # build a list of peers, e.g. for a four node cluster, for node1, +    # the result is "node2 node3 node4"; for node2, "node3 node4 node1" +    # and so on. +    while [[ ${1} ]]; do +        if [ "${1}" = "${primary}" ]; then +            shift +            while [[ ${1} ]]; do +                tail=${tail}" "${1} +                shift +            done +        else +            head=${head}" "${1} +        fi +        shift +    done +    do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head} +} + + +create_virt_ip_constraints() +{ +    local cibfile=${1}; shift + +    while [[ ${1} ]]; do +        wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS} +        shift +    done +} + + +setup_create_resources() +{ +    local cibfile=$(mktemp -u) + +    # fixup /var/lib/nfs +    logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone" +    pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone +    if [ $? -ne 0 ]; then +        logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} --clone failed" +    fi + +    pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone +    if [ $? -ne 0 ]; then +        logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon --clone failed" +    fi + +    # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace +    # start method. Allow time for ganesha_mon to start and set the +    # ganesha-active crm_attribute +    sleep 5 + +    pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone meta notify=true +    if [ $? 
-ne 0 ]; then +        logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace --clone failed" +    fi + +    pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 +    if [ $? -ne 0 ]; then +        logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1" +    fi + +    pcs cluster cib ${cibfile} + +    while [[ ${1} ]]; do + +        # this is variable indirection +        # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"' +        # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...) +        # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1') +        # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"') +        # after the `eval ${clean_nvs}` there is a variable VIP_host_1 +        # with the value '10_7_6_5', and the following \$$ magic to +        # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us +        # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s +        # to give us ipaddr="10.7.6.5". whew! +        name="VIP_${1}" +        clean_name=${name//[-.]/_} +        nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) +        clean_nvs=${nvs//[-.]/_} +        eval ${clean_nvs} +        eval tmp_ipaddr=\$${clean_name} +        ipaddr=${tmp_ipaddr//_/.} + +        pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ +        portno=2049 action=block ip=${ipaddr} --group ${1}-group +        if [ $? -ne 0 ]; then +            logger "warning pcs resource create ${1}-nfs_block failed" +        fi +        pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ +        cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block +        if [ $? 
-ne 0 ]; then +            logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ +            cidr_netmask=32 op monitor interval=15s failed" +        fi + +        pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 +        if [ $? -ne 0 ]; then +            logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" +        fi + +        pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ +        portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ +        tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ +        op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ +        op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} +        if [ $? -ne 0 ]; then +            logger "warning pcs resource create ${1}-nfs_unblock failed" +        fi + + +        shift +    done + +    create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + +    pcs cluster cib-push ${cibfile} +    if [ $? -ne 0 ]; then +        logger "warning pcs cluster cib-push ${cibfile} failed" +    fi +    rm -f ${cibfile} +} + + +teardown_resources() +{ +    # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2) + +    # restore /var/lib/nfs +    logger "notice: pcs resource delete nfs_setup-clone" +    pcs resource delete nfs_setup-clone +    if [ $? -ne 0 ]; then +        logger "warning: pcs resource delete nfs_setup-clone failed" +    fi + +    # delete -clone resource agents +    # in particular delete the ganesha monitor so we don't try to +    # trigger anything when we shut down ganesha next. +    pcs resource delete nfs-mon-clone +    if [ $? -ne 0 ]; then +        logger "warning: pcs resource delete nfs-mon-clone failed" +    fi + +    pcs resource delete nfs-grace-clone +    if [ $? 
-ne 0 ]; then +        logger "warning: pcs resource delete nfs-grace-clone failed" +    fi + +    while [[ ${1} ]]; do +        pcs resource delete ${1}-group +        if [ $? -ne 0 ]; then +            logger "warning: pcs resource delete ${1}-group failed" +        fi +        shift +    done + +} + + +recreate_resources() +{ +    local cibfile=${1}; shift + +    while [[ ${1} ]]; do +        # this is variable indirection +        # see the comment on the same a few lines up +        name="VIP_${1}" +        clean_name=${name//[-.]/_} +        nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) +        clean_nvs=${nvs//[-.]/_} +        eval ${clean_nvs} +        eval tmp_ipaddr=\$${clean_name} +        ipaddr=${tmp_ipaddr//_/.} + +        pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ +        portno=2049 action=block ip=${ipaddr} --group ${1}-group +        if [ $? -ne 0 ]; then +            logger "warning pcs resource create ${1}-nfs_block failed" +        fi +        pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ +        cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block +        if [ $? -ne 0 ]; then +            logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ +            cidr_netmask=32 op monitor interval=15s failed" +        fi + +        pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 +        if [ $? 
-ne 0 ]; then +            logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" +        fi + +        pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ +        portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ +        tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ +        op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ +        op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} +        if [ $? -ne 0 ]; then +            logger "warning pcs resource create ${1}-nfs_unblock failed" +        fi + +        shift +    done +} + + +addnode_recreate_resources() +{ +    local cibfile=${1}; shift +    local add_node=${1}; shift +    local add_vip=${1}; shift + +    recreate_resources ${cibfile} ${HA_SERVERS} + +    pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \ +    protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group +    if [ $? -ne 0 ]; then +        logger "warning pcs resource create ${add_node}-nfs_block failed" +    fi +    pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ +    ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \ +    --after ${add_node}-nfs_block +    if [ $? -ne 0 ]; then +        logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ +	ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" +    fi + +    pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 +    if [ $? 
-ne 0 ]; then +        logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed" +    fi +    pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \ +    protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \ +    tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \ +    ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \ +    timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \ +    timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} +    if [ $? -ne 0 ]; then +        logger "warning pcs resource create ${add_node}-nfs_unblock failed" +    fi +} + + +clear_resources() +{ +    local cibfile=${1}; shift + +    while [[ ${1} ]]; do +        pcs -f ${cibfile} resource delete ${1}-group +        if [ $? -ne 0 ]; then +            logger "warning: pcs -f ${cibfile} resource delete ${1}-group" +        fi + +        shift +    done +} + + +addnode_create_resources() +{ +    local add_node=${1}; shift +    local add_vip=${1}; shift +    local cibfile=$(mktemp -u) + +    # start HA on the new node +    pcs cluster start ${add_node} +    if [ $? -ne 0 ]; then +       logger "warning: pcs cluster start ${add_node} failed" +    fi + +    pcs cluster cib ${cibfile} +    if [ $? -ne 0 ]; then +        logger "warning: pcs cluster cib ${cibfile} failed" +    fi + +    # delete all the -cluster_ip-1 resources, clearing +    # their constraints, then create them again so we can +    # recompute their constraints +    clear_resources ${cibfile} ${HA_SERVERS} +    addnode_recreate_resources ${cibfile} ${add_node} ${add_vip} + +    HA_SERVERS="${HA_SERVERS} ${add_node}" +    create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + +    pcs cluster cib-push ${cibfile} +    if [ $? 
-ne 0 ]; then +        logger "warning: pcs cluster cib-push ${cibfile} failed" +    fi +    rm -f ${cibfile} +} + + +deletenode_delete_resources() +{ +    local node=${1}; shift +    local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//) +    local cibfile=$(mktemp -u) + +    pcs cluster cib ${cibfile} +    if [ $? -ne 0 ]; then +        logger "warning: pcs cluster cib ${cibfile} failed" +    fi + +    # delete all the -cluster_ip-1 and -trigger_ip-1 resources, +    # clearing their constraints, then create them again so we can +    # recompute their constraints +    clear_resources ${cibfile} ${HA_SERVERS} +    recreate_resources ${cibfile} ${ha_servers} +    HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/  / /") + +    create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + +    pcs cluster cib-push ${cibfile} +    if [ $? -ne 0 ]; then +        logger "warning: pcs cluster cib-push ${cibfile} failed" +    fi +    rm -f ${cibfile} + +} + + +deletenode_update_haconfig() +{ +    local name="VIP_${1}" +    local clean_name=${name//[-.]/_} + +    ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") +    sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf +} + + +setup_state_volume() +{ +    local mnt=${HA_VOL_MNT} +    local longname="" +    local shortname="" +    local dname="" +    local dirname="" + +    longname=$(hostname) +    dname=${longname#$(hostname -s)} + +    while [[ ${1} ]]; do + +        if [[ ${1} == *${dname} ]]; then +            dirname=${1} +        else +            dirname=${1}${dname} +        fi + +        if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then +            mkdir ${mnt}/nfs-ganesha/tickle_dir +        fi +        if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname} +        fi +        if [ ! 
-d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname}/nfs +        fi +        if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha +        fi +        if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd +            chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd +        fi +        if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then +            touch ${mnt}/nfs-ganesha/${dirname}/nfs/state +            chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state +        fi +        if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov +        fi +        if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old +        fi +        if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm +            chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm +        fi +        if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then +            mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak +            chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak +        fi +        if [ ! 
-e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then +            touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state +        fi +        for server in ${HA_SERVERS} ; do +            if [ ${server} != ${dirname} ]; then +                ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} +                ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} +            fi +        done +        shift +    done + +} + + +addnode_state_volume() +{ +    local newnode=${1}; shift +    local mnt=${HA_VOL_MNT} +    local longname="" +    local dname="" +    local dirname="" + +    longname=$(hostname) +    dname=${longname#$(hostname -s)} + +    if [[ ${newnode} == *${dname} ]]; then +        dirname=${newnode} +    else +        dirname=${newnode}${dname} +    fi + +    if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname} +    fi +    if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname}/nfs +    fi +    if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha +    fi +    if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd +        chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd +    fi +    if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then +        touch ${mnt}/nfs-ganesha/${dirname}/nfs/state +        chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state +    fi +    if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov +    fi +    if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old +    fi +    if [ ! 
-d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm +        chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm +    fi +    if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then +        mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak +        chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak +    fi +    if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then +        touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state +    fi + +    for server in ${HA_SERVERS} ; do +        if [[ ${server} != ${dirname} ]]; then +            ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} +            ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + +            ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} +            ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} +	fi +    done + +} + + +delnode_state_volume() +{ +    local delnode=${1}; shift +    local mnt=${HA_VOL_MNT} +    local longname="" +    local dname="" +    local dirname="" + +    longname=$(hostname) +    dname=${longname#$(hostname -s)} + +    if [[ ${delnode} == *${dname} ]]; then +        dirname=${delnode} +    else +        dirname=${delnode}${dname} +    fi + +    rm -rf ${mnt}/nfs-ganesha/${dirname} + +    for server in ${HA_SERVERS} ; do +        if [[ "${server}" != "${dirname}" ]]; then +            rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} +            rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} +        fi +    done +} + + +status() +{ +    local scratch=$(mktemp) +    local regex_str="^${1}-cluster_ip-1" +    local healthy=0 +    local index=1 +    local nodes + +    # change tabs to spaces, strip leading spaces +    pcs status | sed -e "s/\t/ /g" -e "s/^[ 
]*//" > ${scratch} + +    nodes[0]=${1}; shift + +    # make a regex of the configured nodes +    # and initalize the nodes array for later +    while [[ ${1} ]]; do + +        regex_str="${regex_str}|^${1}-cluster_ip-1" +        nodes[${index}]=${1} +        ((index++)) +        shift +    done + +    # print the nodes that are expected to be online +    grep -E "^Online:" ${scratch} + +    echo + +    # print the VIPs and which node they are on +    grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 + +    echo + +    # check if the VIP and port block/unblock RAs are on the expected nodes +    for n in ${nodes[*]}; do + +        grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} +        result=$? +        ((healthy+=${result})) +        grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} +        result=$? +        ((healthy+=${result})) +        grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} +        result=$? +        ((healthy+=${result})) +    done + +    grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} +    result=$? + +    if [ ${result} -eq 0 ]; then +        echo "Cluster HA Status: BAD" +    elif [ ${healthy} -eq 0 ]; then +        echo "Cluster HA Status: HEALTHY" +    else +        echo "Cluster HA Status: FAILOVER" +    fi + +    rm -f ${scratch} +} + +create_ganesha_conf_file() +{ +        if [ $1 == "yes" ]; +        then +                if [  -e $GANESHA_CONF ]; +                then +                        rm -rf $GANESHA_CONF +                fi +        # The symlink /etc/ganesha/ganesha.conf need to be +        # created using ganesha conf file mentioned in the +        # shared storage. 
Every node will only have this +        # link and actual file will stored in shared storage, +        # so that ganesha conf editing of ganesha conf will +        # be easy as well as it become more consistent. + +                ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF +        else +        # Restoring previous file +                rm -rf $GANESHA_CONF +                cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF +                sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF +        fi +} + +set_quorum_policy() +{ +    local quorum_policy="stop" +    local num_servers=${1} + +    if [ ${num_servers} -lt 3 ]; then +        quorum_policy="ignore" +    fi +    pcs property set no-quorum-policy=${quorum_policy} +    if [ $? -ne 0 ]; then +        logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" +    fi +} + +main() +{ + +    local cmd=${1}; shift +    if [[ ${cmd} == *help ]]; then +        usage +        exit 0 +    fi +    HA_CONFDIR=${1%/}; shift +    local ha_conf=${HA_CONFDIR}/ganesha-ha.conf +    local node="" +    local vip="" + +    # ignore any comment lines +    cfgline=$(grep  ^HA_NAME= ${ha_conf}) +    eval $(echo ${cfgline} | grep -F HA_NAME=) +    cfgline=$(grep  ^HA_CLUSTER_NODES= ${ha_conf}) +    eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) + +    case "${cmd}" in + +    setup | --setup) +        logger "setting up ${HA_NAME}" + +        check_cluster_exists ${HA_NAME} + +        determine_servers "setup" + +        if [ "X${HA_NUM_SERVERS}X" != "X1X" ]; then + +            setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" + +            setup_create_resources ${HA_SERVERS} + +            setup_finalize_ha + +            setup_state_volume ${HA_SERVERS} + +        else + +            logger "insufficient servers for HA, aborting" +        fi +        ;; + +    teardown | --teardown) +        logger "tearing down ${HA_NAME}" + +        determine_servers "teardown" + +        
teardown_resources ${HA_SERVERS} + +        teardown_cluster ${HA_NAME} + +        cleanup_ganesha_config ${HA_CONFDIR} +        ;; + +    cleanup | --cleanup) +        cleanup_ganesha_config ${HA_CONFDIR} +        ;; + +    add | --add) +        node=${1}; shift +        vip=${1}; shift + +        logger "adding ${node} with ${vip} to ${HA_NAME}" + +        determine_service_manager + +        manage_service "start" ${node} + +        determine_servers "add" + +        pcs cluster node add ${node} +        if [ $? -ne 0 ]; then +            logger "warning: pcs cluster node add ${node} failed" +        fi + +        addnode_create_resources ${node} ${vip} +        # Subsequent add-node recreates resources for all the nodes +        # that already exist in the cluster. The nodes are picked up +        # from the entries in the ganesha-ha.conf file. Adding the +        # newly added node to the file so that the resources specfic +        # to this node is correctly recreated in the future. +        clean_node=${node//[-.]/_} +        echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf + +        NEW_NODES="$HA_CLUSTER_NODES,${node}" + +        sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ +$HA_CONFDIR/ganesha-ha.conf + +        addnode_state_volume ${node} + + +        # addnode_create_resources() already appended ${node} to +        # HA_SERVERS, so only need to increment HA_NUM_SERVERS +        # and set quorum policy +        HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1) +        set_quorum_policy ${HA_NUM_SERVERS} +        ;; + +    delete | --delete) +        node=${1}; shift + +        logger "deleting ${node} from ${HA_NAME}" + +        determine_servers "delete" + +        deletenode_delete_resources ${node} + +        pcs cluster node remove ${node} +        if [ $? 
-ne 0 ]; then +            logger "warning: pcs cluster node remove ${node} failed" +        fi + +        deletenode_update_haconfig ${node} + +        delnode_state_volume ${node} + +        determine_service_manager + +        manage_service "stop" ${node} + +        HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1) +        set_quorum_policy ${HA_NUM_SERVERS} +        ;; + +    status | --status) +        determine_servers "status" + +        status ${HA_SERVERS} +        ;; + +    refresh-config | --refresh-config) +        VOL=${1} + +        determine_servers "refresh-config" + +        refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS} +        ;; + +    setup-ganesha-conf-files | --setup-ganesha-conf-files) + +        create_ganesha_conf_file ${1} +        ;; + +    *) +        # setup and teardown are not intended to be used by a +        # casual user +        usage +        logger "Usage: ganesha-ha.sh add|delete|status" +        ;; + +    esac +} + +main $* +  | 
