summaryrefslogtreecommitdiffstats
path: root/extras/stop-all-gluster-processes.sh
diff options
context:
space:
mode:
Diffstat (limited to 'extras/stop-all-gluster-processes.sh')
-rwxr-xr-xextras/stop-all-gluster-processes.sh193
1 files changed, 193 insertions, 0 deletions
diff --git a/extras/stop-all-gluster-processes.sh b/extras/stop-all-gluster-processes.sh
new file mode 100755
index 00000000000..710aaf5fd3c
--- /dev/null
+++ b/extras/stop-all-gluster-processes.sh
@@ -0,0 +1,193 @@
+#!/bin/bash
+#
+# Kill all the processes/services except glusterd
+#
+# Usage: ./extras/stop-all-gluster-processes.sh [-g] [-h]
+# options:
+# -g Terminate in graceful mode
+# -h Show this message, then exit
+#
+# eg:
+# 1. ./extras/stop-all-gluster-processes.sh
+# 2. ./extras/stop-all-gluster-processes.sh -g
+#
+# By default, this script executes in force mode, i.e. all of brick, gsyncd
+# and other glustershd services/processes are killed without checking for
+# ongoing tasks such as geo-rep, self-heal, rebalance and etc. which may lead
+# to inconsistency after the node is brought back.
+#
+# On specifying '-g' option this script works in graceful mode, to maintain
+# data consistency the script fails with a valid exit code incase if any of
+# the gluster processes are busy in doing their jobs.
+#
+# The author of page [1] proposes user-defined exit codes to the range 64 - 113
+# Find the better explanation behind the choice in the link
+#
+# The exit code returned by stop-all-gluster-processes.sh:
+# 0 No errors/Success
+# 64 Rebalance is in progress
+# 65 Self-Heal is in progress
+# 66 Tier daemon running on this node
+# 127 option not found
+#
+# [1] http://www.tldp.org/LDP/abs/html/exitcodes.html
+
+
+# global
+errors=0
+
+# find the mounts and return their pids
+get_mount_pids()
+{
+ local opts
+ local pid
+
+ for opts in $(grep -w fuse.glusterfs /proc/mounts| awk '{print $1":/"$2}');
+ do
+ IFS=' ' read -r -a volinfo <<< $(echo "${opts}" | sed 's/:\// /g')
+ pid+="$(ps -Ao pid,args | grep -w "volfile-server=${volinfo[0]}" |
+ grep -w "volfile-id=/${volinfo[1]}" | grep -w "${volinfo[2]}" |
+ awk '{print $1}') "
+ done
+ echo "${pid}"
+}
+
+# handle mount processes i.e. 'glusterfs'
+kill_mounts()
+{
+ local signal=${1}
+ local pid
+
+ for pid in $(get_mount_pids);
+ do
+ echo "sending SIG${signal} to mount process with pid: ${pid}";
+ kill -${signal} ${pid};
+ done
+}
+
+# handle brick processes and node services
+kill_bricks_and_services()
+{
+ local signal=${1}
+ local pidfile
+ local pid
+
+ for pidfile in $(find /var/run/gluster/ -name '*.pid');
+ do
+ local pid=$(cat ${pidfile});
+ echo "sending SIG${signal} to pid: ${pid}";
+ kill -${signal} ${pid};
+ done
+}
+
+# for geo-replication, only 'monitor' has pid file written, other
+# processes are not having a pid file, so get it through 'ps' and
+# handle these processes
+kill_georep_gsync()
+{
+ local signal=${1}
+
+ # FIXME: add strick/better check
+ local gsyncpid=$(ps -Ao pid,args | grep gluster | grep gsync |
+ awk '{print $1}');
+ if [ -n "${gsyncpid}" ]
+ then
+ echo "sending SIG${signal} to geo-rep gsync process ${gsyncpid}";
+ kill -${signal} ${gsyncpid} || errors=$((${errors} + 1));
+ fi
+}
+
+# check if all processes are ready to die
+check_background_tasks()
+{
+ volumes=$(gluster vol list)
+ quit=0
+ for volname in ${volumes};
+ do
+ # tiering
+ if [[ $(gluster volume tier ${volname} status 2> /dev/null |
+ grep "localhost" | grep -c "in progress") -gt 0 ]]
+ then
+ quit=66
+ break;
+ fi
+
+ # rebalance
+ if [[ $(gluster volume rebalance ${volname} status 2> /dev/null |
+ grep -c "in progress") -gt 0 ]]
+ then
+ quit=64
+ break;
+ fi
+
+ # self heal
+ if [[ $(gluster volume heal ${volname} info | grep "Number of entries" |
+ awk '{ sum+=$4} END {print sum}') -gt 0 ]];
+ then
+ quit=65
+ break;
+ fi
+
+ # geo-rep, snapshot and quota doesn't need grace checks,
+ # as they ensures the consistancy on force kills
+ done
+
+ echo ${quit}
+}
+
+usage()
+{
+ cat <<EOM
+Usage: $0 [-g] [-h]
+ options:
+ -g Terminate in graceful mode
+ -h Show this message, then exit
+
+eg:
+ 1. $0
+ 2. $0 -g
+EOM
+}
+
+main()
+{
+ while getopts "gh" opt; do
+ case $opt in
+ g)
+ # graceful mode
+ quit=$(check_background_tasks)
+ if [[ ${quit} -ne 0 ]]
+ then
+ exit ${quit};
+ fi
+ # else safe to kill
+ ;;
+ h)
+ usage
+ exit 0;
+ ;;
+ *)
+ usage
+ exit 127;
+ ;;
+ esac
+ done
+ # remove all the options that have been parsed by getopts
+ shift $((OPTIND-1))
+
+ kill_mounts TERM
+ kill_georep_gsync TERM
+ kill_bricks_and_services TERM
+
+ sleep 5;
+ echo ""
+
+ # still not Terminated? let's pass SIGKILL
+ kill_mounts KILL
+ kill_georep_gsync KILL
+ kill_bricks_and_services KILL
+
+ exit ${errors};
+}
+
+main "$@"