diff options
Diffstat (limited to 'extras/stop-all-gluster-processes.sh')
| -rwxr-xr-x | extras/stop-all-gluster-processes.sh | 193 |
1 files changed, 193 insertions, 0 deletions
#!/bin/bash
#
# Origin: extras/stop-all-gluster-processes.sh (new file, mode 100755)
#
# Kill all the processes/services except glusterd
#
# Usage: ./extras/stop-all-gluster-processes.sh [-g] [-h]
#    options:
#    -g  Terminate in graceful mode
#    -h  Show this message, then exit
#
# eg:
#  1. ./extras/stop-all-gluster-processes.sh
#  2. ./extras/stop-all-gluster-processes.sh -g
#
# By default, this script executes in force mode, i.e. all of the brick,
# gsyncd and other glustershd services/processes are killed without checking
# for ongoing tasks such as geo-rep, self-heal, rebalance, etc., which may
# lead to inconsistency after the node is brought back.
#
# When the '-g' option is specified this script works in graceful mode: to
# maintain data consistency it exits with one of the dedicated exit codes
# below, without killing anything, if any of the gluster processes are busy
# doing their jobs.
#
# The author of page [1] proposes restricting user-defined exit codes to the
# range 64 - 113; see the link for the reasoning behind that choice.
#
# The exit code returned by stop-all-gluster-processes.sh:
#   0       No errors/Success
#   64      Rebalance is in progress
#   65      Self-Heal is in progress
#   66      Tier daemon running on this node
#   127     option not found
#   other   number of failed attempts to signal the geo-rep gsync processes
#
# [1] http://www.tldp.org/LDP/abs/html/exitcodes.html


# global: incremented by kill_georep_gsync on failure, used as the exit code
errors=0

# Find the glusterfs fuse mounts and print the pids of their client
# processes on stdout, separated by whitespace.
get_mount_pids()
{
    local opts
    local pid
    local matches
    local -a volinfo

    # Each line produced below looks like "server:/volume://mount/point".
    while IFS= read -r opts; do
        # Turn every ":/" into a blank, giving: server volume /mount/point
        IFS=' ' read -r -a volinfo <<< "${opts//:\// }"

        # A source that does not follow the "server:/volume" form cannot be
        # mapped to a client process; skip it instead of grepping for an
        # empty string.
        if [[ ${#volinfo[@]} -lt 3 ]]; then
            continue
        fi

        # -F: server, volume and mount point are literal strings, not
        # regular expressions.
        matches=$(ps -Ao pid,args |
                  grep -wF -- "volfile-server=${volinfo[0]}" |
                  grep -wF -- "volfile-id=/${volinfo[1]}" |
                  grep -wF -- "${volinfo[2]}" |
                  awk '{print $1}')
        pid+="${matches} "
    done < <(grep -w fuse.glusterfs /proc/mounts | awk '{print $1":/"$2}')

    echo "${pid}"
}

# Handle mount processes i.e. 'glusterfs'.
#   $1 - signal name without the SIG prefix (e.g. TERM, KILL)
kill_mounts()
{
    local signal=${1}
    local pid

    # get_mount_pids prints a whitespace separated list, so word-splitting
    # of the command substitution is intended here.
    for pid in $(get_mount_pids); do
        echo "sending SIG${signal} to mount process with pid: ${pid}"
        kill "-${signal}" "${pid}"
    done
}

# Handle brick processes and node services: signal every pid recorded in a
# '*.pid' file below /var/run/gluster/.
#   $1 - signal name without the SIG prefix (e.g. TERM, KILL)
kill_bricks_and_services()
{
    local signal=${1}
    local pidfile
    local pid

    while IFS= read -r pidfile; do
        pid=$(cat "${pidfile}")

        # An empty or unreadable pid file would make 'kill' run without any
        # pid argument and fail with a usage error.
        if [[ -z "${pid//[[:space:]]/}" ]]; then
            echo "skipping pid file without a pid: ${pidfile}" >&2
            continue
        fi

        echo "sending SIG${signal} to pid: ${pid}"
        # shellcheck disable=SC2086 # one word per pid is intended
        kill "-${signal}" ${pid}
    done < <(find /var/run/gluster/ -name '*.pid')
}

# For geo-replication, only 'monitor' has a pid file written; the other
# processes do not have one, so find them through 'ps' and handle them.
# A failing 'kill' increments the global 'errors' counter.
#   $1 - signal name without the SIG prefix (e.g. TERM, KILL)
kill_georep_gsync()
{
    local signal=${1}
    local gsyncpid

    # FIXME: add stricter/better check
    gsyncpid=$(ps -Ao pid,args | grep gluster | grep gsync | awk '{print $1}')
    if [[ -n "${gsyncpid}" ]]; then
        echo "sending SIG${signal} to geo-rep gsync process ${gsyncpid}"
        # shellcheck disable=SC2086 # one word per pid is intended
        kill "-${signal}" ${gsyncpid} || errors=$((errors + 1))
    fi
}

# Check if all processes are ready to die.
# Prints the exit code main should use on stdout: 0 when no volume reports a
# running task, otherwise the code of the first busy task found
# (66 tiering, 64 rebalance, 65 self-heal).
check_background_tasks()
{
    local volumes
    local volname
    local quit=0

    volumes=$(gluster vol list)
    # volume names are whitespace separated; word-splitting is intended
    for volname in ${volumes}; do
        # tiering
        if [[ $(gluster volume tier "${volname}" status 2> /dev/null |
                grep "localhost" | grep -c "in progress") -gt 0 ]]; then
            quit=66
            break
        fi

        # rebalance
        if [[ $(gluster volume rebalance "${volname}" status 2> /dev/null |
                grep -c "in progress") -gt 0 ]]; then
            quit=64
            break
        fi

        # self heal: sum the 4th field of every "Number of entries" line
        if [[ $(gluster volume heal "${volname}" info |
                grep "Number of entries" |
                awk '{ sum+=$4} END {print sum}') -gt 0 ]]; then
            quit=65
            break
        fi

        # geo-rep, snapshot and quota don't need grace checks,
        # as they ensure consistency on force kills
    done

    echo "${quit}"
}

# Print the command line help on stdout.
usage()
{
    cat <<EOM
Usage: $0 [-g] [-h]
    options:
            -g  Terminate in graceful mode
            -h  Show this message, then exit

eg:
 1. $0
 2. $0 -g
EOM
}

# Parse the options, then send SIGTERM and, five seconds later, SIGKILL to
# the mount, geo-rep gsync, brick and service processes.
main()
{
    local opt
    local quit

    while getopts "gh" opt; do
        case "${opt}" in
            g)
                # graceful mode
                quit=$(check_background_tasks)
                if [[ ${quit} -ne 0 ]]; then
                    exit "${quit}"
                fi
                # else safe to kill
                ;;
            h)
                usage
                exit 0
                ;;
            *)
                usage
                exit 127
                ;;
        esac
    done
    # remove all the options that have been parsed by getopts
    shift $((OPTIND - 1))

    kill_mounts TERM
    kill_georep_gsync TERM
    kill_bricks_and_services TERM

    sleep 5
    echo ""

    # still not terminated? let's pass SIGKILL
    kill_mounts KILL
    kill_georep_gsync KILL
    kill_bricks_and_services KILL

    exit "${errors}"
}

main "$@"
