Diffstat (limited to 'extras')
38 files changed, 3362 insertions, 46 deletions
diff --git a/extras/Makefile.am b/extras/Makefile.am index ff5ca9bdb83..983f014cca6 100644 --- a/extras/Makefile.am +++ b/extras/Makefile.am @@ -11,7 +11,8 @@ EditorModedir = $(docdir) EditorMode_DATA = glusterfs-mode.el glusterfs.vim SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \ - $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python + $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \ + ganesha confdir = $(sysconfdir)/glusterfs if WITH_SERVER diff --git a/extras/cliutils/README.md b/extras/cliutils/README.md index e11166774e3..309beb1ca25 100644 --- a/extras/cliutils/README.md +++ b/extras/cliutils/README.md @@ -221,7 +221,7 @@ required.(Under `%files` section) - gluster-mountbroker http://review.gluster.org/14544 - gluster-eventsapi http://review.gluster.org/14248 - gluster-georep-sshkey http://review.gluster.org/14732 -- gluster-restapi https://github.com/aravindavk/glusterfs-restapi +- gluster-restapi https://github.com/gluster/restapi ## Limitations/TODOs - Not yet possible to create CLI without any subcommand, For example diff --git a/extras/distributed-testing/distributed-test-runner.py b/extras/distributed-testing/distributed-test-runner.py index 7bfb6c9652a..5a07e2feab1 100755 --- a/extras/distributed-testing/distributed-test-runner.py +++ b/extras/distributed-testing/distributed-test-runner.py @@ -383,14 +383,17 @@ class Handlers: return self.shell.call("make install") == 0 @synchronized - def prove(self, id, test, timeout, valgrind=False, asan_noleaks=True): + def prove(self, id, test, timeout, valgrind="no", asan_noleaks=True): assert id == self.client_id self.shell.cd(self.gluster_root) env = "DEBUG=1 " - if valgrind: + if valgrind == "memcheck" or valgrind == "yes": cmd = "valgrind" cmd += " --tool=memcheck --leak-check=full --track-origins=yes" cmd += " --show-leak-kinds=all -v prove -v" + elif valgrind == "drd": + cmd = "valgrind" + cmd += " --tool=drd -v prove -v" elif asan_noleaks: cmd = "prove -v" env += "ASAN_OPTIONS=detect_leaks=0 " @@ -827,8 +830,9 @@ parser.add_argument("--port", help="server port to listen", type=int, default=DEFAULT_PORT) # test role parser.add_argument("--tester", help="start tester", action="store_true") -parser.add_argument("--valgrind", help="run tests under valgrind", - action="store_true") +parser.add_argument("--valgrind[=memcheck,drd]", + help="run tests with valgrind tool 'memcheck' or 'drd'", + default="no") parser.add_argument("--asan", help="test with asan enabled", action="store_true") parser.add_argument("--asan-noleaks", help="test with asan but no mem leaks", diff --git a/extras/ec-heal-script/README.md b/extras/ec-heal-script/README.md new file mode 100644 index 00000000000..aaefd6681f6 --- /dev/null +++ b/extras/ec-heal-script/README.md @@ -0,0 +1,69 @@ +# gluster-heal-scripts +Scripts to correct extended attributes of fragments of files to make them healble. + +Following are the guidelines/suggestions to use these scripts. + +1 - Passwordless ssh should be setup for all the nodes of the cluster. + +2 - Scripts should be executed from one of these nodes. + +3 - Make sure NO "IO" is going on for the files for which we are running +these two scripts. + +4 - There should be no heal going on for the file for which xattrs are being +set by correct_pending_heals.sh. Disable the self heal while running this script. + +5 - All the bricks of the volume should be UP to identify good and bad fragments +and to decide if an entry is healble or not. 
+ +6 - If correct_pending_heals.sh is stopped in the middle while it was processing +healble entries, it is suggested to re-run gfid_needing_heal_parallel.sh to create +latest list of healble and non healble entries and "potential_heal" "can_not_heal" files. + +7 - Based on the number of entries, these files might take time to get and set the +stats and xattrs of entries. + +8 - A backup of the fragments will be taken on <brick path>/.glusterfs/correct_pending_heals + directory with a file name same as gfid. + +9 - Once the correctness of the file gets verified by user, these backup should be removed. + +10 - Make sure we have enough space on bricks to take these backups. + +11 - At the end this will create two files - + 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed. + 2 - can_not_heal - Contains list of files which can not be healed. + +12 - It is suggested that the integrity of the data of files, which were modified and healed, + should be checked by the user. + + +Usage: + +Following are the sequence of steps to use these scripts - + +1 - ./gfid_needing_heal_parallel.sh <volume name> + + Execute gfid_needing_heal_parallel.sh with volume name to create list of files which could + be healed and can not be healed. It creates "potential_heal" and "can_not_heal" files. + During execution, it also displays the list of files on consol with the verdict. + +2 - ./correct_pending_heals.sh + + Execute correct_pending_heals.sh without any argument. This script processes entries present + in "heal" file. It asks user to enter how many files we want to process in one attempt. + Once the count is provided, this script will fetch the entries one by one from "potential_heal" file and takes necessary action. + If at this point also a file can not be healed, it will be pushed to "can_not_heal" file. + If a file can be healed, this script will modify the xattrs of that file fragments and create an entry in "modified_and_backedup_files" file + +3 - At the end, all the entries of "potential_heal" will be processed and based on the processing only two files will be left. + + 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed. + 2 - can_not_heal - Contains list of files which can not be healed. + +Logs and other files - + +1 - modified_and_backedup_files - It contains all the files which could be healed and the location of backup of each fragments. +2 - can_not_heal - It contains all the files which can not be healed. +3 - potential_heal - List of files which could be healed and should be processed by "correct_pending_heals.sh" +4 - /var/log/glusterfs/ec-heal-script.log - It contains logs of both the files. diff --git a/extras/ec-heal-script/correct_pending_heals.sh b/extras/ec-heal-script/correct_pending_heals.sh new file mode 100755 index 00000000000..c9f19dd7c89 --- /dev/null +++ b/extras/ec-heal-script/correct_pending_heals.sh @@ -0,0 +1,415 @@ +#!/bin/bash +# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + +# This script finally resets the xattrs of all the fragments of a file +# which can be healed as per gfid_needing_heal_parallel.sh. 
+# gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal. +# This script takes potential_heal as input and resets xattrs of all the fragments +# of those files present in this file and which could be healed as per +# trusted.ec.size xattar of the file else it will place the entry in can_not_heal +# file. Those entries which must be healed will be place in must_heal file +# after setting xattrs so that user can track those files. + + +MOD_BACKUP_FILES="modified_and_backedup_files" +CAN_NOT_HEAL="can_not_heal" +LOG_DIR="/var/log/glusterfs" +LOG_FILE="$LOG_DIR/ec-heal-script.log" +LINE_SEP="===================================================" + +function heal_log() +{ + echo "$1" >> "$LOG_FILE" +} + +function desc () +{ + echo "" + echo "This script finally resets the xattrs of all the fragments of a file +which can be healed as per gfid_needing_heal_parallel.sh. +gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal. +This script takes potential_heal as input and resets xattrs of all the fragments +of those files present in this file and which could be healed as per +trusted.ec.size xattar of the file else it will place the entry in can_not_heal +file. Those entries which must be healed will be place in must_heal file +after setting xattrs so that user can track those files." +} + +function _init () +{ + if [ $# -ne 0 ] + then + echo "usage: $0" + desc + exit 2 + fi + + if [ ! -f "potential_heal" ] + then + echo "Nothing to correct. File "potential_heal" does not exist" + echo "" + desc + exit 2 + fi +} + +function total_file_size_in_hex() +{ + local frag_size=$1 + local size=0 + local hex_size="" + + size=$((frag_size * 4)) + hex_size=$(printf '0x%016x' $size) + echo "$hex_size" +} + +function backup_file_fragment() +{ + local file_host=$1 + local file_entry=$2 + local gfid_actual_paths=$3 + local brick_root="" + local temp="" + local backup_dir="" + local cmd="" + local gfid="" + + brick_root=$(echo "$file_entry" | cut -d "#" -f 1) + temp=$(echo "$(basename "$BASH_SOURCE")" | cut -d '.' 
-f 1) + backup_dir=$(echo "${brick_root}/.glusterfs/${temp}") + file_entry=${file_entry//#} + + gfid=$(echo "${gfid_actual_paths}" | cut -d '|' -f 1 | cut -d '/' -f 5) + echo "${file_host}:${backup_dir}/${gfid}" >> "$MOD_BACKUP_FILES" + + cmd="mkdir -p ${backup_dir} && yes | cp -af ${file_entry} ${backup_dir}/${gfid} 2>/dev/null" + ssh -n "${file_host}" "${cmd}" +} + +function set_frag_xattr () +{ + local file_host=$1 + local file_entry=$2 + local good=$3 + local cmd1="" + local cmd2="" + local cmd="" + local version="0x00000000000000010000000000000001" + local dirty="0x00000000000000010000000000000001" + + if [[ $good -eq 0 ]] + then + version="0x00000000000000000000000000000000" + fi + + cmd1=" setfattr -n trusted.ec.version -v ${version} ${file_entry} &&" + cmd2=" setfattr -n trusted.ec.dirty -v ${dirty} ${file_entry}" + cmd=${cmd1}${cmd2} + ssh -n "${file_host}" "${cmd}" +} + +function set_version_dirty_xattr () +{ + local file_paths=$1 + local good=$2 + local gfid_actual_paths=$3 + local file_entry="" + local file_host="" + local bpath="" + + for bpath in ${file_paths//,/ } + do + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + backup_file_fragment "$file_host" "$file_entry" "$gfid_actual_paths" + file_entry=${file_entry//#} + set_frag_xattr "$file_host" "$file_entry" "$good" + done +} + +function match_size_xattr_quorum () +{ + local file_paths=$1 + local file_entry="" + local file_host="" + local cmd="" + local size_xattr="" + local bpath="" + declare -A xattr_count + + for bpath in ${file_paths//,/ } + do + size_xattr="" + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + file_entry=${file_entry//#} + + cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2" + size_xattr=$(ssh -n "${file_host}" "${cmd}") + if [[ -n $size_xattr ]] + then + count=$((xattr_count["$size_xattr"] + 1)) + xattr_count["$size_xattr"]=${count} + if [[ $count -ge 4 ]] + then + echo "${size_xattr}" + return + fi + fi + done + echo "False" +} + +function match_version_xattr () +{ + local file_paths=$1 + local file_entry="" + local file_host="" + local cmd="" + local version="" + local bpath="" + declare -A ver_count + + for bpath in ${file_paths//,/ } + do + version="" + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + file_entry=${file_entry//#} + + cmd="getfattr -n trusted.ec.version -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.version" | cut -d '=' -f 2" + version=$(ssh -n "${file_host}" "${cmd}") + ver_count["$version"]=$((ver_count["$version"] + 1)) + done + for key in "${ver_count[@]}" + do + if [[ $key -ge 4 ]] + then + echo "True" + return + else + echo "False" + return + fi + done +} + +function match_stat_size_with_xattr () +{ + local bpath=$1 + local size=$2 + local file_stat=$3 + local xattr=$4 + local file_entry="" + local file_host="" + local cmd="" + local stat_output="" + local hex_size="" + + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + + file_entry=${file_entry//#} + cmd="stat --format=%F:%B:%s $file_entry 2>/dev/null" + stat_output=$(ssh -n "${file_host}" "${cmd}") + echo "$stat_output" | grep -w "${file_stat}" > /dev/null + + if [[ $? 
-eq 0 ]] + then + cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2" + hex_size=$(ssh -n "${file_host}" "${cmd}") + + if [[ -z $hex_size || "$hex_size" != "$xattr" ]] + then + echo "False" + return + fi + size_diff=$(printf '%d' $(( size - hex_size ))) + if [[ $size_diff -gt 2047 ]] + then + echo "False" + return + else + echo "True" + return + fi + else + echo "False" + return + fi +} + +function find_file_paths () +{ + local bpath=$1 + local file_entry="" + local file_host="" + local cmd="" + local brick_root="" + local gfid="" + local actual_path="" + local gfid_path="" + + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + brick_root=$(echo "$file_entry" | cut -d "#" -f 1) + + gfid=$(echo "${file_entry}" | grep ".glusterfs") + if [[ -n "$gfid" ]] + then + gfid_path=$(echo "$file_entry" | cut -d "#" -f 2) + file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' " + actual_path=$(ssh -n "${file_host}" "${cmd}") + #removing absolute path so that user can refer this from mount point + actual_path=${actual_path#"$brick_root"} + else + actual_path=$(echo "$file_entry" | cut -d "#" -f 2) + file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' " + gfid_path=$(ssh -n "${file_host}" "${cmd}") + gfid_path=${gfid_path#"$brick_root"} + fi + + echo "${gfid_path}|${actual_path}" +} + +function log_can_not_heal () +{ + local gfid_actual_paths=$1 + local file_paths=$2 + file_paths=${file_paths//#} + + echo "${LINE_SEP}" >> "$CAN_NOT_HEAL" + echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL" + for bpath in ${file_paths//,/ } + do + echo "${bpath}" >> "$CAN_NOT_HEAL" + done +} + +function check_all_frag_and_set_xattr () +{ + local file_paths=$1 + local total_size=$2 + local file_stat=$3 + local bpath="" + local healthy_count=0 + local match="False" + local matching_bricks="" + local bad_bricks="" + local gfid_actual_paths="" + + for bpath in ${file_paths//,/ } + do + if [[ -n "$gfid_actual_paths" ]] + then + break + fi + gfid_actual_paths=$(find_file_paths "$bpath") + done + + match=$(match_size_xattr_quorum "$file_paths") + +# echo "${match} : $bpath" >> "$MOD_BACKUP_FILES" + + if [[ "$match" != "False" ]] + then + xattr="$match" + for bpath in ${file_paths//,/ } + do + match="False" + match=$(match_stat_size_with_xattr "$bpath" "$total_size" "$file_stat" "$xattr") + if [[ "$match" == "True" ]] + then + matching_bricks="${bpath},${matching_bricks}" + healthy_count=$((healthy_count + 1)) + else + bad_bricks="${bpath},${bad_bricks}" + fi + done + fi + + if [[ $healthy_count -ge 4 ]] + then + match="True" + echo "${LINE_SEP}" >> "$MOD_BACKUP_FILES" + echo "Modified : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$MOD_BACKUP_FILES" + set_version_dirty_xattr "$matching_bricks" 1 "$gfid_actual_paths" + set_version_dirty_xattr "$bad_bricks" 0 "$gfid_actual_paths" + else + log_can_not_heal "$gfid_actual_paths" "${file_paths}" + fi + + echo "$match" +} +function set_xattr() +{ + local count=$1 + local heal_entry="" + local file_stat="" + local frag_size="" + local total_size="" + local file_paths="" + local num="" + local can_heal_count=0 + + heal_log "Started $(basename $BASH_SOURCE) on $(date) " + + while read -r heal_entry + do + heal_log "$LINE_SEP" + heal_log "${heal_entry}" + + file_stat=$(echo "$heal_entry" | cut -d "|" -f 1) + 
frag_size=$(echo "$file_stat" | rev | cut -d ":" -f 1 | rev) + total_size="$(total_file_size_in_hex "$frag_size")" + file_paths=$(echo "$heal_entry" | cut -d "|" -f 2) + match=$(check_all_frag_and_set_xattr "$file_paths" "$total_size" "$file_stat") + if [[ "$match" == "True" ]] + then + can_heal_count=$((can_heal_count + 1)) + fi + + sed -i '1d' potential_heal + count=$((count - 1)) + if [ $count == 0 ] + then + num=$(cat potential_heal | wc -l) + heal_log "$LINE_SEP" + heal_log "${1} : Processed" + heal_log "${can_heal_count} : Modified to Heal" + heal_log "$((${1} - can_heal_count)) : Moved to can_not_heal." + heal_log "${num} : Pending as Potential Heal" + exit 0 + fi + + done < potential_heal +} + +function main () +{ + local count=0 + + read -p "Number of files to correct: [choose between 1-1000] (0 for All):" count + if [[ $count -lt 0 || $count -gt 1000 ]] + then + echo "Provide correct value:" + exit 2 + fi + + if [[ $count -eq 0 ]] + then + count=$(cat potential_heal | wc -l) + fi + set_xattr "$count" +} + +_init "$@" && main "$@" diff --git a/extras/ec-heal-script/gfid_needing_heal_parallel.sh b/extras/ec-heal-script/gfid_needing_heal_parallel.sh new file mode 100755 index 00000000000..d7f53c97c33 --- /dev/null +++ b/extras/ec-heal-script/gfid_needing_heal_parallel.sh @@ -0,0 +1,278 @@ +#!/bin/bash +# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + +# This script provides a list of all the files which can be healed or not healed. +# It also generates two files, potential_heal and can_not_heal, which contains the information +# of all theose files. These files could be used by correct_pending_heals.sh to correct +# the fragmnets so that files could be healed by shd. + +CAN_NOT_HEAL="can_not_heal" +CAN_HEAL="potential_heal" +LINE_SEP="===================================================" +LOG_DIR="/var/log/glusterfs" +LOG_FILE="$LOG_DIR/ec-heal-script.log" + +function heal_log() +{ + echo "$1" >> "$LOG_FILE" +} + +function _init () +{ + if [ $# -ne 1 ]; then + echo "usage: $0 <gluster volume name>"; + echo "This script provides a list of all the files which can be healed or not healed. +It also generates two files, potential_heal and can_not_heal, which contains the information +of all theose files. These files could be used by correct_pending_heals.sh to correct +the fragmnets so that files could be healed by shd." 
+ exit 2; + fi + + volume=$1; +} + +function get_pending_entries () +{ + local volume_name=$1 + + gluster volume heal "$volume_name" info | grep -v ":/" | grep -v "Number of entries" | grep -v "Status:" | sort -u | sed '/^$/d' +} + +function get_entry_path_on_brick() +{ + local path="$1" + local gfid_string="" + if [[ "${path:0:1}" == "/" ]]; + then + echo "$path" + else + gfid_string="$(echo "$path" | cut -f2 -d':' | cut -f1 -d '>')" + echo "/.glusterfs/${gfid_string:0:2}/${gfid_string:2:2}/$gfid_string" + fi +} + +function run_command_on_server() +{ + local subvolume="$1" + local host="$2" + local cmd="$3" + local output + output=$(ssh -n "${host}" "${cmd}") + if [ -n "$output" ] + then + echo "$subvolume:$output" + fi +} + +function get_entry_path_all_bricks () +{ + local entry="$1" + local bricks="$2" + local cmd="" + for brick in $bricks + do + echo "${brick}#$(get_entry_path_on_brick "$entry")" + done | tr '\n' ',' +} + +function get_stat_for_entry_from_all_bricks () +{ + local entry="$1" + local bricks="$2" + local subvolume=0 + local host="" + local bpath="" + local cmd="" + + for brick in $bricks + do + if [[ "$((subvolume % 6))" == "0" ]] + then + subvolume=$((subvolume+1)) + fi + host=$(echo "$brick" | cut -f1 -d':') + bpath=$(echo "$brick" | cut -f2 -d':') + + cmd="stat --format=%F:%B:%s $bpath$(get_entry_path_on_brick "$entry") 2>/dev/null" + run_command_on_server "$subvolume" "${host}" "${cmd}" & + done | sort | uniq -c | sort -rnk1 +} + +function get_bricks_from_volume() +{ + local v=$1 + gluster volume info "$v" | grep -E "^Brick[0-9][0-9]*:" | cut -f2- -d':' +} + +function print_entry_gfid() +{ + local host="$1" + local dirpath="$2" + local entry="$3" + local gfid + gfid="$(ssh -n "${host}" "getfattr -d -m. -e hex $dirpath/$entry 2>/dev/null | grep trusted.gfid=|cut -f2 -d'='")" + echo "$entry" - "$gfid" +} + +function print_brick_directory_info() +{ + local h="$1" + local dirpath="$2" + while read -r e + do + print_entry_gfid "${h}" "${dirpath}" "${e}" + done < <(ssh -n "${h}" "ls $dirpath 2>/dev/null") +} + +function print_directory_info() +{ + local entry="$1" + local bricks="$2" + local h + local b + local gfid + for brick in $bricks; + do + h="$(echo "$brick" | cut -f1 -d':')" + b="$(echo "$brick" | cut -f2 -d':')" + dirpath="$b$(get_entry_path_on_brick "$entry")" + print_brick_directory_info "${h}" "${dirpath}" & + done | sort | uniq -c +} + +function print_entries_needing_heal() +{ + local quorum=0 + local entry="$1" + local bricks="$2" + while read -r line + do + quorum=$(echo "$line" | awk '{print $1}') + if [[ "$quorum" -lt 4 ]] + then + echo "$line - Not in Quorum" + else + echo "$line - In Quorum" + fi + done < <(print_directory_info "$entry" "$bricks") +} + +function find_file_paths () +{ + local bpath=$1 + local file_entry="" + local file_host="" + local cmd="" + local brick_root="" + local gfid="" + local actual_path="" + local gfid_path="" + + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + brick_root=$(echo "$file_entry" | cut -d "#" -f 1) + + gfid=$(echo "${file_entry}" | grep ".glusterfs") + + if [[ -n "$gfid" ]] + then + gfid_path=$(echo "$file_entry" | cut -d "#" -f 2) + file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' " + actual_path=$(ssh -n "${file_host}" "${cmd}") + #removing absolute path so that user can refer this from mount point + actual_path=${actual_path#"$brick_root"} + else + actual_path=$(echo "$file_entry" | cut -d "#" -f 2) 
+ file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' " + gfid_path=$(ssh -n "${file_host}" "${cmd}") + gfid_path=${gfid_path#"$brick_root"} + fi + + echo "${gfid_path}|${actual_path}" +} + +function log_can_not_heal () +{ + local gfid_actual_paths=$1 + local file_paths=$2 + file_paths=${file_paths//#} + + echo "${LINE_SEP}" >> "$CAN_NOT_HEAL" + echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL" + for bpath in ${file_paths//,/ } + do + echo "${bpath}" >> "$CAN_NOT_HEAL" + done +} + +function main () +{ + local bricks="" + local quorum=0 + local stat_info="" + local file_type="" + local gfid_actual_paths="" + local bpath="" + local file_paths="" + local good=0 + local bad=0 + bricks=$(get_bricks_from_volume "$volume") + rm -f "$CAN_HEAL" + rm -f "$CAN_NOT_HEAL" + mkdir "$LOG_DIR" -p + + heal_log "Started $(basename "$BASH_SOURCE") on $(date) " + while read -r heal_entry + do + heal_log "------------------------------------------------------------------" + heal_log "$heal_entry" + + gfid_actual_paths="" + file_paths="$(get_entry_path_all_bricks "$heal_entry" "$bricks")" + stat_info="$(get_stat_for_entry_from_all_bricks "$heal_entry" "$bricks")" + heal_log "$stat_info" + + quorum=$(echo "$stat_info" | head -1 | awk '{print $1}') + good_stat=$(echo "$stat_info" | head -1 | awk '{print $3}') + file_type="$(echo "$stat_info" | head -1 | cut -f2 -d':')" + if [[ "$file_type" == "directory" ]] + then + print_entries_needing_heal "$heal_entry" "$bricks" + else + if [[ "$quorum" -ge 4 ]] + then + good=$((good + 1)) + heal_log "Verdict: Healable" + + echo "${good_stat}|$file_paths" >> "$CAN_HEAL" + else + bad=$((bad + 1)) + heal_log "Verdict: Not Healable" + for bpath in ${file_paths//,/ } + do + if [[ -z "$gfid_actual_paths" ]] + then + gfid_actual_paths=$(find_file_paths "$bpath") + else + break + fi + done + log_can_not_heal "$gfid_actual_paths" "${file_paths}" + fi + fi + done < <(get_pending_entries "$volume") + heal_log "=========================================" + heal_log "Total number of potential heal : ${good}" + heal_log "Total number of can not heal : ${bad}" + heal_log "=========================================" +} + +_init "$@" && main "$@" diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am new file mode 100644 index 00000000000..9eaa401b6c8 --- /dev/null +++ b/extras/ganesha/Makefile.am @@ -0,0 +1,2 @@ +SUBDIRS = scripts config ocf +CLEANFILES = diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am new file mode 100644 index 00000000000..c729273096e --- /dev/null +++ b/extras/ganesha/config/Makefile.am @@ -0,0 +1,4 @@ +EXTRA_DIST= ganesha-ha.conf.sample + +confdir = $(sysconfdir)/ganesha +conf_DATA = ganesha-ha.conf.sample diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample new file mode 100644 index 00000000000..c22892bde56 --- /dev/null +++ b/extras/ganesha/config/ganesha-ha.conf.sample @@ -0,0 +1,19 @@ +# Name of the HA cluster created. +# must be unique within the subnet +HA_NAME="ganesha-ha-360" +# +# N.B. you may use short names or long names; you may not use IP addrs. +# Once you select one, stay with it as it will be mildly unpleasant to +# clean up if you switch later on. Ensure that all names - short and/or +# long - are in DNS or /etc/hosts on all machines in the cluster. +# +# The subset of nodes of the Gluster Trusted Pool that form the ganesha +# HA cluster. 
Hostname is specified. +HA_CLUSTER_NODES="server1,server2,..." +#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..." +# +# Virtual IPs for each of the nodes specified above. +VIP_server1="10.0.2.1" +VIP_server2="10.0.2.2" +#VIP_server1_lab_redhat_com="10.0.2.1" +#VIP_server2_lab_redhat_com="10.0.2.2" diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am new file mode 100644 index 00000000000..990a609f254 --- /dev/null +++ b/extras/ganesha/ocf/Makefile.am @@ -0,0 +1,11 @@ +EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd + +# The root of the OCF resource agent hierarchy +# Per the OCF standard, it's always "lib", +# not "lib64" (even on 64-bit platforms). +ocfdir = $(prefix)/lib/ocf + +# The provider directory +radir = $(ocfdir)/resource.d/heartbeat + +ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace new file mode 100644 index 00000000000..825f7164597 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_grace @@ -0,0 +1,221 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "$OCF_DEBUG_LIBRARY" ]; then + . $OCF_DEBUG_LIBRARY +else + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +OCF_RESKEY_grace_active_default="grace-active" +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} + +ganesha_meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_grace"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. 
+</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> +</parameters> + +<actions> +<action name="start" timeout="40s" /> +<action name="stop" timeout="40s" /> +<action name="status" timeout="20s" interval="60s" /> +<action name="monitor" depth="0" timeout="10s" interval="5s" /> +<action name="notify" timeout="10s" /> +<action name="meta-data" timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_grace_usage() { + echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in + meta-data) ganesha_meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) ganesha_usage + exit ${OCF_SUCCESS} + ;; + *) + ;; +esac + +ganesha_grace_start() +{ + local rc=${OCF_ERR_GENERIC} + local host=$(hostname -s) + + ocf_log debug "ganesha_grace_start()" + # give ganesha_mon RA a chance to set the crm_attr first + # I mislike the sleep, but it's not clear that looping + # with a small sleep is necessarily better + # start has a 40sec timeout, so a 5sec sleep here is okay + sleep 5 + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then + host=$(hostname) + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null ) + if [ $? -ne 0 ]; then + ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + fi + + # Three possibilities: + # 1. There is no attribute at all and attr_updater returns + # a zero length string. This happens when + # ganesha_mon::monitor hasn't run at least once to set + # the attribute. The assumption here is that the system + # is coming up. We pretend, for now, that the node is + # healthy, to allow the system to continue coming up. + # It will cure itself in a few seconds + # 2. There is an attribute, and it has the value "1"; this + # node is healthy. + # 3. There is an attribute, but it has no value or the value + # "0"; this node is not healthy. + + # case 1 + if [[ -z "${attr}" ]]; then + return ${OCF_SUCCESS} + fi + + # case 2 + if [[ "${attr}" = *"value=1" ]]; then + return ${OCF_SUCCESS} + fi + + # case 3 + return ${OCF_NOT_RUNNING} +} + +ganesha_grace_stop() +{ + + ocf_log debug "ganesha_grace_stop()" + return ${OCF_SUCCESS} +} + +ganesha_grace_notify() +{ + # since this is a clone RA we should only ever see pre-start + # or post-stop + mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" + case "${mode}" in + pre-start | post-stop) + dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} + if [ $? -ne 0 ]; then + ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed" + fi + ;; + esac + + return ${OCF_SUCCESS} +} + +ganesha_grace_monitor() +{ + local host=$(hostname -s) + + ocf_log debug "monitor" + + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? 
-ne 0 ]; then + host=$(hostname) + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then + ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + fi + + # if there is no attribute (yet), maybe it's because + # this RA started before ganesha_mon (nfs-mon) has had + # chance to create it. In which case we'll pretend + # everything is okay this time around + if [[ -z "${attr}" ]]; then + return ${OCF_SUCCESS} + fi + + if [[ "${attr}" = *"value=1" ]]; then + return ${OCF_SUCCESS} + fi + + return ${OCF_NOT_RUNNING} +} + +ganesha_grace_validate() +{ + return ${OCF_SUCCESS} +} + +ganesha_grace_validate + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start) ganesha_grace_start + ;; +stop) ganesha_grace_stop + ;; +status|monitor) ganesha_grace_monitor + ;; +notify) ganesha_grace_notify + ;; +*) ganesha_grace_usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac + +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon new file mode 100644 index 00000000000..2b4a9d6da84 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_mon @@ -0,0 +1,234 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then + . ${OCF_DEBUG_LIBRARY} +else + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +# Defaults +OCF_RESKEY_ganesha_active_default="ganesha-active" +OCF_RESKEY_grace_active_default="grace-active" +OCF_RESKEY_grace_delay_default="5" + +: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}} +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} +: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}} + +ganesha_meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_mon"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. 
+</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="ganesha_active"> +<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc> +<content type="string" default="ganesha-active" /> +</parameter> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> +<parameter name="grace_delay"> +<longdesc lang="en"> +NFS-Ganesha grace delay. +When changing this, adjust the ganesha_grace RA's monitor interval to match. +</longdesc> +<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc> +<content type="string" default="5" /> +</parameter> +</parameters> + +<actions> +<action name="start" timeout="40s" /> +<action name="stop" timeout="40s" /> +<action name="status" timeout="20s" interval="60s" /> +<action name="monitor" depth="0" timeout="10s" interval="10s" /> +<action name="meta-data" timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_mon_usage() { + echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case ${__OCF_ACTION} in + meta-data) ganesha_meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) ganesha_usage + exit ${OCF_SUCCESS} + ;; + *) + ;; +esac + +ganesha_mon_start() +{ + ocf_log debug "ganesha_mon_start" + ganesha_mon_monitor + return $OCF_SUCCESS +} + +ganesha_mon_stop() +{ + ocf_log debug "ganesha_mon_stop" + return $OCF_SUCCESS +} + +ganesha_mon_monitor() +{ + local host=$(hostname -s) + local pid_file="/var/run/ganesha.pid" + local rhel6_pid_file="/var/run/ganesha.nfsd.pid" + local proc_pid="/proc/" + + # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid + # RHEL7 systemd does not. Would be nice if all distros used the + # same pid file. + if [ -e ${rhel6_pid_file} ]; then + pid_file=${rhel6_pid_file} + fi + if [ -e ${pid_file} ]; then + proc_pid="${proc_pid}$(cat ${pid_file})" + fi + + if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then + + attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed" + fi + + # ganesha_grace (nfs-grace) RA follows grace-active attr + # w/ constraint location + attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed" + fi + + # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace) + # track grace-active crm_attr (attr != crm_attr) + # we can't just use the attr as there's no way to query + # its value in RHEL6 pacemaker + + crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null + if [ $? -ne 0 ]; then + host=$(hostname) + crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null + if [ $? -ne 0 ]; then + ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed" + fi + fi + + return ${OCF_SUCCESS} + fi + + # VIP fail-over is triggered by clearing the + # ganesha-active node attribute on this node. + # + # Meanwhile the ganesha_grace notify() runs when its + # nfs-grace resource is disabled on a node; which + # is triggered by clearing the grace-active attribute + # on this node. 
+ # + # We need to allow time for it to run and put + # the remaining ganesha.nfsds into grace before + # initiating the VIP fail-over. + + attrd_updater -D -n ${OCF_RESKEY_grace_active} + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed" + fi + + host=$(hostname -s) + crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null + if [ $? -ne 0 ]; then + host=$(hostname) + crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null + if [ $? -ne 0 ]; then + ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed" + fi + fi + + sleep ${OCF_RESKEY_grace_delay} + + attrd_updater -D -n ${OCF_RESKEY_ganesha_active} + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed" + fi + + return ${OCF_SUCCESS} +} + +ganesha_mon_validate() +{ + return ${OCF_SUCCESS} +} + +ganesha_mon_validate + +# Translate each action into the appropriate function call +case ${__OCF_ACTION} in +start) ganesha_mon_start + ;; +stop) ganesha_mon_stop + ;; +status|monitor) ganesha_mon_monitor + ;; +*) ganesha_mon_usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac + +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd new file mode 100644 index 00000000000..f91e8b6b8f7 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_nfsd @@ -0,0 +1,167 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then + . ${OCF_DEBUG_LIBRARY} +else + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage" +: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} + +ganesha_meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_nfsd"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. 
+</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="ha_vol_mnt"> +<longdesc lang="en">HA State Volume Mount Point</longdesc> +<shortdesc lang="en">HA_State Volume Mount Point</shortdesc> +<content type="string" default="" /> +</parameter> +</parameters> + +<actions> +<action name="start" timeout="5s" /> +<action name="stop" timeout="5s" /> +<action name="status" depth="0" timeout="5s" interval="0" /> +<action name="monitor" depth="0" timeout="5s" interval="0" /> +<action name="meta-data" timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_nfsd_usage() { + echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in + meta-data) ganesha_meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) ganesha_usage + exit ${OCF_SUCCESS} + ;; + *) + ;; +esac + +ganesha_nfsd_start() +{ + local long_host=$(hostname) + + if [[ -d /var/lib/nfs ]]; then + mv /var/lib/nfs /var/lib/nfs.backup + if [ $? -ne 0 ]; then + ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed" + fi + ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs + if [ $? -ne 0 ]; then + ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed" + fi + fi + + return ${OCF_SUCCESS} +} + +ganesha_nfsd_stop() +{ + + if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then + rm -f /var/lib/nfs + if [ $? -ne 0 ]; then + ocf_log notice "rm -f /var/lib/nfs failed" + fi + mv /var/lib/nfs.backup /var/lib/nfs + if [ $? -ne 0 ]; then + ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed" + fi + fi + + return ${OCF_SUCCESS} +} + +ganesha_nfsd_monitor() +{ + # pacemaker checks to see if RA is already running before starting it. + # if we return success, then it's presumed it's already running and + # doesn't need to be started, i.e. invoke the start action. + # return something other than success to make pacemaker invoke the + # start action + if [[ -L /var/lib/nfs ]]; then + return ${OCF_SUCCESS} + fi + return ${OCF_NOT_RUNNING} +} + +ganesha_nfsd_validate() +{ + return ${OCF_SUCCESS} +} + +ganesha_nfsd_validate + +# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION" + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start) ganesha_nfsd_start + ;; +stop) ganesha_nfsd_stop + ;; +status|monitor) ganesha_nfsd_monitor + ;; +*) ganesha_nfsd_usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac + +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am new file mode 100644 index 00000000000..7e345fd5f19 --- /dev/null +++ b/extras/ganesha/scripts/Makefile.am @@ -0,0 +1,6 @@ +EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \ + ganesha-ha.sh + +scriptsdir = $(libexecdir)/ganesha +scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \ + ganesha-ha.sh diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh new file mode 100755 index 00000000000..3040e8138b0 --- /dev/null +++ b/extras/ganesha/scripts/create-export-ganesha.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +#This script is called by glusterd when the user +#tries to export a volume via NFS-Ganesha. +#An export file specific to a volume +#is created in GANESHA_DIR/exports. 
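+# A minimal sketch of the expected invocation, assuming glusterd passes the
+# arguments parsed below ($1 = GANESHA_DIR, $2 = "on"/"off", $3 = volume name);
+# the directory shown here is illustrative only:
+#   create-export-ganesha.sh /etc/ganesha on <volname>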
+ +# Try loading the config from any of the distro +# specific configuration locations +if [ -f /etc/sysconfig/ganesha ] + then + . /etc/sysconfig/ganesha +fi +if [ -f /etc/conf.d/ganesha ] + then + . /etc/conf.d/ganesha +fi +if [ -f /etc/default/ganesha ] + then + . /etc/default/ganesha +fi + +GANESHA_DIR=${1%/} +OPTION=$2 +VOL=$3 +CONF=$GANESHA_DIR"/ganesha.conf" +declare -i EXPORT_ID + +function check_cmd_status() +{ + if [ "$1" != "0" ] + then + rm -rf $GANESHA_DIR/exports/export.$VOL.conf + sed -i /$VOL.conf/d $CONF + exit 1 + fi +} + + +if [ ! -d "$GANESHA_DIR/exports" ]; + then + mkdir $GANESHA_DIR/exports + check_cmd_status `echo $?` +fi + +function write_conf() +{ +echo -e "# WARNING : Using Gluster CLI will overwrite manual +# changes made to this file. To avoid it, edit the +# file and run ganesha-ha.sh --refresh-config." + +echo "EXPORT{" +echo " Export_Id = 2;" +echo " Path = \"/$VOL\";" +echo " FSAL {" +echo " name = "GLUSTER";" +echo " hostname=\"localhost\";" +echo " volume=\"$VOL\";" +echo " }" +echo " Access_type = RW;" +echo " Disable_ACL = true;" +echo ' Squash="No_root_squash";' +echo " Pseudo=\"/$VOL\";" +echo ' Protocols = "3", "4" ;' +echo ' Transports = "UDP","TCP";' +echo ' SecType = "sys";' +echo ' Security_Label = False;' +echo " }" +} +if [ "$OPTION" = "on" ]; +then + if ! (cat $CONF | grep $VOL.conf\"$ ) + then + write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf + echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF + count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l` + if [ "$count" = "1" ] ; then + EXPORT_ID=2 + else + EXPORT_ID=`cat $GANESHA_DIR/.export_added` + check_cmd_status `echo $?` + EXPORT_ID=EXPORT_ID+1 + sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \ + $GANESHA_DIR/exports/export.$VOL.conf + check_cmd_status `echo $?` + fi + echo $EXPORT_ID > $GANESHA_DIR/.export_added + fi +else + rm -rf $GANESHA_DIR/exports/export.$VOL.conf + sed -i /$VOL.conf/d $CONF +fi diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh new file mode 100755 index 00000000000..9d613a0e7ad --- /dev/null +++ b/extras/ganesha/scripts/dbus-send.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# Try loading the config from any of the distro +# specific configuration locations +if [ -f /etc/sysconfig/ganesha ] + then + . /etc/sysconfig/ganesha +fi +if [ -f /etc/conf.d/ganesha ] + then + . /etc/conf.d/ganesha +fi +if [ -f /etc/default/ganesha ] + then + . /etc/default/ganesha +fi + +GANESHA_DIR=${1%/} +OPTION=$2 +VOL=$3 +CONF=$GANESHA_DIR"/ganesha.conf" + +function check_cmd_status() +{ + if [ "$1" != "0" ] + then + logger "dynamic export failed on node :${hostname -s}" + fi +} + +#This function keeps track of export IDs and increments it with every new entry +function dynamic_export_add() +{ + dbus-send --system \ +--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ +org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \ +string:"EXPORT(Path=/$VOL)" + check_cmd_status `echo $?` +} + +#This function removes an export dynamically(uses the export_id of the export) +function dynamic_export_remove() +{ + # Below bash fetch all the export from ShowExport command and search + # export entry based on path and then get its export entry. + # There are two possiblities for path, either entire volume will be + # exported or subdir. It handles both cases. 
But it remove only first + # entry from the list based on assumption that entry exported via cli + # has lowest export id value + removed_id=$(dbus-send --type=method_call --print-reply --system \ + --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ + org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \ + "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \ + | head -1) + + dbus-send --print-reply --system \ +--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ +org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id + check_cmd_status `echo $?` +} + +if [ "$OPTION" = "on" ]; +then + dynamic_export_add $@ +fi + +if [ "$OPTION" = "off" ]; +then + dynamic_export_remove $@ +fi diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh new file mode 100644 index 00000000000..9790a719e10 --- /dev/null +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -0,0 +1,1199 @@ +#!/bin/bash + +# Copyright 2015-2016 Red Hat Inc. All Rights Reserved +# +# Pacemaker+Corosync High Availability for NFS-Ganesha +# +# setup, teardown, add, delete, refresh-config, and status +# +# Each participating node in the cluster is assigned a virtual IP (VIP) +# which fails over to another node when its associated ganesha.nfsd dies +# for any reason. After the VIP is moved to another node all the +# ganesha.nfsds are send a signal using DBUS to put them into NFS GRACE. +# +# There are six resource agent types used: ganesha_mon, ganesha_grace, +# ganesha_nfsd, IPaddr, and Dummy. ganesha_mon is used to monitor the +# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put +# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start +# and stop the ganesha.nfsd during setup and teardown. IPaddr manages +# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to +# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to +# the new host. + +GANESHA_HA_SH=$(realpath $0) +HA_NUM_SERVERS=0 +HA_SERVERS="" +HA_VOL_NAME="gluster_shared_storage" +HA_VOL_MNT="/run/gluster/shared_storage" +HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha" +SERVICE_MAN="DISTRO_NOT_FOUND" + +# rhel, fedora id, version +ID="" +VERSION_ID="" + +PCS9OR10_PCS_CNAME_OPTION="" +PCS9OR10_PCS_CLONE_OPTION="clone" +SECRET_PEM="/var/lib/glusterd/nfs/secret.pem" + +# UNBLOCK RA uses shared_storage which may become unavailable +# during any of the nodes reboot. Hence increase timeout value. +PORTBLOCK_UNBLOCK_TIMEOUT="60s" + +# Try loading the config from any of the distro +# specific configuration locations +if [ -f /etc/sysconfig/ganesha ] + then + . /etc/sysconfig/ganesha +fi +if [ -f /etc/conf.d/ganesha ] + then + . /etc/conf.d/ganesha +fi +if [ -f /etc/default/ganesha ] + then + . 
/etc/default/ganesha +fi + +GANESHA_CONF= + +function find_rhel7_conf +{ + while [[ $# > 0 ]] + do + key="$1" + case $key in + -f) + CONFFILE="$2" + break; + ;; + *) + ;; + esac + shift + done +} + +if [ -z ${CONFFILE} ] + then + find_rhel7_conf ${OPTIONS} + +fi + +GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} + +usage() { + + echo "Usage : add|delete|refresh-config|status" + echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ +<NODE-HOSTNAME> <NODE-VIP>" + echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ +<NODE-HOSTNAME>" + echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \ +<volume>" + echo "Status : ganesha-ha.sh --status <HA_CONFDIR>" +} + +determine_service_manager () { + + if [ -e "/bin/systemctl" ]; + then + SERVICE_MAN="/bin/systemctl" + elif [ -e "/sbin/invoke-rc.d" ]; + then + SERVICE_MAN="/sbin/invoke-rc.d" + elif [ -e "/sbin/service" ]; + then + SERVICE_MAN="/sbin/service" + fi + if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]] + then + logger "Service manager not recognized, exiting" + exit 1 + fi +} + +manage_service () +{ + local action=${1} + local new_node=${2} + local option= + + if [[ "${action}" == "start" ]]; then + option="yes" + else + option="no" + fi + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" + + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]] + then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}" + fi +} + + +check_cluster_exists() +{ + local name=${1} + local cluster_name="" + + if [ -e /var/run/corosync.pid ]; then + cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) + if [[ "${cluster_name}X" == "${name}X" ]]; then + logger "$name already exists, exiting" + exit 0 + fi + fi +} + + +determine_servers() +{ + local cmd=${1} + local num_servers=0 + local tmp_ifs=${IFS} + local ha_servers="" + + if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then + ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') + IFS=$' ' + for server in ${ha_servers} ; do + num_servers=$(expr ${num_servers} + 1) + done + IFS=${tmp_ifs} + HA_NUM_SERVERS=${num_servers} + HA_SERVERS="${ha_servers}" + else + IFS=$',' + for server in ${HA_CLUSTER_NODES} ; do + num_servers=$(expr ${num_servers} + 1) + done + IFS=${tmp_ifs} + HA_NUM_SERVERS=${num_servers} + HA_SERVERS="${HA_CLUSTER_NODES//,/ }" + fi +} + +stop_ganesha_all() +{ + local serverlist=${1} + for node in ${serverlist} ; do + manage_service "stop" ${node} + done +} + +setup_cluster() +{ + local name=${1} + local num_servers=${2} + local servers=${3} + local unclean="" + local quorum_policy="stop" + + logger "setting up cluster ${name} with the following ${servers}" + + # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers} + pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} + if [ $? -ne 0 ]; then + logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" + exit 1; + fi + + # pcs cluster auth ${servers} + pcs cluster auth + if [ $? 
-ne 0 ]; then + logger "pcs cluster auth failed" + fi + + pcs cluster start --all + if [ $? -ne 0 ]; then + logger "pcs cluster start failed" + exit 1; + fi + + sleep 1 + # wait for the cluster to elect a DC before querying or writing + # to the CIB. BZ 1334092 + crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 + while [ $? -ne 0 ]; do + crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 + done + + unclean=$(pcs status | grep -u "UNCLEAN") + while [[ "${unclean}X" == "UNCLEANX" ]]; do + sleep 1 + unclean=$(pcs status | grep -u "UNCLEAN") + done + sleep 1 + + if [ ${num_servers} -lt 3 ]; then + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" + fi + + pcs property set stonith-enabled=false + if [ $? -ne 0 ]; then + logger "warning: pcs property set stonith-enabled=false failed" + fi +} + + +setup_finalize_ha() +{ + local cibfile=${1} + local stopped="" + + stopped=$(pcs status | grep -u "Stopped") + while [[ "${stopped}X" == "StoppedX" ]]; do + sleep 1 + stopped=$(pcs status | grep -u "Stopped") + done +} + + +refresh_config () +{ + local short_host=$(hostname -s) + local VOL=${1} + local HA_CONFDIR=${2} + local short_host=$(hostname -s) + + local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\ + awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') + + + if [ -e ${SECRET_PEM} ]; then + while [[ ${3} ]]; do + current_host=`echo ${3} | cut -d "." -f 1` + if [[ ${short_host} != ${current_host} ]]; then + output=$(ssh -oPasswordAuthentication=no \ +-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ +"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:\"EXPORT(Export_Id=$export_id)\" 2>&1") + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ]; then + echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}" + else + echo "Refresh-config completed on ${current_host}." + fi + + fi + shift + done + else + echo "Error: refresh-config failed. Passwordless ssh is not enabled." + exit 1 + fi + + # Run the same command on the localhost, + output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:"EXPORT(Export_Id=$export_id)" 2>&1) + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ] ; then + echo "Refresh-config failed on localhost." + else + echo "Success: refresh-config completed." + fi +} + + +teardown_cluster() +{ + local name=${1} + + for server in ${HA_SERVERS} ; do + if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then + logger "info: ${server} is not in config, removing" + + pcs cluster stop ${server} --force + if [ $? -ne 0 ]; then + logger "warning: pcs cluster stop ${server} failed" + fi + + pcs cluster node remove ${server} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster node remove ${server} failed" + fi + fi + done + + # BZ 1193433 - pcs doesn't reload cluster.conf after modification + # after teardown completes, a subsequent setup will appear to have + # 'remembered' the deleted node. 
You can work around this by + # issuing another `pcs cluster node remove $node`, + # `crm_node -f -R $server`, or + # `cibadmin --delete --xml-text '<node id="$server" + # uname="$server"/>' + + pcs cluster stop --all + if [ $? -ne 0 ]; then + logger "warning pcs cluster stop --all failed" + fi + + pcs cluster destroy + if [ $? -ne 0 ]; then + logger "error pcs cluster destroy failed" + exit 1 + fi +} + + +cleanup_ganesha_config () +{ + rm -f /etc/corosync/corosync.conf + rm -rf /etc/cluster/cluster.conf* + rm -rf /var/lib/pacemaker/cib/* + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf +} + +do_create_virt_ip_constraints() +{ + local cibfile=${1}; shift + local primary=${1}; shift + local weight="1000" + + # first a constraint location rule that says the VIP must be where + # there's a ganesha.nfsd running + pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed" + fi + + # then a set of constraint location prefers to set the prefered order + # for where a VIP should move + while [[ ${1} ]]; do + pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight} + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed" + fi + weight=$(expr ${weight} + 1000) + shift + done + # and finally set the highest preference for the VIP to its home node + # default weight when created is/was 100. + # on Fedora setting appears to be additive, so to get the desired + # value we adjust the weight + # weight=$(expr ${weight} - 100) + pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight} + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed" + fi +} + + +wrap_create_virt_ip_constraints() +{ + local cibfile=${1}; shift + local primary=${1}; shift + local head="" + local tail="" + + # build a list of peers, e.g. for a four node cluster, for node1, + # the result is "node2 node3 node4"; for node2, "node3 node4 node1" + # and so on. + while [[ ${1} ]]; do + if [[ ${1} == ${primary} ]]; then + shift + while [[ ${1} ]]; do + tail=${tail}" "${1} + shift + done + else + head=${head}" "${1} + fi + shift + done + do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head} +} + + +create_virt_ip_constraints() +{ + local cibfile=${1}; shift + + while [[ ${1} ]]; do + wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS} + shift + done +} + + +setup_create_resources() +{ + local cibfile=$(mktemp -u) + + # fixup /var/lib/nfs + logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}" + pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? -ne 0 ]; then + logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? -ne 0 ]; then + logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace + # start method. 
Allow time for ganesha_mon to start and set the + # ganesha-active crm_attribute + sleep 5 + + pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true + if [ $? -ne 0 ]; then + logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1" + fi + + pcs cluster cib ${cibfile} + + while [[ ${1} ]]; do + + # this is variable indirection + # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"' + # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...) + # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1') + # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"') + # after the `eval ${clean_nvs}` there is a variable VIP_host_1 + # with the value '10_7_6_5', and the following \$$ magic to + # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us + # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s + # to give us ipaddr="10.7.6.5". whew! + name="VIP_${1}" + clean_name=${name//[-.]/_} + nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) + clean_nvs=${nvs//[-.]/_} + eval ${clean_nvs} + eval tmp_ipaddr=\$${clean_name} + ipaddr=${tmp_ipaddr//_/.} + + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" + fi + + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_unblock failed" + fi + + + shift + done + + create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + + pcs cluster cib-push ${cibfile} + if [ $? -ne 0 ]; then + logger "warning pcs cluster cib-push ${cibfile} failed" + fi + rm -f ${cibfile} +} + + +teardown_resources() +{ + # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2) + + # restore /var/lib/nfs + logger "notice: pcs resource delete nfs_setup-clone" + pcs resource delete nfs_setup-clone + if [ $? -ne 0 ]; then + logger "warning: pcs resource delete nfs_setup-clone failed" + fi + + # delete -clone resource agents + # in particular delete the ganesha monitor so we don't try to + # trigger anything when we shut down ganesha next. + pcs resource delete nfs-mon-clone + if [ $? 
-ne 0 ]; then + logger "warning: pcs resource delete nfs-mon-clone failed" + fi + + pcs resource delete nfs-grace-clone + if [ $? -ne 0 ]; then + logger "warning: pcs resource delete nfs-grace-clone failed" + fi + + while [[ ${1} ]]; do + pcs resource delete ${1}-group + if [ $? -ne 0 ]; then + logger "warning: pcs resource delete ${1}-group failed" + fi + shift + done + +} + + +recreate_resources() +{ + local cibfile=${1}; shift + + while [[ ${1} ]]; do + # this is variable indirection + # see the comment on the same a few lines up + name="VIP_${1}" + clean_name=${name//[-.]/_} + nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) + clean_nvs=${nvs//[-.]/_} + eval ${clean_nvs} + eval tmp_ipaddr=\$${clean_name} + ipaddr=${tmp_ipaddr//_/.} + + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" + fi + + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_unblock failed" + fi + + shift + done +} + + +addnode_recreate_resources() +{ + local cibfile=${1}; shift + local add_node=${1}; shift + local add_vip=${1}; shift + + recreate_resources ${cibfile} ${HA_SERVERS} + + pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${add_node}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \ + --after ${add_node}-nfs_block + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed" + fi + pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \ + ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? 
-ne 0 ]; then + logger "warning pcs resource create ${add_node}-nfs_unblock failed" + fi +} + + +clear_resources() +{ + local cibfile=${1}; shift + + while [[ ${1} ]]; do + pcs -f ${cibfile} resource delete ${1}-group + if [ $? -ne 0 ]; then + logger "warning: pcs -f ${cibfile} resource delete ${1}-group" + fi + + shift + done +} + + +addnode_create_resources() +{ + local add_node=${1}; shift + local add_vip=${1}; shift + local cibfile=$(mktemp -u) + + # start HA on the new node + pcs cluster start ${add_node} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster start ${add_node} failed" + fi + + pcs cluster cib ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib ${cibfile} failed" + fi + + # delete all the -cluster_ip-1 resources, clearing + # their constraints, then create them again so we can + # recompute their constraints + clear_resources ${cibfile} ${HA_SERVERS} + addnode_recreate_resources ${cibfile} ${add_node} ${add_vip} + + HA_SERVERS="${HA_SERVERS} ${add_node}" + create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + + pcs cluster cib-push ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib-push ${cibfile} failed" + fi + rm -f ${cibfile} +} + + +deletenode_delete_resources() +{ + local node=${1}; shift + local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//) + local cibfile=$(mktemp -u) + + pcs cluster cib ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib ${cibfile} failed" + fi + + # delete all the -cluster_ip-1 and -trigger_ip-1 resources, + # clearing their constraints, then create them again so we can + # recompute their constraints + clear_resources ${cibfile} ${HA_SERVERS} + recreate_resources ${cibfile} ${ha_servers} + HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /") + + create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + + pcs cluster cib-push ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib-push ${cibfile} failed" + fi + rm -f ${cibfile} + +} + + +deletenode_update_haconfig() +{ + local name="VIP_${1}" + local clean_name=${name//[-.]/_} + + ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") + sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf +} + + +setup_state_volume() +{ + local mnt=${HA_VOL_MNT} + local longname="" + local shortname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + while [[ ${1} ]]; do + + if [[ ${1} == *${dname} ]]; then + dirname=${1} + else + dirname=${1}${dname} + fi + + if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then + mkdir ${mnt}/nfs-ganesha/tickle_dir + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then + mkdir ${mnt}/nfs-ganesha/${dirname} + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov + fi + if [ ! 
-d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + for server in ${HA_SERVERS} ; do + if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + fi + done + shift + done + +} + + +enable_pacemaker() +{ + while [[ ${1} ]]; do + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable" + fi + shift + done +} + + +addnode_state_volume() +{ + local newnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${newnode} == *${dname} ]]; then + dirname=${newnode} + else + dirname=${newnode}${dname} + fi + + if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then + mkdir ${mnt}/nfs-ganesha/${dirname} + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! 
-e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + + for server in ${HA_SERVERS} ; do + + if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done + +} + + +delnode_state_volume() +{ + local delnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${delnode} == *${dname} ]]; then + dirname=${delnode} + else + dirname=${delnode}${dname} + fi + + rm -rf ${mnt}/nfs-ganesha/${dirname} + + for server in ${HA_SERVERS} ; do + if [[ ${server} != ${dirname} ]]; then + rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done +} + + +status() +{ + local scratch=$(mktemp) + local regex_str="^${1}-cluster_ip-1" + local healthy=0 + local index=1 + local nodes + + # change tabs to spaces, strip leading spaces, including any + # new '*' at the beginning of a line introduced in pcs-0.10.x + pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch} + + nodes[0]=${1}; shift + + # make a regex of the configured nodes + # and initalize the nodes array for later + while [[ ${1} ]]; do + + regex_str="${regex_str}|^${1}-cluster_ip-1" + nodes[${index}]=${1} + ((index++)) + shift + done + + # print the nodes that are expected to be online + grep -E "Online:" ${scratch} + + echo + + # print the VIPs and which node they are on + grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 + + echo + + # check if the VIP and port block/unblock RAs are on the expected nodes + for n in ${nodes[*]}; do + + grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + done + + grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} + result=$? + + if [ ${result} -eq 0 ]; then + echo "Cluster HA Status: BAD" + elif [ ${healthy} -eq 0 ]; then + echo "Cluster HA Status: HEALTHY" + else + echo "Cluster HA Status: FAILOVER" + fi + + rm -f ${scratch} +} + +create_ganesha_conf_file() +{ + if [[ "$1" == "yes" ]]; + then + if [ -e $GANESHA_CONF ]; + then + rm -rf $GANESHA_CONF + fi + # The symlink /etc/ganesha/ganesha.conf need to be + # created using ganesha conf file mentioned in the + # shared storage. Every node will only have this + # link and actual file will stored in shared storage, + # so that ganesha conf editing of ganesha conf will + # be easy as well as it become more consistent. 
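+ # As an illustration only (paths depend on how the cluster was set
+ # up): with the default GANESHA_CONF and a shared-storage HA_CONFDIR,
+ # the resulting link would be
+ #   /etc/ganesha/ganesha.conf -> /run/gluster/shared_storage/nfs-ganesha/ganesha.conf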
+ + ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF + else + # Restoring previous file + rm -rf $GANESHA_CONF + cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF + fi +} + +set_quorum_policy() +{ + local quorum_policy="stop" + local num_servers=${1} + + if [ ${num_servers} -lt 3 ]; then + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" + fi +} + +main() +{ + + local cmd=${1}; shift + if [[ ${cmd} == *help ]]; then + usage + exit 0 + fi + + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --on + fi + + local osid="" + + osid=$(grep ^ID= /etc/os-release) + eval $(echo ${osid} | grep -F ID=) + osid=$(grep ^VERSION_ID= /etc/os-release) + eval $(echo ${osid} | grep -F VERSION_ID=) + + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" + local vip="" + + # ignore any comment lines + cfgline=$(grep ^HA_NAME= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_NAME=) + cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) + + case "${cmd}" in + + setup | --setup) + logger "setting up ${HA_NAME}" + + check_cluster_exists ${HA_NAME} + + determine_servers "setup" + + # Fedora 29+ and rhel/centos 8 has PCS-0.10.x + # default is pcs-0.10.x options but check for + # rhel/centos 7 (pcs-0.9.x) and adjust accordingly + if [[ ! ${ID} =~ {rhel,centos} ]]; then + if [[ ${VERSION_ID} == 7.* ]]; then + PCS9OR10_PCS_CNAME_OPTION="--name" + PCS9OR10_PCS_CLONE_OPTION="--clone" + fi + fi + + if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then + + determine_service_manager + + setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" + + setup_create_resources ${HA_SERVERS} + + setup_finalize_ha + + setup_state_volume ${HA_SERVERS} + + enable_pacemaker ${HA_SERVERS} + + else + + logger "insufficient servers for HA, aborting" + fi + ;; + + teardown | --teardown) + logger "tearing down ${HA_NAME}" + + determine_servers "teardown" + + teardown_resources ${HA_SERVERS} + + teardown_cluster ${HA_NAME} + + cleanup_ganesha_config ${HA_CONFDIR} + ;; + + cleanup | --cleanup) + cleanup_ganesha_config ${HA_CONFDIR} + ;; + + add | --add) + node=${1}; shift + vip=${1}; shift + + logger "adding ${node} with ${vip} to ${HA_NAME}" + + determine_service_manager + + manage_service "start" ${node} + + determine_servers "add" + + pcs cluster node add ${node} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster node add ${node} failed" + fi + + addnode_create_resources ${node} ${vip} + # Subsequent add-node recreates resources for all the nodes + # that already exist in the cluster. The nodes are picked up + # from the entries in the ganesha-ha.conf file. Adding the + # newly added node to the file so that the resources specfic + # to this node is correctly recreated in the future. 
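+ # For example (hypothetical node name and VIP), the lines below would
+ # leave ganesha-ha.conf with entries such as:
+ #   VIP_server4="10.0.0.14"
+ #   HA_CLUSTER_NODES="server1,server2,server3,server4"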
+ clean_node=${node//[-.]/_} + echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf + + NEW_NODES="$HA_CLUSTER_NODES,${node}" + + sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ +$HA_CONFDIR/ganesha-ha.conf + + addnode_state_volume ${node} + + # addnode_create_resources() already appended ${node} to + # HA_SERVERS, so only need to increment HA_NUM_SERVERS + # and set quorum policy + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1) + set_quorum_policy ${HA_NUM_SERVERS} + ;; + + delete | --delete) + node=${1}; shift + + logger "deleting ${node} from ${HA_NAME}" + + determine_servers "delete" + + deletenode_delete_resources ${node} + + pcs cluster node remove ${node} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster node remove ${node} failed" + fi + + deletenode_update_haconfig ${node} + + delnode_state_volume ${node} + + determine_service_manager + + manage_service "stop" ${node} + + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1) + set_quorum_policy ${HA_NUM_SERVERS} + ;; + + status | --status) + determine_servers "status" + + status ${HA_SERVERS} + ;; + + refresh-config | --refresh-config) + VOL=${1} + + determine_servers "refresh-config" + + refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS} + ;; + + setup-ganesha-conf-files | --setup-ganesha-conf-files) + + create_ganesha_conf_file ${1} + ;; + + *) + # setup and teardown are not intended to be used by a + # casual user + usage + logger "Usage: ganesha-ha.sh add|delete|status" + ;; + + esac + + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --off + fi +} + +main $* diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py new file mode 100755 index 00000000000..77af014bab9 --- /dev/null +++ b/extras/ganesha/scripts/generate-epoch.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# +# Generates unique epoch value on each gluster node to be used by +# nfs-ganesha service on that node. +# +# Configure 'EPOCH_EXEC' option to this script path in +# '/etc/sysconfig/ganesha' file used by nfs-ganesha service. 
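+#
+# For example, '/etc/sysconfig/ganesha' could carry (the install path
+# shown here is only illustrative):
+#   EPOCH_EXEC="/usr/libexec/ganesha/generate-epoch.py"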
+# +# Construct epoch as follows - +# first 32-bit contains the now() time +# rest 32-bit value contains the local glusterd node uuid + +import time +import binascii + +# Calculate the now() time into a 64-bit integer value +def epoch_now(): + epoch_time = int(time.mktime(time.localtime())) << 32 + return epoch_time + +# Read glusterd UUID and extract first 32-bit of it +def epoch_uuid(): + file_name = '/var/lib/glusterd/glusterd.info' + + for line in open(file_name): + if "UUID" in line: + glusterd_uuid = line.split('=')[1].strip() + + uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-","")) + + epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000 + return epoch_uuid + +# Construct epoch as follows - +# first 32-bit contains the now() time +# rest 32-bit value contains the local glusterd node uuid +epoch = (epoch_now() | epoch_uuid()) +print((str(epoch))) + +exit(0) diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am index e4603ae80b8..09eff308ac4 100644 --- a/extras/geo-rep/Makefile.am +++ b/extras/geo-rep/Makefile.am @@ -1,4 +1,4 @@ -scriptsdir = $(datadir)/glusterfs/scripts +scriptsdir = $(libexecdir)/glusterfs/scripts scripts_SCRIPTS = gsync-upgrade.sh generate-gfid-file.sh get-gfid.sh \ slave-upgrade.sh schedule_georep.py diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in index f29ae020b8f..48b2b507060 100644 --- a/extras/geo-rep/schedule_georep.py.in +++ b/extras/geo-rep/schedule_georep.py.in @@ -352,7 +352,7 @@ def get_summary(mastervol, slave_url): def touch_mount_root(mastervol): # Create a Mount and Touch the Mount point root, # Hack to make sure some event available after - # setting Checkpoint. Without this their is a chance of + # setting Checkpoint. Without this there is a chance of # Checkpoint never completes. with glustermount("localhost", mastervol) as mnt: execute(["touch", mnt]) @@ -459,8 +459,8 @@ if __name__ == "__main__": description=__doc__) parser.add_argument("mastervol", help="Master Volume Name") parser.add_argument("slave", - help="SLAVEHOST or root@SLAVEHOST " - "or user@SLAVEHOST", + help="Slave hostname " + "(<username>@SLAVEHOST or SLAVEHOST)", metavar="SLAVE") parser.add_argument("slavevol", help="Slave Volume Name") parser.add_argument("--interval", help="Interval in Seconds. " diff --git a/extras/glusterd.vol.in b/extras/glusterd.vol.in index 6141d8a736e..5d7bad0e4c8 100644 --- a/extras/glusterd.vol.in +++ b/extras/glusterd.vol.in @@ -1,12 +1,11 @@ volume management type mgmt/glusterd option working-directory @GLUSTERD_WORKDIR@ - option transport-type socket,rdma + option transport-type socket option transport.socket.keepalive-time 10 option transport.socket.keepalive-interval 2 option transport.socket.read-fail-log off option transport.socket.listen-port 24007 - option transport.rdma.listen-port 24008 option ping-timeout 0 option event-threads 1 # option lock-timer 180 diff --git a/extras/glusterfs-georep-upgrade.py b/extras/glusterfs-georep-upgrade.py new file mode 100755 index 00000000000..634576058d6 --- /dev/null +++ b/extras/glusterfs-georep-upgrade.py @@ -0,0 +1,77 @@ +#!/usr/bin/python3 +""" + +Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com> +This file is part of GlusterFS. + +This file is licensed to you under your choice of the GNU Lesser +General Public License, version 3 or any later version (LGPLv3 or +later), or the GNU General Public License, version 2 (GPLv2), in all +cases as published by the Free Software Foundation. 
+ +""" + +import argparse +import errno +import os, sys +import shutil +from datetime import datetime + +def find_htime_path(brick_path): + dirs = [] + htime_dir = os.path.join(brick_path, '.glusterfs/changelogs/htime') + for file in os.listdir(htime_dir): + if os.path.isfile(os.path.join(htime_dir,file)) and file.startswith("HTIME"): + dirs.append(os.path.join(htime_dir, file)) + else: + raise FileNotFoundError("%s unavailable" % (os.path.join(htime_dir, file))) + return dirs + +def modify_htime_file(brick_path): + htime_file_path_list = find_htime_path(brick_path) + + for htime_file_path in htime_file_path_list: + changelog_path = os.path.join(brick_path, '.glusterfs/changelogs') + temp_htime_path = os.path.join(changelog_path, 'htime/temp_htime_file') + with open(htime_file_path, 'r') as htime_file, open(temp_htime_path, 'w') as temp_htime_file: + #extract epoch times from htime file + paths = htime_file.read().split("\x00") + + for pth in paths: + epoch_no = pth.split(".")[-1] + changelog = os.path.basename(pth) + #convert epoch time to year, month and day + if epoch_no != '': + date=(datetime.fromtimestamp(float(int(epoch_no))).strftime("%Y/%m/%d")) + #update paths in temp htime file + temp_htime_file.write("%s/%s/%s\x00" % (changelog_path, date, changelog)) + #create directory in the format year/month/days + path = os.path.join(changelog_path, date) + + if changelog.startswith("CHANGELOG."): + try: + os.makedirs(path, mode = 0o600); + except OSError as exc: + if exc.errno == errno.EEXIST: + pass + else: + raise + + #copy existing changelogs to new directory structure, delete old changelog files + shutil.copyfile(pth, os.path.join(path, changelog)) + os.remove(pth) + + #rename temp_htime_file with htime file + os.rename(htime_file_path, os.path.join('%s.bak'%htime_file_path)) + os.rename(temp_htime_path, htime_file_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('brick_path', help="This upgrade script, which is to be run on\ + server side, takes brick path as the argument, \ + updates paths inside htime file and alters the directory structure \ + above the changelog files inorder to support new optimised format \ + of the directory structure as per \ + https://review.gluster.org/#/c/glusterfs/+/23733/") + args = parser.parse_args() + modify_htime_file(args.brick_path) diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate index 75f700e6459..6ba6ef18e9f 100644 --- a/extras/glusterfs-logrotate +++ b/extras/glusterfs-logrotate @@ -45,3 +45,24 @@ compress delaycompress } + +# Rotate snapd log +/var/log/glusterfs/snaps/*/*.log { + sharedscripts + weekly + maxsize 10M + minsize 100k + + # 6 months of logs are good enough + rotate 26 + + missingok + compress + delaycompress + notifempty + postrotate + for pid in `ps -aef | grep glusterfs | egrep "snapd" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript +} diff --git a/extras/group-gluster-block b/extras/group-gluster-block index 56b406e3641..1e398019e6b 100644 --- a/extras/group-gluster-block +++ b/extras/group-gluster-block @@ -5,6 +5,14 @@ performance.stat-prefetch=off performance.open-behind=off performance.readdir-ahead=off performance.strict-o-direct=on +performance.client-io-threads=on +performance.io-thread-count=32 +performance.high-prio-threads=32 +performance.normal-prio-threads=32 +performance.low-prio-threads=32 +performance.least-prio-threads=4 +client.event-threads=8 +server.event-threads=8 network.remote-dio=disable 
cluster.eager-lock=enable cluster.quorum-type=auto diff --git a/extras/group-virt.example b/extras/group-virt.example index c2ce89d7b9c..cc37c98a25c 100644 --- a/extras/group-virt.example +++ b/extras/group-virt.example @@ -2,7 +2,8 @@ performance.quick-read=off performance.read-ahead=off performance.io-cache=off performance.low-prio-threads=32 -network.remote-dio=enable +network.remote-dio=disable +performance.strict-o-direct=on cluster.eager-lock=enable cluster.quorum-type=auto cluster.server-quorum-type=server @@ -16,3 +17,8 @@ cluster.choose-local=off client.event-threads=4 server.event-threads=4 performance.client-io-threads=on +network.ping-timeout=20 +server.tcp-user-timeout=20 +server.keepalive-time=10 +server.keepalive-interval=2 +server.keepalive-count=5 diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh index 885ed03ad5b..1f2564b44ff 100755 --- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh @@ -79,9 +79,9 @@ done if [ "$option" == "disable" ]; then # Unmount the volume on all the nodes - umount /var/run/gluster/shared_storage - cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp - mv /var/run/gluster/fstab.tmp /etc/fstab + umount /run/gluster/shared_storage + cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp + mv /run/gluster/fstab.tmp /etc/fstab fi if [ "$is_originator" == 1 ]; then @@ -104,8 +104,15 @@ function check_volume_status() echo $status } -mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ - /var/run/gluster/shared_storage" +key=`echo $5 | cut -d '=' -f 1` +val=`echo $5 | cut -d '=' -f 2` +if [ "$key" == "transport.address-family" ]; then + mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \ + $local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage" +else + mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ + /run/gluster/shared_storage" +fi if [ "$option" == "enable" ]; then retry=0; @@ -120,10 +127,10 @@ if [ "$option" == "enable" ]; then status=$(check_volume_status) done # Mount the volume on all the nodes - umount /var/run/gluster/shared_storage - mkdir -p /var/run/gluster/shared_storage + umount /run/gluster/shared_storage + mkdir -p /run/gluster/shared_storage $mount_cmd - cp /etc/fstab /var/run/gluster/fstab.tmp - echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp - mv /var/run/gluster/fstab.tmp /etc/fstab + cp /etc/fstab /run/gluster/fstab.tmp + echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp + mv /run/gluster/fstab.tmp /etc/fstab fi diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am index e32546dc999..792019d3c9f 100644 --- a/extras/hook-scripts/start/post/Makefile.am +++ b/extras/hook-scripts/start/post/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh +EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/ if WITH_SERVER diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh new file mode 100755 index 
00000000000..7ad6f23ad06 --- /dev/null +++ b/extras/hook-scripts/start/post/S31ganesha-start.sh @@ -0,0 +1,122 @@ +#!/bin/bash +PROGNAME="Sganesha-start" +OPTSPEC="volname:,gd-workdir:" +VOL= +declare -i EXPORT_ID +ganesha_key="ganesha.enable" +GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha" +CONF1="$GANESHA_DIR/ganesha.conf" +GLUSTERD_WORKDIR= + +function parse_args () +{ + ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@) + eval set -- "$ARGS" + + while true; do + case $1 in + --volname) + shift + VOL=$1 + ;; + --gd-workdir) + shift + GLUSTERD_WORKDIR=$1 + ;; + *) + shift + break + ;; + esac + shift + done +} + + + +#This function generates a new export entry as export.volume_name.conf +function write_conf() +{ +echo -e "# WARNING : Using Gluster CLI will overwrite manual +# changes made to this file. To avoid it, edit the +# file, copy it over to all the NFS-Ganesha nodes +# and run ganesha-ha.sh --refresh-config." + +echo "EXPORT{" +echo " Export_Id = 2;" +echo " Path = \"/$VOL\";" +echo " FSAL {" +echo " name = \"GLUSTER\";" +echo " hostname=\"localhost\";" +echo " volume=\"$VOL\";" +echo " }" +echo " Access_type = RW;" +echo " Disable_ACL = true;" +echo " Squash=\"No_root_squash\";" +echo " Pseudo=\"/$VOL\";" +echo " Protocols = \"3\", \"4\" ;" +echo " Transports = \"UDP\",\"TCP\";" +echo " SecType = \"sys\";" +echo "}" +} + +#It adds the export dynamically by sending dbus signals +function export_add() +{ + dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ +string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)" + +} + +# based on src/scripts/ganeshactl/Ganesha/export_mgr.py +function is_exported() +{ + local volume="${1}" + + dbus-send --type=method_call --print-reply --system \ + --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ + org.ganesha.nfsd.exportmgr.ShowExports \ + | grep -w -q "/${volume}" + + return $? +} + +# Check the info file (contains the volume options) to see if Ganesha is +# enabled for this volume. +function ganesha_enabled() +{ + local volume="${1}" + local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info" + local enabled="off" + + enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2) + + [ "${enabled}" == "on" ] + + return $? +} + +parse_args $@ + +if ganesha_enabled ${VOL} && ! is_exported ${VOL} +then + if [ ! 
-e ${GANESHA_DIR}/exports/export.${VOL}.conf ] + then + #Remove export entry from nfs-ganesha.conf + sed -i /$VOL.conf/d $CONF1 + write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf + EXPORT_ID=`cat $GANESHA_DIR/.export_added` + EXPORT_ID=EXPORT_ID+1 + echo $EXPORT_ID > $GANESHA_DIR/.export_added + sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \ + $GANESHA_DIR/exports/export.$VOL.conf + echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1 + else + EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\ + awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') + fi + export_add $VOL +fi + +exit 0 diff --git a/extras/mount-shared-storage.sh b/extras/mount-shared-storage.sh index e99233f7e1e..cc40e13c3e3 100755 --- a/extras/mount-shared-storage.sh +++ b/extras/mount-shared-storage.sh @@ -21,7 +21,7 @@ do continue fi - mount -t glusterfs "${arr[0]}" "${arr[1]}" + mount -t glusterfs -o "${arr[3]}" "${arr[0]}" "${arr[1]}" #wait for few seconds sleep 10 diff --git a/extras/ocf/volume.in b/extras/ocf/volume.in index 46dd20b8ced..76cc649e55f 100755 --- a/extras/ocf/volume.in +++ b/extras/ocf/volume.in @@ -6,6 +6,7 @@ # HA resource # # Authors: Florian Haas (hastexo Professional Services GmbH) +# Jiri Lunacek (Hosting90 Systems s.r.o.) # # License: GNU General Public License (GPL) @@ -54,6 +55,14 @@ must have clone ordering enabled. <shortdesc lang="en">gluster executable</shortdesc> <content type="string" default="$OCF_RESKEY_binary_default"/> </parameter> + <parameter name="peer_map"> + <longdesc lang="en"> + Mapping of hostname - peer name in the gluster cluster + in format hostname1:peername1,hostname2:peername2,... + </longdesc> + <shortdesc lang="en">gluster peer map</shortdesc> + <content type="string" default=""/> + </parameter> </parameters> <actions> <action name="start" timeout="20" /> @@ -68,6 +77,10 @@ EOF } +if [ -n "${OCF_RESKEY_peer_map}" ]; then + SHORTHOSTNAME=`echo "${OCF_RESKEY_peer_map}" | egrep -o "$SHORTHOSTNAME\:[^,]+" | awk -F: '{print $2}'` +fi + volume_getdir() { local voldir voldir="@GLUSTERD_WORKDIR@/vols/${OCF_RESKEY_volname}" @@ -108,6 +121,10 @@ volume_getpids() { volpid_dir=`volume_getpid_dir` bricks=`volume_getbricks` + + if [ -z "$bricks" ]; then + return 1 + fi for brick in ${bricks}; do pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid" @@ -214,6 +231,11 @@ volume_validate_all() { # Test for required binaries check_binary $OCF_RESKEY_binary + + if [ -z "$SHORTHOSTNAME" ]; then + ocf_log err 'Unable to get host in node map' + return $OCF_ERR_CONFIGURED + fi return $OCF_SUCCESS } diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py index f03895de114..e62f7fc52a3 100755 --- a/extras/quota/quota_fsck.py +++ b/extras/quota/quota_fsck.py @@ -52,17 +52,17 @@ epilog_msg=''' def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None): if log_type == QUOTA_VERBOSE: - print('%-24s %-60s\nxattr_values: %s\n%s\n' % {"Verbose", path, xattr_dict, stbuf}) + print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf)) elif log_type == QUOTA_META_ABSENT: - print('%-24s %-60s\n%s\n' % {"Quota-Meta Absent", path, xattr_dict}) + print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict)) elif log_type == QUOTA_SIZE_MISMATCH: print("mismatch") if dir_size is not None: - print('%24s %60s %12s %12s' % {"Size Mismatch", path, xattr_dict['contri_size'], - dir_size}) + print('%24s %60s %12s %12s' % ("Size Mismatch", path, + xattr_dict, dir_size)) else: - print('%-24s %-60s %-12i 
%-12i' % {"Size Mismatch", path, xattr_dict['contri_size'], - stbuf.st_size}) + print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict, + stbuf.st_size)) def size_differs_lot(s1, s2): ''' @@ -156,12 +156,10 @@ def get_quota_xattr_brick(dpath): xattr_dict = {} xattr_dict['parents'] = {} - for xattr in pairs: + for xattr in pairs[1:]: + xattr = xattr.decode("utf-8") xattr_key = xattr.split("=")[0] - if re.search("# file:", xattr_key): - # skip the file comment - continue - elif xattr_key is "": + if xattr_key == "": # skip any empty lines continue elif not re.search("quota", xattr_key): diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py index 1127be0e976..0e4df77d481 100755 --- a/extras/snap_scheduler/gcron.py +++ b/extras/snap_scheduler/gcron.py @@ -19,10 +19,10 @@ import logging.handlers import fcntl -GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" +GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks" GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag" -LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/" +LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/" log = logging.getLogger("gcron-logger") start_time = 0.0 @@ -38,7 +38,8 @@ def initLogger(script_name): sh.setFormatter(formatter) process = subprocess.Popen(["gluster", "--print-logdir"], - stdout=subprocess.PIPE) + stdout=subprocess.PIPE, + universal_newlines=True) out, err = process.communicate() if process.returncode == 0: logfile = os.path.join(out.strip(), script_name[:-3]+".log") diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py index a66c5e3d5ce..e8fcc449a9b 100755 --- a/extras/snap_scheduler/snap_scheduler.py +++ b/extras/snap_scheduler/snap_scheduler.py @@ -67,7 +67,7 @@ except ImportError: SCRIPT_NAME = "snap_scheduler" scheduler_enabled = False log = logging.getLogger(SCRIPT_NAME) -SHARED_STORAGE_DIR="/var/run/gluster/shared_storage" +SHARED_STORAGE_DIR="/run/gluster/shared_storage" GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled" GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled" GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks" @@ -149,7 +149,7 @@ def initLogger(): sh.setFormatter(formatter) process = subprocess.Popen(["gluster", "--print-logdir"], - stdout=subprocess.PIPE) + stdout=subprocess.PIPE, universal_newlines=True) logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log") fh = logging.FileHandler(logfile) diff --git a/extras/statedumpparse.rb b/extras/statedumpparse.rb new file mode 100755 index 00000000000..1aff43377db --- /dev/null +++ b/extras/statedumpparse.rb @@ -0,0 +1,208 @@ +#!/usr/bin/env ruby + +require 'time' +require 'optparse' + +unless Array.instance_methods.include? :to_h + class Array + def to_h + h = {} + each { |k,v| h[k]=v } + h + end + end +end + +# statedump.c:gf_proc_dump_mempool_info uses a five-dash record separator, +# client.c:client_fd_lk_ctx_dump uses a six-dash record separator. 
+ARRSEP = /^(-{5,6}=-{5,6})?$/ +HEAD = /^\[(.*)\]$/ +INPUT_FORMATS = %w[statedump json] + +format = 'json' +input_format = 'statedump' +tz = '+0000' +memstat_select,memstat_reject = //,/\Z./ +OptionParser.new do |op| + op.banner << " [<] <STATEDUMP>" + op.on("-f", "--format=F", "json/yaml/memstat(-[plain|human|json])") { |s| format = s } + op.on("--input-format=F", INPUT_FORMATS.join(?/)) { |s| input_format = s } + op.on("--timezone=T", + "time zone to apply to zoneless timestamps [default UTC]") { |s| tz = s } + op.on("--memstat-select=RX", "memstat: select memory types matching RX") { |s| + memstat_select = Regexp.new s + } + op.on("--memstat-reject=RX", "memstat: reject memory types matching RX") { |s| + memstat_reject = Regexp.new s + } +end.parse! + + +if format =~ /\Amemstat(?:-(.*))?/ + memstat_type = $1 || 'plain' + unless %w[plain human json].include? memstat_type + raise "unknown memstat type #{memstat_type.dump}" + end + format = 'memstat' +end + +repr, logsep = case format +when 'yaml' + require 'yaml' + + [proc { |e| e.to_yaml }, "\n"] +when 'json', 'memstat' + require 'json' + + [proc { |e| e.to_json }, " "] +else + raise "unkonwn format '#{format}'" +end +formatter = proc { |e| puts repr.call(e) } + +INPUT_FORMATS.include? input_format or raise "unkwown input format '#{input_format}'" + +dumpinfo = {} + +# parse a statedump entry +elem_cbk = proc { |s,&cbk| + arraylike = false + s.grep(/\S/).empty? and next + head = nil + while s.last =~ /^\s*$/ + s.pop + end + body = catch { |misc2| + s[0] =~ HEAD ? (head = $1) : (throw misc2) + body = [[]] + s[1..-1].each { |l| + if l =~ ARRSEP + arraylike = true + body << [] + next + end + body.last << l + } + + body.reject(&:empty?).map { |e| + ea = e.map { |l| + k,v = l.split("=",2) + m = /\A(0|-?[1-9]\d*)(\.\d+)?\Z/.match v + [k, m ? (m[2] ? Float(v) : Integer(v)) : v] + } + begin + ea.to_h + rescue + throw misc2 + end + } + } + + if body + cbk.call [head, arraylike ? body : (body.empty? ? {} : body[0])] + else + STDERR.puts ["WARNING: failed to parse record:", repr.call(s)].join(logsep) + end +} + +# aggregator routine +aggr = case format +when 'memstat' + meminfo = {} + # commit memory-related entries to meminfo + proc { |k,r| + case k + when /memusage/ + (meminfo["GF_MALLOC"]||={})[k] ||= r["size"] if k =~ memstat_select and k !~ memstat_reject + when "mempool" + r.each {|e| + kk = "mempool:#{e['pool-name']}" + (meminfo["mempool"]||={})[kk] ||= e["size"] if kk =~ memstat_select and kk !~ memstat_reject + } + end + } +else + # just format data, don't actually aggregate anything + proc { |pair| formatter.call pair } +end + +# processing the data +case input_format +when 'statedump' + acc = [] + $<.each { |l| + l = l.strip + if l =~ /^(DUMP-(?:START|END)-TIME):\s+(.*)/ + dumpinfo["_meta"]||={} + (dumpinfo["_meta"]["date"]||={})[$1] = Time.parse([$2, tz].join " ") + next + end + + if l =~ HEAD + elem_cbk.call(acc, &aggr) + acc = [l] + next + end + + acc << l + } + elem_cbk.call(acc, &aggr) +when 'json' + $<.each { |l| + r = JSON.load l + case r + when Array + aggr[r] + when Hash + dumpinfo.merge! 
r + end + } +end + +# final actions: output aggregated data +case format +when 'memstat' + ma = meminfo.values.map(&:to_a).inject(:+) + totals = meminfo.map { |coll,h| [coll, h.values.inject(:+)] }.to_h + tt = ma.transpose[1].inject(:+) + + summary_sep,showm = case memstat_type + when 'json' + ["", proc { |k,v| puts({type: k, value: v}.to_json) }] + when 'plain', 'human' + # human-friendly number representation + hr = proc { |n| + qa = %w[B kB MB GB] + q = ((1...qa.size).find {|i| n < (1 << i*10)} || qa.size) - 1 + "%.2f%s" % [n.to_f / (1 << q*10), qa[q]] + } + + templ = "%{val} %{key}" + tft = proc { |t| t } + nft = if memstat_type == 'human' + nw = [ma.transpose[1], totals.values, tt].flatten.map{|n| hr[n].size}.max + proc { |n| + hn = hr[n] + " " * (nw - hn.size) + hn + } + else + nw = tt.to_s.size + proc { |n| "%#{nw}d" % n } + end + ## Alternative template, key first: + # templ = "%{key} %{val}" + # tw = ma.transpose[0].map(&:size).max + # tft = proc { |t| t + " " * [tw - t.size, 0].max } + # nft = (memstat_type == 'human') ? hr : proc { |n| n } + ["\n", proc { |k,v| puts templ % {key: tft[k], val: nft[v]} }] + else + raise 'this should be impossible' + end + + ma.sort_by { |k,v| v }.each(&showm) + print summary_sep + totals.each { |coll,t| showm.call "Total #{coll}", t } + showm.call "TOTAL", tt +else + formatter.call dumpinfo +end diff --git a/extras/systemd/gluster-ta-volume.service.in b/extras/systemd/gluster-ta-volume.service.in index 452c01c419f..2802bca05bf 100644 --- a/extras/systemd/gluster-ta-volume.service.in +++ b/extras/systemd/gluster-ta-volume.service.in @@ -4,7 +4,7 @@ After=network.target [Service] Environment="LOG_LEVEL=WARNING" -ExecStart=@prefix@/sbin/glusterfsd -N --volfile-id ta -f @GLUSTERD_WORKDIR@/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-server.transport.socket.listen-port=24007 +ExecStart=@prefix@/sbin/glusterfsd -N --volfile-id ta -f @GLUSTERD_WORKDIR@/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-server.transport.socket.listen-port=24007 -LWARNING Restart=always KillMode=process SuccessExitStatus=15 diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in index 89ef402ac83..abb0d82911f 100644 --- a/extras/systemd/glusterd.service.in +++ b/extras/systemd/glusterd.service.in @@ -1,6 +1,8 @@ [Unit] Description=GlusterFS, a clustered file-system server Documentation=man:glusterd(8) +StartLimitBurst=6 +StartLimitIntervalSec=3600 Requires=@RPCBIND_SERVICE@ After=network.target @RPCBIND_SERVICE@ Before=network-online.target @@ -10,10 +12,15 @@ Type=forking PIDFile=@localstatedir@/run/glusterd.pid LimitNOFILE=65536 Environment="LOG_LEVEL=INFO" -EnvironmentFile=-@sysconfdir@/sysconfig/glusterd +EnvironmentFile=-@SYSCONF_DIR@/sysconfig/glusterd ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS KillMode=process +TimeoutSec=300 SuccessExitStatus=15 +Restart=on-abnormal +RestartSec=60 +StartLimitBurst=6 +StartLimitInterval=3600 [Install] WantedBy=multi-user.target diff --git a/extras/thin-arbiter/thin-arbiter.vol b/extras/thin-arbiter/thin-arbiter.vol index 244a4caf485..c76babc7b3c 100644 --- a/extras/thin-arbiter/thin-arbiter.vol +++ b/extras/thin-arbiter/thin-arbiter.vol @@ -33,11 +33,10 @@ volume ta-index subvolumes ta-io-threads end-volume -volume ta-io-stats +volume /mnt/thin-arbiter type debug/io-stats option count-fop-hits off option latency-measurement off - option log-level WARNING option unique-id /mnt/thin-arbiter subvolumes 
ta-index end-volume @@ -54,5 +53,5 @@ volume ta-server option auth-path /mnt/thin-arbiter option transport.address-family inet option transport-type tcp - subvolumes ta-io-stats + subvolumes /mnt/thin-arbiter end-volume diff --git a/extras/who-wrote-glusterfs/gitdm.domain-map b/extras/who-wrote-glusterfs/gitdm.domain-map index 315355b08b8..7cd2bbd605b 100644 --- a/extras/who-wrote-glusterfs/gitdm.domain-map +++ b/extras/who-wrote-glusterfs/gitdm.domain-map @@ -4,6 +4,7 @@ active.by ActiveCloud appeartv.com Appear TV cern.ch CERN +cmss.chinamobile.com China Mobile(Suzhou) Software Technology datalab.es DataLab S.L. fb.com Facebook fedoraproject.org Fedora Project |
