Diffstat (limited to 'extras')
71 files changed, 4102 insertions, 286 deletions
diff --git a/extras/Makefile.am b/extras/Makefile.am index a1c657659f3..983f014cca6 100644 --- a/extras/Makefile.am +++ b/extras/Makefile.am @@ -11,23 +11,25 @@ EditorModedir = $(docdir) EditorMode_DATA = glusterfs-mode.el glusterfs.vim SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \ - $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils + $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \ + ganesha confdir = $(sysconfdir)/glusterfs if WITH_SERVER conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \ logger.conf.example glusterfs-georep-logrotate group-virt.example \ group-metadata-cache group-gluster-block group-nl-cache \ - group-db-workload + group-db-workload group-distributed-virt group-samba endif voldir = $(sysconfdir)/glusterfs +vol_DATA = thin-arbiter/thin-arbiter.vol if WITH_SERVER -vol_DATA = glusterd.vol +vol_DATA += glusterd.vol endif scriptsdir = $(datadir)/glusterfs/scripts -scripts_SCRIPTS = +scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh if WITH_SERVER scripts_SCRIPTS += post-upgrade-script-for-quota.sh \ pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh @@ -40,14 +42,15 @@ endif EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \ logger.conf.example glusterfs-georep-logrotate group-virt.example \ group-metadata-cache group-gluster-block group-nl-cache \ - group-db-workload specgen.scm glusterfs-mode.el glusterfs.vim \ + group-db-workload group-samba specgen.scm glusterfs-mode.el glusterfs.vim \ migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \ backend-cleanup.sh disk_usage_sync.sh clear_xattrs.sh \ glusterd-sysconfig glusterd.vol post-upgrade-script-for-quota.sh \ pre-upgrade-script-for-quota.sh command-completion/gluster.bash \ command-completion/Makefile command-completion/README \ stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \ - control-cpu-load.sh control-mem.sh + control-cpu-load.sh control-mem.sh group-distributed-virt \ + thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh if WITH_SERVER install-data-local: @@ -67,4 +70,8 @@ install-data-local: $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/nl-cache $(INSTALL_DATA) $(top_srcdir)/extras/group-db-workload \ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/db-workload + $(INSTALL_DATA) $(top_srcdir)/extras/group-distributed-virt \ + $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/distributed-virt + $(INSTALL_DATA) $(top_srcdir)/extras/group-samba \ + $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/samba endif diff --git a/extras/cliutils/README.md b/extras/cliutils/README.md index e11166774e3..309beb1ca25 100644 --- a/extras/cliutils/README.md +++ b/extras/cliutils/README.md @@ -221,7 +221,7 @@ required.(Under `%files` section) - gluster-mountbroker http://review.gluster.org/14544 - gluster-eventsapi http://review.gluster.org/14248 - gluster-georep-sshkey http://review.gluster.org/14732 -- gluster-restapi https://github.com/aravindavk/glusterfs-restapi +- gluster-restapi https://github.com/gluster/restapi ## Limitations/TODOs - Not yet possible to create CLI without any subcommand, For example diff --git a/extras/cliutils/cliutils.py b/extras/cliutils/cliutils.py index 0095586827d..55fbaf56704 100644 --- a/extras/cliutils/cliutils.py +++ b/extras/cliutils/cliutils.py @@ -20,7 +20,14 @@ _common_args_func = lambda p: True class GlusterCmdException(Exception): - pass + def __init__(self, message): + self.message = message + try: + # Python 3 + super().__init__(message) + except TypeError: 
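+ # (zero-argument super() exists only on Python 3; on Python 2 the call above + # raises TypeError, which routes execution to the fallback below)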
+ # Python 2 + super(GlusterCmdException, self).__init__(message) def get_node_uuid(): diff --git a/extras/collect-system-stats.sh b/extras/collect-system-stats.sh new file mode 100755 index 00000000000..865e70bbc11 --- /dev/null +++ b/extras/collect-system-stats.sh @@ -0,0 +1,52 @@ +#!/bin/bash +################################################################################ +# Usage: collect-system-stats.sh <delay-in-seconds> +# This script starts sar/top/iostat/vmstat processes which collect system stats +# with the interval <delay-in-seconds> given as argument to the script. When +# the script is stopped either by entering any input or Ctrl+C the list of +# files where output is captured will be printed on the screen which can be +# observed to find any problems/bottlenecks. +############################################################################### + +function stop_processes { + echo "Stopping the monitoring processes" + echo "sar pid:$sar_pid", "top pid: $top_pid", "iostat pid: $iostat_pid", "vmstat pid: $vmstat_pid" + kill "$sar_pid" "$top_pid" "$iostat_pid" "$vmstat_pid" + echo "Files created: ${timestamp}-network.out, ${timestamp}-top.out, ${timestamp}-iostat.out, ${timestamp}-vmstat.out" +} + +function check_dependent_commands_exist() +{ + declare -a arr=("sar" "top" "iostat" "vmstat") + for i in "${arr[@]}" + do + if ! command -v "$i" > /dev/null 2>&1 + then + echo "ERROR: '$i' command is not found" + exit 1 + fi + done + +} + +case "$1" in + ''|*[!0-9]*) echo "Usage: $0 <delay-between-successive-metrics-collection-in-seconds>"; exit 1 ;; + *) interval="$1" ;; +esac + +timestamp=$(date +"%s") + +check_dependent_commands_exist +sar -n DEV "$interval" > "${timestamp}"-network.out & +sar_pid="$!" +top -bHd "$interval" > "${timestamp}"-top.out & +top_pid="$!" +iostat -Ntkdx "$interval" > "${timestamp}"-iostat.out & +iostat_pid="$!" +vmstat -t "$interval" > "${timestamp}"-vmstat.out & +vmstat_pid="$!" +echo "Started sar, vmstat, iostat, top for collecting stats" + + +trap stop_processes EXIT +read -r -p "Press anything and ENTER to exit"; diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh index b739c821055..52dcf62fd9f 100755 --- a/extras/control-cpu-load.sh +++ b/extras/control-cpu-load.sh @@ -104,7 +104,7 @@ echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup." 
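# For context, a sketch assuming the cgroup v1 defaults this script targets:
# cpu.cfs_quota_us is measured against cpu.cfs_period_us (100000 us by
# default), so the cap written below can be sanity-checked with
#   period=$(cat ${LOC}/${cgroup_name}/cpu.cfs_period_us)
#   echo "cap: $((quota_value * 100 / period))% of one CPU"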
echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then - for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`; + for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`; do echo ${thid} > ${LOC}/${cgroup_name}/tasks ; done diff --git a/extras/control-mem.sh b/extras/control-mem.sh index 38aa2a08748..91b36f8107a 100755 --- a/extras/control-mem.sh +++ b/extras/control-mem.sh @@ -116,7 +116,7 @@ else fi if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then - for thid in `ps -T -p ${daemon_pid} | grep gluster | awk -F " " '{print $2}'`; + for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`; do echo ${thid} > ${LOC}/${cgroup_name}/tasks ; done diff --git a/extras/create_new_xlator/generate_xlator.py b/extras/create_new_xlator/generate_xlator.py index 2c554c991c9..983868c04db 100755 --- a/extras/create_new_xlator/generate_xlator.py +++ b/extras/create_new_xlator/generate_xlator.py @@ -113,7 +113,7 @@ def gen_xlator(): xlator_methods = fragments["XLATOR_METHODS"].replace("@XL_NAME@", xl_name) xlator_methods = xlator_methods.replace("@FOP_PREFIX@", fop_prefix) - print(xlator_methods, file=xl) + print(xlator_methods, file=xl) xl.close() @@ -169,7 +169,7 @@ def load_fragments (): cur_value = "" result = {} basepath = os.path.abspath(os.path.dirname(__file__)) - fragpath = basepath + "/new-xlator-tmpl.c" + fragpath = basepath + "/new-xlator.c.tmpl" for line in open(fragpath, "r").readlines(): m = pragma_re.search(line) if m: diff --git a/extras/create_new_xlator/new-xlator-tmpl.c b/extras/create_new_xlator/new-xlator.c.tmpl index caa10b374d1..fe9735bfcf1 100644 --- a/extras/create_new_xlator/new-xlator-tmpl.c +++ b/extras/create_new_xlator/new-xlator.c.tmpl @@ -1,32 +1,30 @@ #pragma fragment CBK_TEMPLATE -int32_t @FOP_PREFIX @_ @NAME - @_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, @UNWIND_PARAMS @) +int32_t @FOP_PREFIX@_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, @UNWIND_PARAMS@) { - STACK_UNWIND_STRICT(@NAME @, frame, op_ret, op_errno, @UNWIND_ARGS @); + STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @UNWIND_ARGS@); return 0; } #pragma fragment COMMENT If you are generating the leaf xlators, remove the STACK_WIND and replace the - @ERROR_ARGS @to @UNWIND_ARGS @ if necessary + @ERROR_ARGS@ to @UNWIND_ARGS@ if necessary #pragma fragment FOP_TEMPLATE - int32_t @FOP_PREFIX @_ @NAME - @(call_frame_t *frame, xlator_t *this, @WIND_PARAMS @) + int32_t @FOP_PREFIX@_@NAME@(call_frame_t *frame, xlator_t *this, @WIND_PARAMS@) { - STACK_WIND(frame, @FOP_PREFIX @_ @NAME @_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->@NAME @, @WIND_ARGS @); + STACK_WIND(frame, @FOP_PREFIX@_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @WIND_ARGS@); return 0; err: - STACK_UNWIND_STRICT(@NAME @, frame, -1, errno, @ERROR_ARGS @); + STACK_UNWIND_STRICT(@NAME@, frame, -1, errno, @ERROR_ARGS@); return 0; } #pragma fragment FUNC_TEMPLATE -@RET_TYPE @ @FOP_PREFIX @_ @NAME @(@FUNC_PARAMS @) +@RET_TYPE@ @FOP_PREFIX@_@NAME@(@FUNC_PARAMS@) { - return @RET_VAR @; + return @RET_VAR@; } #pragma fragment CP @@ -45,40 +43,40 @@ err: #pragma fragment XLATOR_METHODS -static int32_t @FOP_PREFIX @_init(xlator_t *this) +static int32_t @FOP_PREFIX@_init(xlator_t *this) { return 0; } -static void @FOP_PREFIX @_fini(xlator_t *this) +static void @FOP_PREFIX@_fini(xlator_t *this) { 
return; } -static int32_t @FOP_PREFIX @_reconfigure(xlator_t *this, dict_t *dict) +static int32_t @FOP_PREFIX@_reconfigure(xlator_t *this, dict_t *dict) { return 0; } -static int @FOP_PREFIX @_notify(xlator_t *this, int event, void *data, ...) +static int @FOP_PREFIX@_notify(xlator_t *this, int event, void *data, ...) { return default_notify(this, event, data); } -static int32_t @FOP_PREFIX @_mem_acct_init(xlator_t *this) +static int32_t @FOP_PREFIX@_mem_acct_init(xlator_t *this) { int ret = -1; - ret = xlator_mem_acct_init(this, gf_ @FOP_PREFIX @_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_@FOP_PREFIX@_mt_end + 1); return ret; } -static int32_t @FOP_PREFIX @_dump_metrics(xlator_t *this, int fd) +static int32_t @FOP_PREFIX@_dump_metrics(xlator_t *this, int fd) { return 0; } -struct volume_options @FOP_PREFIX @_options[] = { +struct volume_options @FOP_PREFIX@_options[] = { /*{ .key = {""}, .type = GF_OPTION_TYPE_BOOL, .default_value = "", @@ -93,50 +91,50 @@ struct volume_options @FOP_PREFIX @_options[] = { }; xlator_api_t xlator_api = { - .init = @FOP_PREFIX @_init, - .fini = @FOP_PREFIX @_fini, - .notify = @FOP_PREFIX @_notify, - .reconfigure = @FOP_PREFIX @_reconfigure, - .mem_acct_init = @FOP_PREFIX @_mem_acct_init, - .dump_metrics = @FOP_PREFIX @_dump_metrics, + .init = @FOP_PREFIX@_init, + .fini = @FOP_PREFIX@_fini, + .notify = @FOP_PREFIX@_notify, + .reconfigure = @FOP_PREFIX@_reconfigure, + .mem_acct_init = @FOP_PREFIX@_mem_acct_init, + .dump_metrics = @FOP_PREFIX@_dump_metrics, .op_version = {GD_OP_VERSION_}, - .dumpops = &@FOP_PREFIX @_dumpops, - .fops = &@FOP_PREFIX @_fops, + .dumpops = &@FOP_PREFIX@_dumpops, + .fops = &@FOP_PREFIX@_fops, .cbks = &@FOP_PREFIX @_cbks, - .options = @FOP_PREFIX @_options, + .options = @FOP_PREFIX@_options, .identifier = "@XL_NAME@", .category = GF_EXPERIMENTAL, }; #pragma fragment HEADER_FMT -#ifndef __ @HFL_NAME @_H__ -#define __ @HFL_NAME @_H__ +#ifndef __ @HFL_NAME@_H__ +#define __ @HFL_NAME@_H__ #include "@XL_NAME@-mem-types.h" #include "@XL_NAME@-messages.h" -#include "glusterfs.h" -#include "xlator.h" -#include "defaults.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> #endif /* __@HFL_NAME@_H__ */ #pragma fragment MEM_HEADER_FMT -#ifndef __ @HFL_NAME @_H__ -#define __ @HFL_NAME @_H__ +#ifndef __ @HFL_NAME@_H__ +#define __ @HFL_NAME@_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_mdc_mem_types_ { - gf_ @FOP_PREFIX @_mt_ = gf_common_mt_end + 1, - gf_ @FOP_PREFIX @_mt_end + gf_@FOP_PREFIX@_mt_ = gf_common_mt_end + 1, + gf_@FOP_PREFIX@_mt_end }; #endif /* __@HFL_NAME@_H__ */ #pragma fragment MSG_HEADER_FMT -#ifndef __ @HFL_NAME @_H__ -#define __ @HFL_NAME @_H__ +#ifndef __@HFL_NAME@_H__ +#define __@HFL_NAME@_H__ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. * @@ -148,6 +146,6 @@ enum gf_mdc_mem_types_ { * glfs-message-id.h. */ -GLFS_MSGID(@FOP_PREFIX @, @FOP_PREFIX @_MSG_NO_MEMORY); +GLFS_MSGID(@FOP_PREFIX@, @FOP_PREFIX@_MSG_NO_MEMORY); #endif /* __@HFL_NAME@_H__ */ diff --git a/extras/devel-tools/print-backtrace.sh b/extras/devel-tools/print-backtrace.sh index 72b93c1353f..33fbae288bc 100755 --- a/extras/devel-tools/print-backtrace.sh +++ b/extras/devel-tools/print-backtrace.sh @@ -40,9 +40,11 @@ if [ ! $debuginfo_rpm ] || [ ! $backtrace_file ]; then exit 1 fi -if [ $debuginfo_rpm != "none" ] && [ ! 
-f $debuginfo_rpm ]; then - echo "no such rpm file: $debuginfo_rpm" - exit 1 +if [ $debuginfo_rpm != "none" ]; then + if [ ! -f $debuginfo_rpm ]; then + echo "no such rpm file: $debuginfo_rpm" + exit 1 + fi fi if [ ! -f $backtrace_file ]; then @@ -50,9 +52,11 @@ if [ ! -f $backtrace_file ]; then exit 1 fi -if ! file $debuginfo_rpm | grep RPM >/dev/null 2>&1 ; then - echo "file does not look like an rpm: $debuginfo_rpm" - exit 1 +if [ "$debuginfo_rpm" != "none" ]; then + if ! file $debuginfo_rpm | grep RPM >/dev/null 2>&1 ; then + echo "file does not look like an rpm: $debuginfo_rpm" + exit 1 + fi fi cpio_version=$(cpio --version|grep cpio|cut -f 2 -d ')'|sed -e 's/^[[:space:]]*//') @@ -92,15 +96,16 @@ else fi # NOTE: backtrace file should contain only the lines which need to be resolved -for bt in $(grep glusterfs $backtrace_file) +for bt in $(cat $backtrace_file) do libname=$(echo $bt | cut -f 1 -d '(') addr=$(echo $bt | cut -f 2 -d '(' | cut -f 1 -d ')') - # only unresolved addresses start with a '+' - if echo $addr | egrep '^\+' >/dev/null 2>&1 ; then - newbt=( $(eu-addr2line --functions --exe=${debuginfo_path}${libname}${debuginfo_extension} $addr) ) - echo "$bt ${newbt[*]}" + libpath=${debuginfo_path}${libname}${debuginfo_extension} + if [ ! -f $libpath ]; then + continue fi + newbt=( $(eu-addr2line --functions --exe=$libpath $addr) ) + echo "$bt ${newbt[*]}" done # remove the temporary directory diff --git a/extras/devel-tools/strace-brick.sh b/extras/devel-tools/strace-brick.sh new file mode 100755 index 00000000000..a140729111c --- /dev/null +++ b/extras/devel-tools/strace-brick.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Usage: +# nice -n -19 strace-brick.sh glusterfsd 50 + +brick_process_name=$1 +min_watch_cpu=$2 +if [ ! $brick_process_name ]; then + brick_process_name=glusterfsd +fi + +if [ ! $min_watch_cpu ]; then + min_watch_cpu=50 +fi + +echo "min_watch_cpu: $min_watch_cpu" + +break=false + +while ! $break; +do + mypids=( $(pgrep $brick_process_name) ) + echo "mypids: ${mypids[*]}" + + pid_args=$(echo ${mypids[*]} | sed -e 's/ / -p /g;s/^/-p /') + echo "pid_args: $pid_args" + + pcpu=( $(ps $pid_args -o pcpu -h ) ) + echo "pcpu: ${pcpu[*]}" + + wait_longer=false + + for i in $( seq 0 $((${#pcpu[*]} - 1)) ) + do + echo "i: $i" + echo "mypids[$i]: ${mypids[$i]}" + + int_pcpu=$(echo ${pcpu[$i]} | cut -f 1 -d '.') + echo "int_pcpu: $int_pcpu" + if [ ! $int_pcpu ] || [ ! 
$min_watch_cpu ]; then + break=true + echo "breaking" + fi + if [ $int_pcpu -ge $min_watch_cpu ]; then + wait_longer=true + mydirname="${brick_process_name}-${mypids[$i]}-$(date --utc +'%Y%m%d-%H%M%S.%N')" + $(mkdir $mydirname && cd $mydirname && timeout --kill-after=5 --signal=KILL 60 nice -n -19 strace -p ${mypids[$i]} -ff -tt -T -o $brick_process_name) & + fi + done + + if $wait_longer; then + sleep 90 + else + sleep 15 + fi +done diff --git a/extras/distributed-testing/distributed-test-env b/extras/distributed-testing/distributed-test-env index 0d57d5237dd..36fdd82e5dd 100644 --- a/extras/distributed-testing/distributed-test-env +++ b/extras/distributed-testing/distributed-test-env @@ -32,7 +32,6 @@ BROKEN_TESTS="\ tests/bugs/gfapi/bug-1093594.t\ tests/bugs/replicate/bug-1473026.t\ tests/bugs/replicate/bug-802417.t\ - tests/encryption/crypt.t\ tests/basic/inode-leak.t\ tests/basic/distribute/force-migration.t\ tests/basic/ec/heal-info.t\ diff --git a/extras/distributed-testing/distributed-test-runner.py b/extras/distributed-testing/distributed-test-runner.py index 7bfb6c9652a..5a07e2feab1 100755 --- a/extras/distributed-testing/distributed-test-runner.py +++ b/extras/distributed-testing/distributed-test-runner.py @@ -383,14 +383,17 @@ class Handlers: return self.shell.call("make install") == 0 @synchronized - def prove(self, id, test, timeout, valgrind=False, asan_noleaks=True): + def prove(self, id, test, timeout, valgrind="no", asan_noleaks=True): assert id == self.client_id self.shell.cd(self.gluster_root) env = "DEBUG=1 " - if valgrind: + if valgrind == "memcheck" or valgrind == "yes": cmd = "valgrind" cmd += " --tool=memcheck --leak-check=full --track-origins=yes" cmd += " --show-leak-kinds=all -v prove -v" + elif valgrind == "drd": + cmd = "valgrind" + cmd += " --tool=drd -v prove -v" elif asan_noleaks: cmd = "prove -v" env += "ASAN_OPTIONS=detect_leaks=0 " @@ -827,8 +830,9 @@ parser.add_argument("--port", help="server port to listen", type=int, default=DEFAULT_PORT) # test role parser.add_argument("--tester", help="start tester", action="store_true") -parser.add_argument("--valgrind", help="run tests under valgrind", - action="store_true") +parser.add_argument("--valgrind", + help="run tests with valgrind tool 'memcheck' or 'drd'", + choices=["no", "yes", "memcheck", "drd"], + default="no") parser.add_argument("--asan", help="test with asan enabled", action="store_true") parser.add_argument("--asan-noleaks", help="test with asan but no mem leaks", diff --git a/extras/ec-heal-script/README.md b/extras/ec-heal-script/README.md new file mode 100644 index 00000000000..aaefd6681f6 --- /dev/null +++ b/extras/ec-heal-script/README.md @@ -0,0 +1,69 @@ +# gluster-heal-scripts +Scripts to correct extended attributes of fragments of files to make them healable. + +Following are the guidelines/suggestions to use these scripts. + +1 - Passwordless ssh should be set up for all the nodes of the cluster. + +2 - Scripts should be executed from one of these nodes. + +3 - Make sure NO "IO" is going on for the files for which we are running +these two scripts. + +4 - There should be no heal going on for the file for which xattrs are being +set by correct_pending_heals.sh. Disable self-heal while running this script. + +5 - All the bricks of the volume should be UP to identify good and bad fragments +and to decide if an entry is healable or not. 
+ +6 - If correct_pending_heals.sh is stopped in the middle while it was processing +healable entries, it is suggested to re-run gfid_needing_heal_parallel.sh to create +the latest list of healable and non-healable entries and the "potential_heal" and "can_not_heal" files. + +7 - Based on the number of entries, these scripts might take time to get and set the +stats and xattrs of entries. + +8 - A backup of the fragments will be taken in the <brick path>/.glusterfs/correct_pending_heals + directory with a file name same as the gfid. + +9 - Once the correctness of the file gets verified by the user, these backups should be removed. + +10 - Make sure we have enough space on bricks to take these backups. + +11 - At the end this will create two files - + 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed. + 2 - can_not_heal - Contains list of files which can not be healed. + +12 - It is suggested that the integrity of the data of files, which were modified and healed, + should be checked by the user. + + +Usage: + +Following is the sequence of steps to use these scripts - + +1 - ./gfid_needing_heal_parallel.sh <volume name> + + Execute gfid_needing_heal_parallel.sh with the volume name to create lists of files which can + and can not be healed. It creates the "potential_heal" and "can_not_heal" files. + During execution, it also displays the list of files on the console with the verdict. + +2 - ./correct_pending_heals.sh + + Execute correct_pending_heals.sh without any argument. This script processes entries present + in the "potential_heal" file. It asks the user to enter how many files to process in one attempt. + Once the count is provided, this script will fetch the entries one by one from the "potential_heal" file and take necessary action. + If at this point also a file can not be healed, it will be pushed to the "can_not_heal" file. + If a file can be healed, this script will modify the xattrs of that file's fragments and create an entry in the "modified_and_backedup_files" file. + +3 - At the end, all the entries of "potential_heal" will be processed and based on the processing only two files will be left. + + 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed. + 2 - can_not_heal - Contains list of files which can not be healed. + +Logs and other files - + +1 - modified_and_backedup_files - It contains all the files which could be healed and the location of the backup of each fragment. +2 - can_not_heal - It contains all the files which can not be healed. +3 - potential_heal - List of files which could be healed and should be processed by "correct_pending_heals.sh" +4 - /var/log/glusterfs/ec-heal-script.log - It contains logs of both the scripts. diff --git a/extras/ec-heal-script/correct_pending_heals.sh b/extras/ec-heal-script/correct_pending_heals.sh new file mode 100755 index 00000000000..c9f19dd7c89 --- /dev/null +++ b/extras/ec-heal-script/correct_pending_heals.sh @@ -0,0 +1,415 @@ +#!/bin/bash +# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + +# This script finally resets the xattrs of all the fragments of a file +# which can be healed as per gfid_needing_heal_parallel.sh. 
+# gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal. +# This script takes potential_heal as input and resets the xattrs of all the fragments +# of those files present in this file which could be healed as per the +# trusted.ec.size xattr of the file; otherwise it will place the entry in the can_not_heal +# file. Those entries which must be healed will be placed in the must_heal file +# after setting xattrs so that the user can track those files. + + +MOD_BACKUP_FILES="modified_and_backedup_files" +CAN_NOT_HEAL="can_not_heal" +LOG_DIR="/var/log/glusterfs" +LOG_FILE="$LOG_DIR/ec-heal-script.log" +LINE_SEP="===================================================" + +function heal_log() +{ + echo "$1" >> "$LOG_FILE" +} + +function desc () +{ + echo "" + echo "This script finally resets the xattrs of all the fragments of a file +which can be healed as per gfid_needing_heal_parallel.sh. +gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal. +This script takes potential_heal as input and resets the xattrs of all the fragments +of those files present in this file which could be healed as per the +trusted.ec.size xattr of the file; otherwise it will place the entry in the can_not_heal +file. Those entries which must be healed will be placed in the must_heal file +after setting xattrs so that the user can track those files." +} + +function _init () +{ + if [ $# -ne 0 ] + then + echo "usage: $0" + desc + exit 2 + fi + + if [ ! -f "potential_heal" ] + then + echo "Nothing to correct. File "potential_heal" does not exist" + echo "" + desc + exit 2 + fi +} + +function total_file_size_in_hex() +{ + local frag_size=$1 + local size=0 + local hex_size="" + + size=$((frag_size * 4)) + hex_size=$(printf '0x%016x' $size) + echo "$hex_size" +} + +function backup_file_fragment() +{ + local file_host=$1 + local file_entry=$2 + local gfid_actual_paths=$3 + local brick_root="" + local temp="" + local backup_dir="" + local cmd="" + local gfid="" + + brick_root=$(echo "$file_entry" | cut -d "#" -f 1) + temp=$(echo "$(basename "$BASH_SOURCE")" | cut -d '.' 
-f 1) + backup_dir=$(echo "${brick_root}/.glusterfs/${temp}") + file_entry=${file_entry//#} + + gfid=$(echo "${gfid_actual_paths}" | cut -d '|' -f 1 | cut -d '/' -f 5) + echo "${file_host}:${backup_dir}/${gfid}" >> "$MOD_BACKUP_FILES" + + cmd="mkdir -p ${backup_dir} && yes | cp -af ${file_entry} ${backup_dir}/${gfid} 2>/dev/null" + ssh -n "${file_host}" "${cmd}" +} + +function set_frag_xattr () +{ + local file_host=$1 + local file_entry=$2 + local good=$3 + local cmd1="" + local cmd2="" + local cmd="" + local version="0x00000000000000010000000000000001" + local dirty="0x00000000000000010000000000000001" + + if [[ $good -eq 0 ]] + then + version="0x00000000000000000000000000000000" + fi + + cmd1=" setfattr -n trusted.ec.version -v ${version} ${file_entry} &&" + cmd2=" setfattr -n trusted.ec.dirty -v ${dirty} ${file_entry}" + cmd=${cmd1}${cmd2} + ssh -n "${file_host}" "${cmd}" +} + +function set_version_dirty_xattr () +{ + local file_paths=$1 + local good=$2 + local gfid_actual_paths=$3 + local file_entry="" + local file_host="" + local bpath="" + + for bpath in ${file_paths//,/ } + do + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + backup_file_fragment "$file_host" "$file_entry" "$gfid_actual_paths" + file_entry=${file_entry//#} + set_frag_xattr "$file_host" "$file_entry" "$good" + done +} + +function match_size_xattr_quorum () +{ + local file_paths=$1 + local file_entry="" + local file_host="" + local cmd="" + local size_xattr="" + local bpath="" + declare -A xattr_count + + for bpath in ${file_paths//,/ } + do + size_xattr="" + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + file_entry=${file_entry//#} + + cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2" + size_xattr=$(ssh -n "${file_host}" "${cmd}") + if [[ -n $size_xattr ]] + then + count=$((xattr_count["$size_xattr"] + 1)) + xattr_count["$size_xattr"]=${count} + if [[ $count -ge 4 ]] + then + echo "${size_xattr}" + return + fi + fi + done + echo "False" +} + +function match_version_xattr () +{ + local file_paths=$1 + local file_entry="" + local file_host="" + local cmd="" + local version="" + local bpath="" + declare -A ver_count + + for bpath in ${file_paths//,/ } + do + version="" + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + file_entry=${file_entry//#} + + cmd="getfattr -n trusted.ec.version -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.version" | cut -d '=' -f 2" + version=$(ssh -n "${file_host}" "${cmd}") + ver_count["$version"]=$((ver_count["$version"] + 1)) + done + for key in "${ver_count[@]}" + do + if [[ $key -ge 4 ]] + then + echo "True" + return + else + echo "False" + return + fi + done +} + +function match_stat_size_with_xattr () +{ + local bpath=$1 + local size=$2 + local file_stat=$3 + local xattr=$4 + local file_entry="" + local file_host="" + local cmd="" + local stat_output="" + local hex_size="" + + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + + file_entry=${file_entry//#} + cmd="stat --format=%F:%B:%s $file_entry 2>/dev/null" + stat_output=$(ssh -n "${file_host}" "${cmd}") + echo "$stat_output" | grep -w "${file_stat}" > /dev/null + + if [[ $? 
-eq 0 ]] + then + cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2" + hex_size=$(ssh -n "${file_host}" "${cmd}") + + if [[ -z $hex_size || "$hex_size" != "$xattr" ]] + then + echo "False" + return + fi + size_diff=$(printf '%d' $(( size - hex_size ))) + if [[ $size_diff -gt 2047 ]] + then + echo "False" + return + else + echo "True" + return + fi + else + echo "False" + return + fi +} + +function find_file_paths () +{ + local bpath=$1 + local file_entry="" + local file_host="" + local cmd="" + local brick_root="" + local gfid="" + local actual_path="" + local gfid_path="" + + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + brick_root=$(echo "$file_entry" | cut -d "#" -f 1) + + gfid=$(echo "${file_entry}" | grep ".glusterfs") + if [[ -n "$gfid" ]] + then + gfid_path=$(echo "$file_entry" | cut -d "#" -f 2) + file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' " + actual_path=$(ssh -n "${file_host}" "${cmd}") + #removing absolute path so that user can refer this from mount point + actual_path=${actual_path#"$brick_root"} + else + actual_path=$(echo "$file_entry" | cut -d "#" -f 2) + file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' " + gfid_path=$(ssh -n "${file_host}" "${cmd}") + gfid_path=${gfid_path#"$brick_root"} + fi + + echo "${gfid_path}|${actual_path}" +} + +function log_can_not_heal () +{ + local gfid_actual_paths=$1 + local file_paths=$2 + file_paths=${file_paths//#} + + echo "${LINE_SEP}" >> "$CAN_NOT_HEAL" + echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL" + for bpath in ${file_paths//,/ } + do + echo "${bpath}" >> "$CAN_NOT_HEAL" + done +} + +function check_all_frag_and_set_xattr () +{ + local file_paths=$1 + local total_size=$2 + local file_stat=$3 + local bpath="" + local healthy_count=0 + local match="False" + local matching_bricks="" + local bad_bricks="" + local gfid_actual_paths="" + + for bpath in ${file_paths//,/ } + do + if [[ -n "$gfid_actual_paths" ]] + then + break + fi + gfid_actual_paths=$(find_file_paths "$bpath") + done + + match=$(match_size_xattr_quorum "$file_paths") + +# echo "${match} : $bpath" >> "$MOD_BACKUP_FILES" + + if [[ "$match" != "False" ]] + then + xattr="$match" + for bpath in ${file_paths//,/ } + do + match="False" + match=$(match_stat_size_with_xattr "$bpath" "$total_size" "$file_stat" "$xattr") + if [[ "$match" == "True" ]] + then + matching_bricks="${bpath},${matching_bricks}" + healthy_count=$((healthy_count + 1)) + else + bad_bricks="${bpath},${bad_bricks}" + fi + done + fi + + if [[ $healthy_count -ge 4 ]] + then + match="True" + echo "${LINE_SEP}" >> "$MOD_BACKUP_FILES" + echo "Modified : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$MOD_BACKUP_FILES" + set_version_dirty_xattr "$matching_bricks" 1 "$gfid_actual_paths" + set_version_dirty_xattr "$bad_bricks" 0 "$gfid_actual_paths" + else + log_can_not_heal "$gfid_actual_paths" "${file_paths}" + fi + + echo "$match" +} +function set_xattr() +{ + local count=$1 + local heal_entry="" + local file_stat="" + local frag_size="" + local total_size="" + local file_paths="" + local num="" + local can_heal_count=0 + + heal_log "Started $(basename $BASH_SOURCE) on $(date) " + + while read -r heal_entry + do + heal_log "$LINE_SEP" + heal_log "${heal_entry}" + + file_stat=$(echo "$heal_entry" | cut -d "|" -f 1) + 
 frag_size=$(echo "$file_stat" | rev | cut -d ":" -f 1 | rev) + total_size="$(total_file_size_in_hex "$frag_size")" + file_paths=$(echo "$heal_entry" | cut -d "|" -f 2) + match=$(check_all_frag_and_set_xattr "$file_paths" "$total_size" "$file_stat") + if [[ "$match" == "True" ]] + then + can_heal_count=$((can_heal_count + 1)) + fi + + sed -i '1d' potential_heal + count=$((count - 1)) + if [ $count == 0 ] + then + num=$(cat potential_heal | wc -l) + heal_log "$LINE_SEP" + heal_log "${1} : Processed" + heal_log "${can_heal_count} : Modified to Heal" + heal_log "$((${1} - can_heal_count)) : Moved to can_not_heal." + heal_log "${num} : Pending as Potential Heal" + exit 0 + fi + + done < potential_heal +} + +function main () +{ + local count=0 + + read -p "Number of files to correct: [choose between 1-1000] (0 for All):" count + if [[ $count -lt 0 || $count -gt 1000 ]] + then + echo "Provide correct value:" + exit 2 + fi + + if [[ $count -eq 0 ]] + then + count=$(cat potential_heal | wc -l) + fi + set_xattr "$count" +} + +_init "$@" && main "$@" diff --git a/extras/ec-heal-script/gfid_needing_heal_parallel.sh b/extras/ec-heal-script/gfid_needing_heal_parallel.sh new file mode 100755 index 00000000000..d7f53c97c33 --- /dev/null +++ b/extras/ec-heal-script/gfid_needing_heal_parallel.sh @@ -0,0 +1,278 @@ +#!/bin/bash +# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + +# This script provides a list of all the files which can be healed or not healed. +# It also generates two files, potential_heal and can_not_heal, which contain the information +# of all those files. These files could be used by correct_pending_heals.sh to correct +# the fragments so that files could be healed by shd. + +CAN_NOT_HEAL="can_not_heal" +CAN_HEAL="potential_heal" +LINE_SEP="===================================================" +LOG_DIR="/var/log/glusterfs" +LOG_FILE="$LOG_DIR/ec-heal-script.log" + +function heal_log() +{ + echo "$1" >> "$LOG_FILE" +} + +function _init () +{ + if [ $# -ne 1 ]; then + echo "usage: $0 <gluster volume name>"; + echo "This script provides a list of all the files which can be healed or not healed. +It also generates two files, potential_heal and can_not_heal, which contain the information +of all those files. These files could be used by correct_pending_heals.sh to correct +the fragments so that files could be healed by shd." 
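+ # Note: the quorum checks in these scripts (at least 4 matching
+ # fragments, 6 bricks per subvolume, file size = fragment size x 4)
+ # assume a 4+2 disperse layout; other configurations would need
+ # different thresholds.
+ # Fragment xattrs can also be inspected by hand on any brick, e.g.
+ #   getfattr -d -m. -e hex <brick-root>/.glusterfs/aa/bb/<gfid>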
+ exit 2; + fi + + volume=$1; +} + +function get_pending_entries () +{ + local volume_name=$1 + + gluster volume heal "$volume_name" info | grep -v ":/" | grep -v "Number of entries" | grep -v "Status:" | sort -u | sed '/^$/d' +} + +function get_entry_path_on_brick() +{ + local path="$1" + local gfid_string="" + if [[ "${path:0:1}" == "/" ]]; + then + echo "$path" + else + gfid_string="$(echo "$path" | cut -f2 -d':' | cut -f1 -d '>')" + echo "/.glusterfs/${gfid_string:0:2}/${gfid_string:2:2}/$gfid_string" + fi +} + +function run_command_on_server() +{ + local subvolume="$1" + local host="$2" + local cmd="$3" + local output + output=$(ssh -n "${host}" "${cmd}") + if [ -n "$output" ] + then + echo "$subvolume:$output" + fi +} + +function get_entry_path_all_bricks () +{ + local entry="$1" + local bricks="$2" + local cmd="" + for brick in $bricks + do + echo "${brick}#$(get_entry_path_on_brick "$entry")" + done | tr '\n' ',' +} + +function get_stat_for_entry_from_all_bricks () +{ + local entry="$1" + local bricks="$2" + local subvolume=0 + local host="" + local bpath="" + local cmd="" + + for brick in $bricks + do + if [[ "$((subvolume % 6))" == "0" ]] + then + subvolume=$((subvolume+1)) + fi + host=$(echo "$brick" | cut -f1 -d':') + bpath=$(echo "$brick" | cut -f2 -d':') + + cmd="stat --format=%F:%B:%s $bpath$(get_entry_path_on_brick "$entry") 2>/dev/null" + run_command_on_server "$subvolume" "${host}" "${cmd}" & + done | sort | uniq -c | sort -rnk1 +} + +function get_bricks_from_volume() +{ + local v=$1 + gluster volume info "$v" | grep -E "^Brick[0-9][0-9]*:" | cut -f2- -d':' +} + +function print_entry_gfid() +{ + local host="$1" + local dirpath="$2" + local entry="$3" + local gfid + gfid="$(ssh -n "${host}" "getfattr -d -m. -e hex $dirpath/$entry 2>/dev/null | grep trusted.gfid=|cut -f2 -d'='")" + echo "$entry" - "$gfid" +} + +function print_brick_directory_info() +{ + local h="$1" + local dirpath="$2" + while read -r e + do + print_entry_gfid "${h}" "${dirpath}" "${e}" + done < <(ssh -n "${h}" "ls $dirpath 2>/dev/null") +} + +function print_directory_info() +{ + local entry="$1" + local bricks="$2" + local h + local b + local gfid + for brick in $bricks; + do + h="$(echo "$brick" | cut -f1 -d':')" + b="$(echo "$brick" | cut -f2 -d':')" + dirpath="$b$(get_entry_path_on_brick "$entry")" + print_brick_directory_info "${h}" "${dirpath}" & + done | sort | uniq -c +} + +function print_entries_needing_heal() +{ + local quorum=0 + local entry="$1" + local bricks="$2" + while read -r line + do + quorum=$(echo "$line" | awk '{print $1}') + if [[ "$quorum" -lt 4 ]] + then + echo "$line - Not in Quorum" + else + echo "$line - In Quorum" + fi + done < <(print_directory_info "$entry" "$bricks") +} + +function find_file_paths () +{ + local bpath=$1 + local file_entry="" + local file_host="" + local cmd="" + local brick_root="" + local gfid="" + local actual_path="" + local gfid_path="" + + file_host=$(echo "$bpath" | cut -d ":" -f 1) + file_entry=$(echo "$bpath" | cut -d ":" -f 2) + brick_root=$(echo "$file_entry" | cut -d "#" -f 1) + + gfid=$(echo "${file_entry}" | grep ".glusterfs") + + if [[ -n "$gfid" ]] + then + gfid_path=$(echo "$file_entry" | cut -d "#" -f 2) + file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' " + actual_path=$(ssh -n "${file_host}" "${cmd}") + #removing absolute path so that user can refer this from mount point + actual_path=${actual_path#"$brick_root"} + else + actual_path=$(echo "$file_entry" | cut -d "#" -f 2) 
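+ # the entry has no ".glusterfs" component, i.e. it is a regular path; + # the matching gfid hardlink is resolved below with find -samefile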
+ file_entry=${file_entry//#} + cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' " + gfid_path=$(ssh -n "${file_host}" "${cmd}") + gfid_path=${gfid_path#"$brick_root"} + fi + + echo "${gfid_path}|${actual_path}" +} + +function log_can_not_heal () +{ + local gfid_actual_paths=$1 + local file_paths=$2 + file_paths=${file_paths//#} + + echo "${LINE_SEP}" >> "$CAN_NOT_HEAL" + echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL" + for bpath in ${file_paths//,/ } + do + echo "${bpath}" >> "$CAN_NOT_HEAL" + done +} + +function main () +{ + local bricks="" + local quorum=0 + local stat_info="" + local file_type="" + local gfid_actual_paths="" + local bpath="" + local file_paths="" + local good=0 + local bad=0 + bricks=$(get_bricks_from_volume "$volume") + rm -f "$CAN_HEAL" + rm -f "$CAN_NOT_HEAL" + mkdir "$LOG_DIR" -p + + heal_log "Started $(basename "$BASH_SOURCE") on $(date) " + while read -r heal_entry + do + heal_log "------------------------------------------------------------------" + heal_log "$heal_entry" + + gfid_actual_paths="" + file_paths="$(get_entry_path_all_bricks "$heal_entry" "$bricks")" + stat_info="$(get_stat_for_entry_from_all_bricks "$heal_entry" "$bricks")" + heal_log "$stat_info" + + quorum=$(echo "$stat_info" | head -1 | awk '{print $1}') + good_stat=$(echo "$stat_info" | head -1 | awk '{print $3}') + file_type="$(echo "$stat_info" | head -1 | cut -f2 -d':')" + if [[ "$file_type" == "directory" ]] + then + print_entries_needing_heal "$heal_entry" "$bricks" + else + if [[ "$quorum" -ge 4 ]] + then + good=$((good + 1)) + heal_log "Verdict: Healable" + + echo "${good_stat}|$file_paths" >> "$CAN_HEAL" + else + bad=$((bad + 1)) + heal_log "Verdict: Not Healable" + for bpath in ${file_paths//,/ } + do + if [[ -z "$gfid_actual_paths" ]] + then + gfid_actual_paths=$(find_file_paths "$bpath") + else + break + fi + done + log_can_not_heal "$gfid_actual_paths" "${file_paths}" + fi + fi + done < <(get_pending_entries "$volume") + heal_log "=========================================" + heal_log "Total number of potential heal : ${good}" + heal_log "Total number of can not heal : ${bad}" + heal_log "=========================================" +} + +_init "$@" && main "$@" diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am new file mode 100644 index 00000000000..9eaa401b6c8 --- /dev/null +++ b/extras/ganesha/Makefile.am @@ -0,0 +1,2 @@ +SUBDIRS = scripts config ocf +CLEANFILES = diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am new file mode 100644 index 00000000000..c729273096e --- /dev/null +++ b/extras/ganesha/config/Makefile.am @@ -0,0 +1,4 @@ +EXTRA_DIST= ganesha-ha.conf.sample + +confdir = $(sysconfdir)/ganesha +conf_DATA = ganesha-ha.conf.sample diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample new file mode 100644 index 00000000000..c22892bde56 --- /dev/null +++ b/extras/ganesha/config/ganesha-ha.conf.sample @@ -0,0 +1,19 @@ +# Name of the HA cluster created. +# must be unique within the subnet +HA_NAME="ganesha-ha-360" +# +# N.B. you may use short names or long names; you may not use IP addrs. +# Once you select one, stay with it as it will be mildly unpleasant to +# clean up if you switch later on. Ensure that all names - short and/or +# long - are in DNS or /etc/hosts on all machines in the cluster. +# +# The subset of nodes of the Gluster Trusted Pool that form the ganesha +# HA cluster. 
Hostname is specified. +HA_CLUSTER_NODES="server1,server2,..." +#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..." +# +# Virtual IPs for each of the nodes specified above. +VIP_server1="10.0.2.1" +VIP_server2="10.0.2.2" +#VIP_server1_lab_redhat_com="10.0.2.1" +#VIP_server2_lab_redhat_com="10.0.2.2" diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am new file mode 100644 index 00000000000..990a609f254 --- /dev/null +++ b/extras/ganesha/ocf/Makefile.am @@ -0,0 +1,11 @@ +EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd + +# The root of the OCF resource agent hierarchy +# Per the OCF standard, it's always "lib", +# not "lib64" (even on 64-bit platforms). +ocfdir = $(prefix)/lib/ocf + +# The provider directory +radir = $(ocfdir)/resource.d/heartbeat + +ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace new file mode 100644 index 00000000000..825f7164597 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_grace @@ -0,0 +1,221 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "$OCF_DEBUG_LIBRARY" ]; then + . $OCF_DEBUG_LIBRARY +else + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +OCF_RESKEY_grace_active_default="grace-active" +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} + +ganesha_meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_grace"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. 
+</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> +</parameters> + +<actions> +<action name="start" timeout="40s" /> +<action name="stop" timeout="40s" /> +<action name="status" timeout="20s" interval="60s" /> +<action name="monitor" depth="0" timeout="10s" interval="5s" /> +<action name="notify" timeout="10s" /> +<action name="meta-data" timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_grace_usage() { + echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case $__OCF_ACTION in + meta-data) ganesha_meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) ganesha_usage + exit ${OCF_SUCCESS} + ;; + *) + ;; +esac + +ganesha_grace_start() +{ + local rc=${OCF_ERR_GENERIC} + local host=$(hostname -s) + + ocf_log debug "ganesha_grace_start()" + # give ganesha_mon RA a chance to set the crm_attr first + # I mislike the sleep, but it's not clear that looping + # with a small sleep is necessarily better + # start has a 40sec timeout, so a 5sec sleep here is okay + sleep 5 + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then + host=$(hostname) + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null ) + if [ $? -ne 0 ]; then + ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + fi + + # Three possibilities: + # 1. There is no attribute at all and attr_updater returns + # a zero length string. This happens when + # ganesha_mon::monitor hasn't run at least once to set + # the attribute. The assumption here is that the system + # is coming up. We pretend, for now, that the node is + # healthy, to allow the system to continue coming up. + # It will cure itself in a few seconds + # 2. There is an attribute, and it has the value "1"; this + # node is healthy. + # 3. There is an attribute, but it has no value or the value + # "0"; this node is not healthy. + + # case 1 + if [[ -z "${attr}" ]]; then + return ${OCF_SUCCESS} + fi + + # case 2 + if [[ "${attr}" = *"value=1" ]]; then + return ${OCF_SUCCESS} + fi + + # case 3 + return ${OCF_NOT_RUNNING} +} + +ganesha_grace_stop() +{ + + ocf_log debug "ganesha_grace_stop()" + return ${OCF_SUCCESS} +} + +ganesha_grace_notify() +{ + # since this is a clone RA we should only ever see pre-start + # or post-stop + mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}" + case "${mode}" in + pre-start | post-stop) + dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} + if [ $? -ne 0 ]; then + ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed" + fi + ;; + esac + + return ${OCF_SUCCESS} +} + +ganesha_grace_monitor() +{ + local host=$(hostname -s) + + ocf_log debug "monitor" + + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? 
-ne 0 ]; then + host=$(hostname) + attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null) + if [ $? -ne 0 ]; then + ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed" + fi + fi + + # if there is no attribute (yet), maybe it's because + # this RA started before ganesha_mon (nfs-mon) has had + # chance to create it. In which case we'll pretend + # everything is okay this time around + if [[ -z "${attr}" ]]; then + return ${OCF_SUCCESS} + fi + + if [[ "${attr}" = *"value=1" ]]; then + return ${OCF_SUCCESS} + fi + + return ${OCF_NOT_RUNNING} +} + +ganesha_grace_validate() +{ + return ${OCF_SUCCESS} +} + +ganesha_grace_validate + +# Translate each action into the appropriate function call +case $__OCF_ACTION in +start) ganesha_grace_start + ;; +stop) ganesha_grace_stop + ;; +status|monitor) ganesha_grace_monitor + ;; +notify) ganesha_grace_notify + ;; +*) ganesha_grace_usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac + +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon new file mode 100644 index 00000000000..2b4a9d6da84 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_mon @@ -0,0 +1,234 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then + . ${OCF_DEBUG_LIBRARY} +else + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +# Defaults +OCF_RESKEY_ganesha_active_default="ganesha-active" +OCF_RESKEY_grace_active_default="grace-active" +OCF_RESKEY_grace_delay_default="5" + +: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}} +: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}} +: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}} + +ganesha_meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_mon"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. 
+</longdesc> + +<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc> + +<parameters> +<parameter name="ganesha_active"> +<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc> +<content type="string" default="ganesha-active" /> +</parameter> +<parameter name="grace_active"> +<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc> +<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc> +<content type="string" default="grace-active" /> +</parameter> +<parameter name="grace_delay"> +<longdesc lang="en"> +NFS-Ganesha grace delay. +When changing this, adjust the ganesha_grace RA's monitor interval to match. +</longdesc> +<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc> +<content type="string" default="5" /> +</parameter> +</parameters> + +<actions> +<action name="start" timeout="40s" /> +<action name="stop" timeout="40s" /> +<action name="status" timeout="20s" interval="60s" /> +<action name="monitor" depth="0" timeout="10s" interval="10s" /> +<action name="meta-data" timeout="20s" /> +</actions> +</resource-agent> +END + +return ${OCF_SUCCESS} +} + +ganesha_mon_usage() { + echo "ganesha.nfsd USAGE" +} + +# Make sure meta-data and usage always succeed +case ${__OCF_ACTION} in + meta-data) ganesha_meta_data + exit ${OCF_SUCCESS} + ;; + usage|help) ganesha_usage + exit ${OCF_SUCCESS} + ;; + *) + ;; +esac + +ganesha_mon_start() +{ + ocf_log debug "ganesha_mon_start" + ganesha_mon_monitor + return $OCF_SUCCESS +} + +ganesha_mon_stop() +{ + ocf_log debug "ganesha_mon_stop" + return $OCF_SUCCESS +} + +ganesha_mon_monitor() +{ + local host=$(hostname -s) + local pid_file="/var/run/ganesha.pid" + local rhel6_pid_file="/var/run/ganesha.nfsd.pid" + local proc_pid="/proc/" + + # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid + # RHEL7 systemd does not. Would be nice if all distros used the + # same pid file. + if [ -e ${rhel6_pid_file} ]; then + pid_file=${rhel6_pid_file} + fi + if [ -e ${pid_file} ]; then + proc_pid="${proc_pid}$(cat ${pid_file})" + fi + + if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then + + attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed" + fi + + # ganesha_grace (nfs-grace) RA follows grace-active attr + # w/ constraint location + attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed" + fi + + # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace) + # track grace-active crm_attr (attr != crm_attr) + # we can't just use the attr as there's no way to query + # its value in RHEL6 pacemaker + + crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null + if [ $? -ne 0 ]; then + host=$(hostname) + crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null + if [ $? -ne 0 ]; then + ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed" + fi + fi + + return ${OCF_SUCCESS} + fi + + # VIP fail-over is triggered by clearing the + # ganesha-active node attribute on this node. + # + # Meanwhile the ganesha_grace notify() runs when its + # nfs-grace resource is disabled on a node; which + # is triggered by clearing the grace-active attribute + # on this node. 
+ # + # We need to allow time for it to run and put + # the remaining ganesha.nfsds into grace before + # initiating the VIP fail-over. + + attrd_updater -D -n ${OCF_RESKEY_grace_active} + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed" + fi + + host=$(hostname -s) + crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null + if [ $? -ne 0 ]; then + host=$(hostname) + crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null + if [ $? -ne 0 ]; then + ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed" + fi + fi + + sleep ${OCF_RESKEY_grace_delay} + + attrd_updater -D -n ${OCF_RESKEY_ganesha_active} + if [ $? -ne 0 ]; then + ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed" + fi + + return ${OCF_SUCCESS} +} + +ganesha_mon_validate() +{ + return ${OCF_SUCCESS} +} + +ganesha_mon_validate + +# Translate each action into the appropriate function call +case ${__OCF_ACTION} in +start) ganesha_mon_start + ;; +stop) ganesha_mon_stop + ;; +status|monitor) ganesha_mon_monitor + ;; +*) ganesha_mon_usage + exit ${OCF_ERR_UNIMPLEMENTED} + ;; +esac + +rc=$? + +# The resource agent may optionally log a debug message +ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc" +exit $rc diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd new file mode 100644 index 00000000000..f91e8b6b8f7 --- /dev/null +++ b/extras/ganesha/ocf/ganesha_nfsd @@ -0,0 +1,167 @@ +#!/bin/bash +# +# Copyright (c) 2014 Anand Subramanian anands@redhat.com +# Copyright (c) 2015 Red Hat Inc. +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# Further, this software is distributed without any warranty that it is +# free of the rightful claim of any third person regarding infringement +# or the like. Any license provided herein, whether implied or +# otherwise, applies only to this software file. Patent licenses, if +# any, provided herein do not apply to combinations of this program with +# other software, or any other product whatsoever. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +# +# + +# Initialization: +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +if [ -n "${OCF_DEBUG_LIBRARY}" ]; then + . ${OCF_DEBUG_LIBRARY} +else + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs +fi + +OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage" +: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}} + +ganesha_meta_data() { + cat <<END +<?xml version="1.0"?> +<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> +<resource-agent name="ganesha_nfsd"> +<version>1.0</version> + +<longdesc lang="en"> +This Linux-specific resource agent acts as a dummy +resource agent for nfs-ganesha. 
+</longdesc>
+
+<shortdesc lang="en">Manages /var/lib/nfs symlinks for the nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="ha_vol_mnt">
+<longdesc lang="en">HA State Volume Mount Point</longdesc>
+<shortdesc lang="en">HA State Volume Mount Point</shortdesc>
+<content type="string" default="/run/gluster/shared_storage" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="5s" />
+<action name="stop" timeout="5s" />
+<action name="status" depth="0" timeout="5s" interval="0" />
+<action name="monitor" depth="0" timeout="5s" interval="0" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_usage() {
+    echo "ganesha_nfsd USAGE: start|stop|status|monitor|meta-data"
+}
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+    meta-data)  ganesha_meta_data
+                exit ${OCF_SUCCESS}
+                ;;
+    usage|help) ganesha_nfsd_usage
+                exit ${OCF_SUCCESS}
+                ;;
+    *)
+                ;;
+esac
+
+ganesha_nfsd_start()
+{
+    local long_host=$(hostname)
+
+    if [[ -d /var/lib/nfs ]]; then
+        mv /var/lib/nfs /var/lib/nfs.backup
+        if [ $? -ne 0 ]; then
+            ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed"
+        fi
+        ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs
+        if [ $? -ne 0 ]; then
+            ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed"
+        fi
+    fi
+
+    return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_stop()
+{
+
+    if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then
+        rm -f /var/lib/nfs
+        if [ $? -ne 0 ]; then
+            ocf_log notice "rm -f /var/lib/nfs failed"
+        fi
+        mv /var/lib/nfs.backup /var/lib/nfs
+        if [ $? -ne 0 ]; then
+            ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed"
+        fi
+    fi
+
+    return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_monitor()
+{
+    # pacemaker checks to see if the RA is already running before starting
+    # it. If we return success, it is presumed to be running already and
+    # does not need to be started. Return something other than success
+    # to make pacemaker invoke the start action.
+    if [[ -L /var/lib/nfs ]]; then
+        return ${OCF_SUCCESS}
+    fi
+    return ${OCF_NOT_RUNNING}
+}
+
+ganesha_nfsd_validate()
+{
+    return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_validate
+
+# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION"
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start)          ganesha_nfsd_start
+                ;;
+stop)           ganesha_nfsd_stop
+                ;;
+status|monitor) ganesha_nfsd_monitor
+                ;;
+*)              ganesha_nfsd_usage
+                exit ${OCF_ERR_UNIMPLEMENTED}
+                ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am new file mode 100644 index 00000000000..7e345fd5f19 --- /dev/null +++ b/extras/ganesha/scripts/Makefile.am @@ -0,0 +1,6 @@
+EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \
+            ganesha-ha.sh
+
+scriptsdir = $(libexecdir)/ganesha
+scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \
+                  ganesha-ha.sh
diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh new file mode 100755 index 00000000000..3040e8138b0 --- /dev/null +++ b/extras/ganesha/scripts/create-export-ganesha.sh @@ -0,0 +1,92 @@
+#!/bin/bash
+
+# This script is called by glusterd when the user
+# tries to export a volume via NFS-Ganesha.
+# An export file specific to the volume
+# is created in GANESHA_DIR/exports.
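+#
+# A sketch of the expected invocation (the volume name "demovol" is
+# illustrative; glusterd passes the arguments positionally as
+# GANESHA_DIR, on|off, and the volume name):
+#   create-export-ganesha.sh /etc/ganesha on demovol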
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+    then
+    . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+    then
+    . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+    then
+    . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+declare -i EXPORT_ID
+
+function check_cmd_status()
+{
+    if [ "$1" != "0" ]
+    then
+        rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+        sed -i /$VOL.conf/d $CONF
+        exit 1
+    fi
+}
+
+
+if [ ! -d "$GANESHA_DIR/exports" ];
+    then
+    mkdir $GANESHA_DIR/exports
+    check_cmd_status `echo $?`
+fi
+
+function write_conf()
+{
+echo -e "# WARNING : Using Gluster CLI will overwrite manual
+# changes made to this file. To avoid it, edit the
+# file and run ganesha-ha.sh --refresh-config."
+
+echo "EXPORT{"
+echo "      Export_Id = 2;"
+echo "      Path = \"/$VOL\";"
+echo "      FSAL {"
+echo "           name = "GLUSTER";"
+echo "           hostname=\"localhost\";"
+echo "           volume=\"$VOL\";"
+echo "      }"
+echo "      Access_type = RW;"
+echo "      Disable_ACL = true;"
+echo '      Squash="No_root_squash";'
+echo "      Pseudo=\"/$VOL\";"
+echo '      Protocols = "3", "4" ;'
+echo '      Transports = "UDP","TCP";'
+echo '      SecType = "sys";'
+echo '      Security_Label = False;'
+echo "}"
+}
+if [ "$OPTION" = "on" ];
+then
+    if ! (cat $CONF | grep $VOL.conf\"$ )
+    then
+        write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf
+        echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF
+        count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l`
+        if [ "$count" = "1" ] ; then
+            EXPORT_ID=2
+        else
+            EXPORT_ID=`cat $GANESHA_DIR/.export_added`
+            check_cmd_status `echo $?`
+            EXPORT_ID=EXPORT_ID+1
+            sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \
+                $GANESHA_DIR/exports/export.$VOL.conf
+            check_cmd_status `echo $?`
+        fi
+        echo $EXPORT_ID > $GANESHA_DIR/.export_added
+    fi
+else
+    rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+    sed -i /$VOL.conf/d $CONF
+fi
diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh new file mode 100755 index 00000000000..9d613a0e7ad --- /dev/null +++ b/extras/ganesha/scripts/dbus-send.sh @@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+    then
+    . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+    then
+    . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+    then
+    . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+
+function check_cmd_status()
+{
+    if [ "$1" != "0" ]
+    then
+        logger "dynamic export failed on node :$(hostname -s)"
+    fi
+}
+
+# This function adds an export entry dynamically over D-Bus
+function dynamic_export_add()
+{
+    dbus-send --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \
+string:"EXPORT(Path=/$VOL)"
+    check_cmd_status `echo $?`
+}
+
+# This function removes an export dynamically (using the export_id of the export)
+function dynamic_export_remove()
+{
+    # The pipeline below fetches all exports from the ShowExports command
+    # and searches for the export entry based on its path.
+    # There are two possibilities for the path: either the entire volume
+    # is exported, or a subdir. It handles both cases, but it removes only
+    # the first entry from the list, based on the assumption that the
+    # entry exported via the CLI has the lowest export id value.
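+    # For illustration, a --print-reply listing contains, per export,
+    # adjacent lines of roughly this shape (values hypothetical):
+    #   uint16 2
+    #   string "/demovol"
+    # The pipeline below picks the uint16 preceding the matching path.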
+    removed_id=$(dbus-send --type=method_call --print-reply --system \
+        --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+        org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \
+        "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \
+        | head -1)
+
+    dbus-send --print-reply --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id
+    check_cmd_status `echo $?`
+}
+
+if [ "$OPTION" = "on" ];
+then
+    dynamic_export_add $@
+fi
+
+if [ "$OPTION" = "off" ];
+then
+    dynamic_export_remove $@
+fi
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh new file mode 100644 index 00000000000..9790a719e10 --- /dev/null +++ b/extras/ganesha/scripts/ganesha-ha.sh @@ -0,0 +1,1199 @@
+#!/bin/bash
+
+# Copyright 2015-2016 Red Hat Inc. All Rights Reserved
+#
+# Pacemaker+Corosync High Availability for NFS-Ganesha
+#
+# setup, teardown, add, delete, refresh-config, and status
+#
+# Each participating node in the cluster is assigned a virtual IP (VIP)
+# which fails over to another node when its associated ganesha.nfsd dies
+# for any reason. After the VIP is moved to another node all the
+# ganesha.nfsds are sent a signal using DBUS to put them into NFS GRACE.
+#
+# There are six resource agent types used: ganesha_mon, ganesha_grace,
+# ganesha_nfsd, portblock, IPaddr, and Dummy. ganesha_mon is used to
+# monitor the ganesha.nfsd. ganesha_grace is used to send the DBUS signal
+# to put the remaining ganesha.nfsds into grace. ganesha_nfsd is used to
+# start and stop the ganesha.nfsd during setup and teardown. portblock
+# blocks and unblocks the NFS port while a VIP moves. IPaddr manages
+# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to
+# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
+# the new host.
+
+GANESHA_HA_SH=$(realpath $0)
+HA_NUM_SERVERS=0
+HA_SERVERS=""
+HA_VOL_NAME="gluster_shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
+HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
+SERVICE_MAN="DISTRO_NOT_FOUND"
+
+# rhel, fedora id, version
+ID=""
+VERSION_ID=""
+
+PCS9OR10_PCS_CNAME_OPTION=""
+PCS9OR10_PCS_CLONE_OPTION="clone"
+SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
+
+# The UNBLOCK RA uses shared_storage, which may become unavailable
+# during a reboot of any of the nodes. Hence the increased timeout value.
+PORTBLOCK_UNBLOCK_TIMEOUT="60s"
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+    then
+    . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+    then
+    . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+    then
+    .
/etc/default/ganesha +fi + +GANESHA_CONF= + +function find_rhel7_conf +{ + while [[ $# > 0 ]] + do + key="$1" + case $key in + -f) + CONFFILE="$2" + break; + ;; + *) + ;; + esac + shift + done +} + +if [ -z ${CONFFILE} ] + then + find_rhel7_conf ${OPTIONS} + +fi + +GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf} + +usage() { + + echo "Usage : add|delete|refresh-config|status" + echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \ +<NODE-HOSTNAME> <NODE-VIP>" + echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \ +<NODE-HOSTNAME>" + echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \ +<volume>" + echo "Status : ganesha-ha.sh --status <HA_CONFDIR>" +} + +determine_service_manager () { + + if [ -e "/bin/systemctl" ]; + then + SERVICE_MAN="/bin/systemctl" + elif [ -e "/sbin/invoke-rc.d" ]; + then + SERVICE_MAN="/sbin/invoke-rc.d" + elif [ -e "/sbin/service" ]; + then + SERVICE_MAN="/sbin/service" + fi + if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]] + then + logger "Service manager not recognized, exiting" + exit 1 + fi +} + +manage_service () +{ + local action=${1} + local new_node=${2} + local option= + + if [[ "${action}" == "start" ]]; then + option="yes" + else + option="no" + fi + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option" + + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]] + then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}" + fi +} + + +check_cluster_exists() +{ + local name=${1} + local cluster_name="" + + if [ -e /var/run/corosync.pid ]; then + cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3) + if [[ "${cluster_name}X" == "${name}X" ]]; then + logger "$name already exists, exiting" + exit 0 + fi + fi +} + + +determine_servers() +{ + local cmd=${1} + local num_servers=0 + local tmp_ifs=${IFS} + local ha_servers="" + + if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then + ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//') + IFS=$' ' + for server in ${ha_servers} ; do + num_servers=$(expr ${num_servers} + 1) + done + IFS=${tmp_ifs} + HA_NUM_SERVERS=${num_servers} + HA_SERVERS="${ha_servers}" + else + IFS=$',' + for server in ${HA_CLUSTER_NODES} ; do + num_servers=$(expr ${num_servers} + 1) + done + IFS=${tmp_ifs} + HA_NUM_SERVERS=${num_servers} + HA_SERVERS="${HA_CLUSTER_NODES//,/ }" + fi +} + +stop_ganesha_all() +{ + local serverlist=${1} + for node in ${serverlist} ; do + manage_service "stop" ${node} + done +} + +setup_cluster() +{ + local name=${1} + local num_servers=${2} + local servers=${3} + local unclean="" + local quorum_policy="stop" + + logger "setting up cluster ${name} with the following ${servers}" + + # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers} + pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} + if [ $? -ne 0 ]; then + logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out" + #set up failed stop all ganesha process and clean up symlinks in cluster + stop_ganesha_all "${servers}" + exit 1; + fi + + # pcs cluster auth ${servers} + pcs cluster auth + if [ $? 
-ne 0 ]; then + logger "pcs cluster auth failed" + fi + + pcs cluster start --all + if [ $? -ne 0 ]; then + logger "pcs cluster start failed" + exit 1; + fi + + sleep 1 + # wait for the cluster to elect a DC before querying or writing + # to the CIB. BZ 1334092 + crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 + while [ $? -ne 0 ]; do + crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1 + done + + unclean=$(pcs status | grep -u "UNCLEAN") + while [[ "${unclean}X" == "UNCLEANX" ]]; do + sleep 1 + unclean=$(pcs status | grep -u "UNCLEAN") + done + sleep 1 + + if [ ${num_servers} -lt 3 ]; then + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" + fi + + pcs property set stonith-enabled=false + if [ $? -ne 0 ]; then + logger "warning: pcs property set stonith-enabled=false failed" + fi +} + + +setup_finalize_ha() +{ + local cibfile=${1} + local stopped="" + + stopped=$(pcs status | grep -u "Stopped") + while [[ "${stopped}X" == "StoppedX" ]]; do + sleep 1 + stopped=$(pcs status | grep -u "Stopped") + done +} + + +refresh_config () +{ + local short_host=$(hostname -s) + local VOL=${1} + local HA_CONFDIR=${2} + local short_host=$(hostname -s) + + local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\ + awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') + + + if [ -e ${SECRET_PEM} ]; then + while [[ ${3} ]]; do + current_host=`echo ${3} | cut -d "." -f 1` + if [[ ${short_host} != ${current_host} ]]; then + output=$(ssh -oPasswordAuthentication=no \ +-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \ +"dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:\"EXPORT(Export_Id=$export_id)\" 2>&1") + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ]; then + echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}" + else + echo "Refresh-config completed on ${current_host}." + fi + + fi + shift + done + else + echo "Error: refresh-config failed. Passwordless ssh is not enabled." + exit 1 + fi + + # Run the same command on the localhost, + output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \ +string:$HA_CONFDIR/exports/export.$VOL.conf \ +string:"EXPORT(Export_Id=$export_id)" 2>&1) + ret=$? + logger <<< "${output}" + if [ ${ret} -ne 0 ] ; then + echo "Refresh-config failed on localhost." + else + echo "Success: refresh-config completed." + fi +} + + +teardown_cluster() +{ + local name=${1} + + for server in ${HA_SERVERS} ; do + if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then + logger "info: ${server} is not in config, removing" + + pcs cluster stop ${server} --force + if [ $? -ne 0 ]; then + logger "warning: pcs cluster stop ${server} failed" + fi + + pcs cluster node remove ${server} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster node remove ${server} failed" + fi + fi + done + + # BZ 1193433 - pcs doesn't reload cluster.conf after modification + # after teardown completes, a subsequent setup will appear to have + # 'remembered' the deleted node. 
You can work around this by + # issuing another `pcs cluster node remove $node`, + # `crm_node -f -R $server`, or + # `cibadmin --delete --xml-text '<node id="$server" + # uname="$server"/>' + + pcs cluster stop --all + if [ $? -ne 0 ]; then + logger "warning pcs cluster stop --all failed" + fi + + pcs cluster destroy + if [ $? -ne 0 ]; then + logger "error pcs cluster destroy failed" + exit 1 + fi +} + + +cleanup_ganesha_config () +{ + rm -f /etc/corosync/corosync.conf + rm -rf /etc/cluster/cluster.conf* + rm -rf /var/lib/pacemaker/cib/* + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf +} + +do_create_virt_ip_constraints() +{ + local cibfile=${1}; shift + local primary=${1}; shift + local weight="1000" + + # first a constraint location rule that says the VIP must be where + # there's a ganesha.nfsd running + pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed" + fi + + # then a set of constraint location prefers to set the prefered order + # for where a VIP should move + while [[ ${1} ]]; do + pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight} + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed" + fi + weight=$(expr ${weight} + 1000) + shift + done + # and finally set the highest preference for the VIP to its home node + # default weight when created is/was 100. + # on Fedora setting appears to be additive, so to get the desired + # value we adjust the weight + # weight=$(expr ${weight} - 100) + pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight} + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed" + fi +} + + +wrap_create_virt_ip_constraints() +{ + local cibfile=${1}; shift + local primary=${1}; shift + local head="" + local tail="" + + # build a list of peers, e.g. for a four node cluster, for node1, + # the result is "node2 node3 node4"; for node2, "node3 node4 node1" + # and so on. + while [[ ${1} ]]; do + if [[ ${1} == ${primary} ]]; then + shift + while [[ ${1} ]]; do + tail=${tail}" "${1} + shift + done + else + head=${head}" "${1} + fi + shift + done + do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head} +} + + +create_virt_ip_constraints() +{ + local cibfile=${1}; shift + + while [[ ${1} ]]; do + wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS} + shift + done +} + + +setup_create_resources() +{ + local cibfile=$(mktemp -u) + + # fixup /var/lib/nfs + logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}" + pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? -ne 0 ]; then + logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} + if [ $? -ne 0 ]; then + logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace + # start method. 
Allow time for ganesha_mon to start and set the + # ganesha-active crm_attribute + sleep 5 + + pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true + if [ $? -ne 0 ]; then + logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed" + fi + + pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1" + fi + + pcs cluster cib ${cibfile} + + while [[ ${1} ]]; do + + # this is variable indirection + # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"' + # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...) + # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1') + # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"') + # after the `eval ${clean_nvs}` there is a variable VIP_host_1 + # with the value '10_7_6_5', and the following \$$ magic to + # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us + # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s + # to give us ipaddr="10.7.6.5". whew! + name="VIP_${1}" + clean_name=${name//[-.]/_} + nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) + clean_nvs=${nvs//[-.]/_} + eval ${clean_nvs} + eval tmp_ipaddr=\$${clean_name} + ipaddr=${tmp_ipaddr//_/.} + + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" + fi + + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_unblock failed" + fi + + + shift + done + + create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + + pcs cluster cib-push ${cibfile} + if [ $? -ne 0 ]; then + logger "warning pcs cluster cib-push ${cibfile} failed" + fi + rm -f ${cibfile} +} + + +teardown_resources() +{ + # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2) + + # restore /var/lib/nfs + logger "notice: pcs resource delete nfs_setup-clone" + pcs resource delete nfs_setup-clone + if [ $? -ne 0 ]; then + logger "warning: pcs resource delete nfs_setup-clone failed" + fi + + # delete -clone resource agents + # in particular delete the ganesha monitor so we don't try to + # trigger anything when we shut down ganesha next. + pcs resource delete nfs-mon-clone + if [ $? 
-ne 0 ]; then + logger "warning: pcs resource delete nfs-mon-clone failed" + fi + + pcs resource delete nfs-grace-clone + if [ $? -ne 0 ]; then + logger "warning: pcs resource delete nfs-grace-clone failed" + fi + + while [[ ${1} ]]; do + pcs resource delete ${1}-group + if [ $? -ne 0 ]; then + logger "warning: pcs resource delete ${1}-group failed" + fi + shift + done + +} + + +recreate_resources() +{ + local cibfile=${1}; shift + + while [[ ${1} ]]; do + # this is variable indirection + # see the comment on the same a few lines up + name="VIP_${1}" + clean_name=${name//[-.]/_} + nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf) + clean_nvs=${nvs//[-.]/_} + eval ${clean_nvs} + eval tmp_ipaddr=\$${clean_name} + ipaddr=${tmp_ipaddr//_/.} + + pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=block ip=${ipaddr} --group ${1}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \ + cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed" + fi + + pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \ + portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \ + op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \ + op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${1}-nfs_unblock failed" + fi + + shift + done +} + + +addnode_recreate_resources() +{ + local cibfile=${1}; shift + local add_node=${1}; shift + local add_vip=${1}; shift + + recreate_resources ${cibfile} ${HA_SERVERS} + + pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${add_node}-nfs_block failed" + fi + pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \ + --after ${add_node}-nfs_block + if [ $? -ne 0 ]; then + logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \ + ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed" + fi + + pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 + if [ $? -ne 0 ]; then + logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed" + fi + pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \ + protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \ + tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \ + ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \ + timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} + if [ $? 
-ne 0 ]; then + logger "warning pcs resource create ${add_node}-nfs_unblock failed" + fi +} + + +clear_resources() +{ + local cibfile=${1}; shift + + while [[ ${1} ]]; do + pcs -f ${cibfile} resource delete ${1}-group + if [ $? -ne 0 ]; then + logger "warning: pcs -f ${cibfile} resource delete ${1}-group" + fi + + shift + done +} + + +addnode_create_resources() +{ + local add_node=${1}; shift + local add_vip=${1}; shift + local cibfile=$(mktemp -u) + + # start HA on the new node + pcs cluster start ${add_node} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster start ${add_node} failed" + fi + + pcs cluster cib ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib ${cibfile} failed" + fi + + # delete all the -cluster_ip-1 resources, clearing + # their constraints, then create them again so we can + # recompute their constraints + clear_resources ${cibfile} ${HA_SERVERS} + addnode_recreate_resources ${cibfile} ${add_node} ${add_vip} + + HA_SERVERS="${HA_SERVERS} ${add_node}" + create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + + pcs cluster cib-push ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib-push ${cibfile} failed" + fi + rm -f ${cibfile} +} + + +deletenode_delete_resources() +{ + local node=${1}; shift + local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//) + local cibfile=$(mktemp -u) + + pcs cluster cib ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib ${cibfile} failed" + fi + + # delete all the -cluster_ip-1 and -trigger_ip-1 resources, + # clearing their constraints, then create them again so we can + # recompute their constraints + clear_resources ${cibfile} ${HA_SERVERS} + recreate_resources ${cibfile} ${ha_servers} + HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /") + + create_virt_ip_constraints ${cibfile} ${HA_SERVERS} + + pcs cluster cib-push ${cibfile} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster cib-push ${cibfile} failed" + fi + rm -f ${cibfile} + +} + + +deletenode_update_haconfig() +{ + local name="VIP_${1}" + local clean_name=${name//[-.]/_} + + ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/") + sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf +} + + +setup_state_volume() +{ + local mnt=${HA_VOL_MNT} + local longname="" + local shortname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + while [[ ${1} ]]; do + + if [[ ${1} == *${dname} ]]; then + dirname=${1} + else + dirname=${1}${dname} + fi + + if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then + mkdir ${mnt}/nfs-ganesha/tickle_dir + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then + mkdir ${mnt}/nfs-ganesha/${dirname} + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov + fi + if [ ! 
-d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + for server in ${HA_SERVERS} ; do + if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + fi + done + shift + done + +} + + +enable_pacemaker() +{ + while [[ ${1} ]]; do + if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker" + else + ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \ +${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable" + fi + shift + done +} + + +addnode_state_volume() +{ + local newnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${newnode} == *${dname} ]]; then + dirname=${newnode} + else + dirname=${newnode}${dname} + fi + + if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then + mkdir ${mnt}/nfs-ganesha/${dirname} + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd + fi + if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/state + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm + fi + if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then + mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak + fi + if [ ! 
-e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then + touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state + fi + + for server in ${HA_SERVERS} ; do + + if [[ ${server} != ${dirname} ]]; then + ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server} + ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server} + + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done + +} + + +delnode_state_volume() +{ + local delnode=${1}; shift + local mnt=${HA_VOL_MNT} + local longname="" + local dname="" + local dirname="" + + longname=$(hostname) + dname=${longname#$(hostname -s)} + + if [[ ${delnode} == *${dname} ]]; then + dirname=${delnode} + else + dirname=${delnode}${dname} + fi + + rm -rf ${mnt}/nfs-ganesha/${dirname} + + for server in ${HA_SERVERS} ; do + if [[ ${server} != ${dirname} ]]; then + rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname} + rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname} + fi + done +} + + +status() +{ + local scratch=$(mktemp) + local regex_str="^${1}-cluster_ip-1" + local healthy=0 + local index=1 + local nodes + + # change tabs to spaces, strip leading spaces, including any + # new '*' at the beginning of a line introduced in pcs-0.10.x + pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch} + + nodes[0]=${1}; shift + + # make a regex of the configured nodes + # and initalize the nodes array for later + while [[ ${1} ]]; do + + regex_str="${regex_str}|^${1}-cluster_ip-1" + nodes[${index}]=${1} + ((index++)) + shift + done + + # print the nodes that are expected to be online + grep -E "Online:" ${scratch} + + echo + + # print the VIPs and which node they are on + grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4 + + echo + + # check if the VIP and port block/unblock RAs are on the expected nodes + for n in ${nodes[*]}; do + + grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch} + result=$? + ((healthy+=${result})) + done + + grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch} + result=$? + + if [ ${result} -eq 0 ]; then + echo "Cluster HA Status: BAD" + elif [ ${healthy} -eq 0 ]; then + echo "Cluster HA Status: HEALTHY" + else + echo "Cluster HA Status: FAILOVER" + fi + + rm -f ${scratch} +} + +create_ganesha_conf_file() +{ + if [[ "$1" == "yes" ]]; + then + if [ -e $GANESHA_CONF ]; + then + rm -rf $GANESHA_CONF + fi + # The symlink /etc/ganesha/ganesha.conf need to be + # created using ganesha conf file mentioned in the + # shared storage. Every node will only have this + # link and actual file will stored in shared storage, + # so that ganesha conf editing of ganesha conf will + # be easy as well as it become more consistent. 
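+        # For illustration, with the defaults used by this script every
+        # node ends up with:
+        #   /etc/ganesha/ganesha.conf -> /run/gluster/shared_storage/nfs-ganesha/ganesha.conf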
+ + ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF + else + # Restoring previous file + rm -rf $GANESHA_CONF + cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF + sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF + fi +} + +set_quorum_policy() +{ + local quorum_policy="stop" + local num_servers=${1} + + if [ ${num_servers} -lt 3 ]; then + quorum_policy="ignore" + fi + pcs property set no-quorum-policy=${quorum_policy} + if [ $? -ne 0 ]; then + logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed" + fi +} + +main() +{ + + local cmd=${1}; shift + if [[ ${cmd} == *help ]]; then + usage + exit 0 + fi + + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --on + fi + + local osid="" + + osid=$(grep ^ID= /etc/os-release) + eval $(echo ${osid} | grep -F ID=) + osid=$(grep ^VERSION_ID= /etc/os-release) + eval $(echo ${osid} | grep -F VERSION_ID=) + + HA_CONFDIR=${1%/}; shift + local ha_conf=${HA_CONFDIR}/ganesha-ha.conf + local node="" + local vip="" + + # ignore any comment lines + cfgline=$(grep ^HA_NAME= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_NAME=) + cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf}) + eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=) + + case "${cmd}" in + + setup | --setup) + logger "setting up ${HA_NAME}" + + check_cluster_exists ${HA_NAME} + + determine_servers "setup" + + # Fedora 29+ and rhel/centos 8 has PCS-0.10.x + # default is pcs-0.10.x options but check for + # rhel/centos 7 (pcs-0.9.x) and adjust accordingly + if [[ ! ${ID} =~ {rhel,centos} ]]; then + if [[ ${VERSION_ID} == 7.* ]]; then + PCS9OR10_PCS_CNAME_OPTION="--name" + PCS9OR10_PCS_CLONE_OPTION="--clone" + fi + fi + + if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then + + determine_service_manager + + setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}" + + setup_create_resources ${HA_SERVERS} + + setup_finalize_ha + + setup_state_volume ${HA_SERVERS} + + enable_pacemaker ${HA_SERVERS} + + else + + logger "insufficient servers for HA, aborting" + fi + ;; + + teardown | --teardown) + logger "tearing down ${HA_NAME}" + + determine_servers "teardown" + + teardown_resources ${HA_SERVERS} + + teardown_cluster ${HA_NAME} + + cleanup_ganesha_config ${HA_CONFDIR} + ;; + + cleanup | --cleanup) + cleanup_ganesha_config ${HA_CONFDIR} + ;; + + add | --add) + node=${1}; shift + vip=${1}; shift + + logger "adding ${node} with ${vip} to ${HA_NAME}" + + determine_service_manager + + manage_service "start" ${node} + + determine_servers "add" + + pcs cluster node add ${node} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster node add ${node} failed" + fi + + addnode_create_resources ${node} ${vip} + # Subsequent add-node recreates resources for all the nodes + # that already exist in the cluster. The nodes are picked up + # from the entries in the ganesha-ha.conf file. Adding the + # newly added node to the file so that the resources specfic + # to this node is correctly recreated in the future. 
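+        # e.g. "--add host2.example.com 10.0.0.2" (hypothetical values)
+        # appends the line: VIP_host2.example.com="10.0.0.2"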
+ clean_node=${node//[-.]/_} + echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf + + NEW_NODES="$HA_CLUSTER_NODES,${node}" + + sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \ +$HA_CONFDIR/ganesha-ha.conf + + addnode_state_volume ${node} + + # addnode_create_resources() already appended ${node} to + # HA_SERVERS, so only need to increment HA_NUM_SERVERS + # and set quorum policy + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1) + set_quorum_policy ${HA_NUM_SERVERS} + ;; + + delete | --delete) + node=${1}; shift + + logger "deleting ${node} from ${HA_NAME}" + + determine_servers "delete" + + deletenode_delete_resources ${node} + + pcs cluster node remove ${node} + if [ $? -ne 0 ]; then + logger "warning: pcs cluster node remove ${node} failed" + fi + + deletenode_update_haconfig ${node} + + delnode_state_volume ${node} + + determine_service_manager + + manage_service "stop" ${node} + + HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1) + set_quorum_policy ${HA_NUM_SERVERS} + ;; + + status | --status) + determine_servers "status" + + status ${HA_SERVERS} + ;; + + refresh-config | --refresh-config) + VOL=${1} + + determine_servers "refresh-config" + + refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS} + ;; + + setup-ganesha-conf-files | --setup-ganesha-conf-files) + + create_ganesha_conf_file ${1} + ;; + + *) + # setup and teardown are not intended to be used by a + # casual user + usage + logger "Usage: ganesha-ha.sh add|delete|status" + ;; + + esac + + if (selinuxenabled) ;then + semanage boolean -m gluster_use_execmem --off + fi +} + +main $* diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py new file mode 100755 index 00000000000..77af014bab9 --- /dev/null +++ b/extras/ganesha/scripts/generate-epoch.py @@ -0,0 +1,48 @@ +#!/usr/bin/python3 +# +# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# +# Generates unique epoch value on each gluster node to be used by +# nfs-ganesha service on that node. +# +# Configure 'EPOCH_EXEC' option to this script path in +# '/etc/sysconfig/ganesha' file used by nfs-ganesha service. 
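+#
+# For example (install path assumed from this patch's Makefile.am,
+# which puts the ganesha scripts under $(libexecdir)/ganesha):
+#   EPOCH_EXEC="/usr/libexec/ganesha/generate-epoch.py"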
+# +# Construct epoch as follows - +# first 32-bit contains the now() time +# rest 32-bit value contains the local glusterd node uuid + +import time +import binascii + +# Calculate the now() time into a 64-bit integer value +def epoch_now(): + epoch_time = int(time.mktime(time.localtime())) << 32 + return epoch_time + +# Read glusterd UUID and extract first 32-bit of it +def epoch_uuid(): + file_name = '/var/lib/glusterd/glusterd.info' + + for line in open(file_name): + if "UUID" in line: + glusterd_uuid = line.split('=')[1].strip() + + uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-","")) + + epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000 + return epoch_uuid + +# Construct epoch as follows - +# first 32-bit contains the now() time +# rest 32-bit value contains the local glusterd node uuid +epoch = (epoch_now() | epoch_uuid()) +print((str(epoch))) + +exit(0) diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am index e4603ae80b8..09eff308ac4 100644 --- a/extras/geo-rep/Makefile.am +++ b/extras/geo-rep/Makefile.am @@ -1,4 +1,4 @@ -scriptsdir = $(datadir)/glusterfs/scripts +scriptsdir = $(libexecdir)/glusterfs/scripts scripts_SCRIPTS = gsync-upgrade.sh generate-gfid-file.sh get-gfid.sh \ slave-upgrade.sh schedule_georep.py diff --git a/extras/geo-rep/gsync-sync-gfid.c b/extras/geo-rep/gsync-sync-gfid.c index b0c5d31ba0b..47dca0413e9 100644 --- a/extras/geo-rep/gsync-sync-gfid.c +++ b/extras/geo-rep/gsync-sync-gfid.c @@ -7,8 +7,8 @@ #include <libgen.h> #include <ctype.h> #include <stdlib.h> -#include "glusterfs.h" -#include "syscall.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/syscall.h> #ifndef UUID_CANONICAL_FORM_LEN #define UUID_CANONICAL_FORM_LEN 36 diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in index f29ae020b8f..48b2b507060 100644 --- a/extras/geo-rep/schedule_georep.py.in +++ b/extras/geo-rep/schedule_georep.py.in @@ -352,7 +352,7 @@ def get_summary(mastervol, slave_url): def touch_mount_root(mastervol): # Create a Mount and Touch the Mount point root, # Hack to make sure some event available after - # setting Checkpoint. Without this their is a chance of + # setting Checkpoint. Without this there is a chance of # Checkpoint never completes. with glustermount("localhost", mastervol) as mnt: execute(["touch", mnt]) @@ -459,8 +459,8 @@ if __name__ == "__main__": description=__doc__) parser.add_argument("mastervol", help="Master Volume Name") parser.add_argument("slave", - help="SLAVEHOST or root@SLAVEHOST " - "or user@SLAVEHOST", + help="Slave hostname " + "(<username>@SLAVEHOST or SLAVEHOST)", metavar="SLAVE") parser.add_argument("slavevol", help="Slave Volume Name") parser.add_argument("--interval", help="Interval in Seconds. 
" diff --git a/extras/glusterd.vol.in b/extras/glusterd.vol.in index fe413a9b4ab..5d7bad0e4c8 100644 --- a/extras/glusterd.vol.in +++ b/extras/glusterd.vol.in @@ -1,14 +1,15 @@ volume management type mgmt/glusterd option working-directory @GLUSTERD_WORKDIR@ - option transport-type socket,rdma + option transport-type socket option transport.socket.keepalive-time 10 option transport.socket.keepalive-interval 2 option transport.socket.read-fail-log off + option transport.socket.listen-port 24007 option ping-timeout 0 option event-threads 1 # option lock-timer 180 # option transport.address-family inet6 # option base-port 49152 -# option max-port 65535 + option max-port 60999 end-volume diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate index 6fdb8c65aaf..3e7ecf373a1 100644 --- a/extras/glusterfs-georep-logrotate +++ b/extras/glusterfs-georep-logrotate @@ -1,6 +1,12 @@ /var/log/glusterfs/geo-replication/*/*.log { sharedscripts - rotate 52 + weekly + maxsize 10M + minsize 100k + + # 6 months of logs are good enough + rotate 26 + missingok compress delaycompress @@ -15,7 +21,13 @@ /var/log/glusterfs/geo-replication-slaves/*.log { sharedscripts - rotate 52 + weekly + maxsize 10M + minsize 100k + + # 6 months of logs are good enough + rotate 26 + missingok compress delaycompress @@ -30,7 +42,13 @@ /var/log/glusterfs/geo-replication-slaves/*/*.log { sharedscripts - rotate 52 + weekly + maxsize 10M + minsize 100k + + # 6 months of logs are good enough + rotate 26 + missingok compress delaycompress diff --git a/extras/glusterfs-georep-upgrade.py b/extras/glusterfs-georep-upgrade.py new file mode 100755 index 00000000000..634576058d6 --- /dev/null +++ b/extras/glusterfs-georep-upgrade.py @@ -0,0 +1,77 @@ +#!/usr/bin/python3 +""" + +Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com> +This file is part of GlusterFS. + +This file is licensed to you under your choice of the GNU Lesser +General Public License, version 3 or any later version (LGPLv3 or +later), or the GNU General Public License, version 2 (GPLv2), in all +cases as published by the Free Software Foundation. 
+ +""" + +import argparse +import errno +import os, sys +import shutil +from datetime import datetime + +def find_htime_path(brick_path): + dirs = [] + htime_dir = os.path.join(brick_path, '.glusterfs/changelogs/htime') + for file in os.listdir(htime_dir): + if os.path.isfile(os.path.join(htime_dir,file)) and file.startswith("HTIME"): + dirs.append(os.path.join(htime_dir, file)) + else: + raise FileNotFoundError("%s unavailable" % (os.path.join(htime_dir, file))) + return dirs + +def modify_htime_file(brick_path): + htime_file_path_list = find_htime_path(brick_path) + + for htime_file_path in htime_file_path_list: + changelog_path = os.path.join(brick_path, '.glusterfs/changelogs') + temp_htime_path = os.path.join(changelog_path, 'htime/temp_htime_file') + with open(htime_file_path, 'r') as htime_file, open(temp_htime_path, 'w') as temp_htime_file: + #extract epoch times from htime file + paths = htime_file.read().split("\x00") + + for pth in paths: + epoch_no = pth.split(".")[-1] + changelog = os.path.basename(pth) + #convert epoch time to year, month and day + if epoch_no != '': + date=(datetime.fromtimestamp(float(int(epoch_no))).strftime("%Y/%m/%d")) + #update paths in temp htime file + temp_htime_file.write("%s/%s/%s\x00" % (changelog_path, date, changelog)) + #create directory in the format year/month/days + path = os.path.join(changelog_path, date) + + if changelog.startswith("CHANGELOG."): + try: + os.makedirs(path, mode = 0o600); + except OSError as exc: + if exc.errno == errno.EEXIST: + pass + else: + raise + + #copy existing changelogs to new directory structure, delete old changelog files + shutil.copyfile(pth, os.path.join(path, changelog)) + os.remove(pth) + + #rename temp_htime_file with htime file + os.rename(htime_file_path, os.path.join('%s.bak'%htime_file_path)) + os.rename(temp_htime_path, htime_file_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('brick_path', help="This upgrade script, which is to be run on\ + server side, takes brick path as the argument, \ + updates paths inside htime file and alters the directory structure \ + above the changelog files inorder to support new optimised format \ + of the directory structure as per \ + https://review.gluster.org/#/c/glusterfs/+/23733/") + args = parser.parse_args() + modify_htime_file(args.brick_path) diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate index 575c0eee771..6ba6ef18e9f 100644 --- a/extras/glusterfs-logrotate +++ b/extras/glusterfs-logrotate @@ -2,7 +2,12 @@ /var/log/glusterfs/*.log { sharedscripts weekly - rotate 52 + maxsize 10M + minsize 100k + +# 6 months of logs are good enough + rotate 26 + missingok compress delaycompress @@ -17,7 +22,12 @@ /var/log/glusterfs/bricks/*.log { sharedscripts weekly - rotate 52 + maxsize 10M + minsize 100k + +# 6 months of logs are good enough + rotate 26 + missingok compress delaycompress @@ -35,3 +45,24 @@ compress delaycompress } + +# Rotate snapd log +/var/log/glusterfs/snaps/*/*.log { + sharedscripts + weekly + maxsize 10M + minsize 100k + + # 6 months of logs are good enough + rotate 26 + + missingok + compress + delaycompress + notifempty + postrotate + for pid in `ps -aef | grep glusterfs | egrep "snapd" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript +} diff --git a/extras/gnfs-loganalyse.py b/extras/gnfs-loganalyse.py index 6341d007188..6341d007188 100644..100755 --- a/extras/gnfs-loganalyse.py +++ b/extras/gnfs-loganalyse.py diff --git 
a/extras/group-db-workload b/extras/group-db-workload index c9caf218268..9334d6fb942 100644 --- a/extras/group-db-workload +++ b/extras/group-db-workload @@ -1,4 +1,4 @@ -performance.open-behind=off +performance.open-behind=on performance.write-behind=off performance.stat-prefetch=off performance.quick-read=off @@ -6,3 +6,7 @@ performance.strict-o-direct=on performance.read-ahead=off performance.io-cache=off performance.readdir-ahead=off +performance.client-io-threads=on +server.event-threads=4 +client.event-threads=4 +performance.read-after-open=yes diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt new file mode 100644 index 00000000000..a960b76c694 --- /dev/null +++ b/extras/group-distributed-virt @@ -0,0 +1,10 @@ +performance.quick-read=off +performance.read-ahead=off +performance.io-cache=off +performance.low-prio-threads=32 +network.remote-dio=enable +features.shard=on +user.cifs=off +client.event-threads=4 +server.event-threads=4 +performance.client-io-threads=on diff --git a/extras/group-gluster-block b/extras/group-gluster-block index 56b406e3641..1e398019e6b 100644 --- a/extras/group-gluster-block +++ b/extras/group-gluster-block @@ -5,6 +5,14 @@ performance.stat-prefetch=off performance.open-behind=off performance.readdir-ahead=off performance.strict-o-direct=on +performance.client-io-threads=on +performance.io-thread-count=32 +performance.high-prio-threads=32 +performance.normal-prio-threads=32 +performance.low-prio-threads=32 +performance.least-prio-threads=4 +client.event-threads=8 +server.event-threads=8 network.remote-dio=disable cluster.eager-lock=enable cluster.quorum-type=auto diff --git a/extras/group-samba b/extras/group-samba new file mode 100644 index 00000000000..eeee6e06031 --- /dev/null +++ b/extras/group-samba @@ -0,0 +1,11 @@ +features.cache-invalidation=on +features.cache-invalidation-timeout=600 +performance.cache-samba-metadata=on +performance.stat-prefetch=on +performance.cache-invalidation=on +performance.md-cache-timeout=600 +network.inode-lru-limit=200000 +performance.nl-cache=on +performance.nl-cache-timeout=600 +performance.readdir-ahead=on +performance.parallel-readdir=on diff --git a/extras/group-virt.example b/extras/group-virt.example index 7e34b7297be..cc37c98a25c 100644 --- a/extras/group-virt.example +++ b/extras/group-virt.example @@ -2,7 +2,8 @@ performance.quick-read=off performance.read-ahead=off performance.io-cache=off performance.low-prio-threads=32 -network.remote-dio=enable +network.remote-dio=disable +performance.strict-o-direct=on cluster.eager-lock=enable cluster.quorum-type=auto cluster.server-quorum-type=server @@ -13,3 +14,11 @@ cluster.shd-wait-qlength=10000 features.shard=on user.cifs=off cluster.choose-local=off +client.event-threads=4 +server.event-threads=4 +performance.client-io-threads=on +network.ping-timeout=20 +server.tcp-user-timeout=20 +server.keepalive-time=10 +server.keepalive-interval=2 +server.keepalive-count=5 diff --git a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh index d8707a8fb4d..7d6052315bb 100755 --- a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh +++ b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh @@ -92,11 +92,13 @@ if [ -f $pub_file ]; then ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}" ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > 
/dev/null" ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null" + ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on" else scp -P ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $pub_file $slave_ip:$pub_file_tmp ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}" ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null" ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null" + ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on" fi fi fi diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am index bfc0c1cf080..9b236df096d 100644 --- a/extras/hook-scripts/add-brick/post/Makefile.am +++ b/extras/hook-scripts/add-brick/post/Makefile.am @@ -1,6 +1,6 @@ -EXTRA_DIST = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh +EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/ if WITH_SERVER -hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S13create-subdir-mounts.sh +hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh endif diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh new file mode 100755 index 00000000000..4a17c993a77 --- /dev/null +++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# +# Install to hooks/<HOOKS_VER>/add-brick/post +# +# Add an SELinux file context for each brick using the glusterd_brick_t type. +# This ensures that the brick is relabeled correctly on an SELinux restart or +# restore. Subsequently, run a restore on the brick path to set the selinux +# labels. +# +### + +PROGNAME="Sselinux" +OPTSPEC="volname:,version:,gd-workdir:,volume-op:" +VOL= + +parse_args () { + ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") + eval set -- "${ARGS}" + + while true; do + case ${1} in + --volname) + shift + VOL=${1} + ;; + --gd-workdir) + shift + GLUSTERD_WORKDIR=$1 + ;; + --version) + shift + ;; + --volume-op) + shift + ;; + *) + shift + break + ;; + esac + shift + done +} + +set_brick_labels() +{ + local volname="${1}" + local fctx + local list=() + + fctx="$(semanage fcontext --list -C)" + + # wait for new brick path to be updated under + # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/ + sleep 5 + + # grab the path for each local brick + brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/" + brickdirs=$( + find "${brickpath}" -type f -exec grep '^path=' {} \; | \ + cut -d= -f 2 | \ + sort -u + ) + + # create a list of bricks for which custom SELinux + # label doesn't exist + for b in ${brickdirs}; do + pattern="${b}(/.*)?" + echo "${fctx}" | grep "^${pattern}\s" >/dev/null + if [[ $? -ne 0 ]]; then + list+=("${pattern}") + fi + done + + # Add a file context for each brick path in the list and associate with the + # glusterd_brick_t SELinux type. 
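+    # e.g. for a (hypothetical) brick at /bricks/brick1, the loop below runs:
+    #   semanage fcontext --add -t glusterd_brick_t -r s0 "/bricks/brick1(/.*)?"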
+ for p in ${list[@]} + do + semanage fcontext --add -t glusterd_brick_t -r s0 "${p}" + done + + # Set the labels for which SELinux label was added above + for b in ${brickdirs} + do + echo "${list[@]}" | grep "${b}" >/dev/null + if [[ $? -eq 0 ]]; then + restorecon -R "${b}" + fi + done +} + +SELINUX_STATE=$(which getenforce && getenforce) +[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 + +parse_args "$@" +[ -z "${VOL}" ] && exit 1 + +set_brick_labels "${VOL}" + +exit 0 diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh index de242d20af4..f9b4b1a57e3 100755 --- a/extras/hook-scripts/create/post/S10selinux-label-brick.sh +++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh @@ -34,18 +34,21 @@ parse_args () { set_brick_labels() { - volname=${1} + volname="${1}" # grab the path for each local brick - brickpath="/var/lib/glusterd/vols/${volname}/bricks/*" - brickdirs=$(grep '^path=' "${brickpath}" | cut -d= -f 2 | sort -u) + brickpath="/var/lib/glusterd/vols/${volname}/bricks/" + brickdirs=$( + find "${brickpath}" -type f -exec grep '^path=' {} \; | \ + cut -d= -f 2 | \ + sort -u + ) for b in ${brickdirs}; do # Add a file context for each brick path and associate with the # glusterd_brick_t SELinux type. - pattern="${b}\(/.*\)?" + pattern="${b}(/.*)?" semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}" - # Set the labels on the new brick path. restorecon -R "${b}" done diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh index 6eba66fbe07..056b52afe76 100755 --- a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh +++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh @@ -15,45 +15,59 @@ OPTSPEC="volname:" VOL= function parse_args () { - ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@") - eval set -- "$ARGS" - - while true; do - case $1 in - --volname) - shift - VOL=$1 - ;; - *) - shift - break - ;; - esac + ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@") + eval set -- "${ARGS}" + + while true; do + case ${1} in + --volname) + shift + VOL=${1} + ;; + *) shift - done + break + ;; + esac + shift + done } function delete_brick_fcontext() { - volname=$1 - - # grab the path for each local brick - brickdirs=$(grep '^path=' /var/lib/glusterd/vols/${volname}/bricks/* | cut -d= -f 2) + local volname=$1 + local fctx + local list=() - for b in $brickdirs - do - # remove the file context associated with the brick path - semanage fcontext --delete $b\(/.*\)? - done + fctx="$(semanage fcontext --list -C)" + # grab the path for each local brick + brickpath="/var/lib/glusterd/vols/${volname}/bricks/" + brickdirs=$(find "${brickpath}" -type f -exec grep '^path=' {} \; | \ + cut -d= -f 2 | sort -u) + for b in ${brickdirs} + do + pattern="${b}(/.*)?" + echo "${fctx}" | grep "^${pattern}\s" >/dev/null + if [[ $? 
-eq 0 ]]; then + list+=("${pattern}") + fi + done + if [[ ${#list[@]} -gt 0 ]]; then + printf 'fcontext --delete %s\n' "${list[@]}" | semanage -i - + fi + for b in ${brickdirs} + do + restorecon -R "${b}" + done } SELINUX_STATE=$(which getenforce && getenforce) [ "${SELINUX_STATE}" = 'Disabled' ] && exit 0 parse_args "$@" -[ -z "$VOL" ] && exit 1 +[ -z "${VOL}" ] && exit 1 -delete_brick_fcontext $VOL +delete_brick_fcontext "${VOL}" # failure to delete the fcontext is not fatal exit 0 diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh index c596db02381..854f131f6c8 100755 --- a/extras/hook-scripts/set/post/S30samba-set.sh +++ b/extras/hook-scripts/set/post/S30samba-set.sh @@ -89,7 +89,7 @@ function add_samba_share () { STRING+="glusterfs:loglevel = 7\n" STRING+="path = /\n" STRING+="read only = no\n" - STRING+="guest ok = yes\n" + STRING+="kernel share modes = no\n" printf "$STRING" >> ${CONFIGFILE} } @@ -123,9 +123,18 @@ function get_smb () { usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\ cut -d"=" -f2) - if [ $usercifsvalue = "disable" ] || [ $usersmbvalue = "disable" ]; then - uservalue="disable" + if [ -n "$usercifsvalue" ]; then + if [ "$usercifsvalue" = "disable" ] || [ "$usercifsvalue" = "off" ]; then + uservalue="disable" + fi fi + + if [ -n "$usersmbvalue" ]; then + if [ "$usersmbvalue" = "disable" ] || [ "$usersmbvalue" = "off" ]; then + uservalue="disable" + fi + fi + echo "$uservalue" } diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh index 885ed03ad5b..1f2564b44ff 100755 --- a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh +++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh @@ -79,9 +79,9 @@ done if [ "$option" == "disable" ]; then # Unmount the volume on all the nodes - umount /var/run/gluster/shared_storage - cat /etc/fstab | grep -v "gluster_shared_storage /var/run/gluster/shared_storage/" > /var/run/gluster/fstab.tmp - mv /var/run/gluster/fstab.tmp /etc/fstab + umount /run/gluster/shared_storage + cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp + mv /run/gluster/fstab.tmp /etc/fstab fi if [ "$is_originator" == 1 ]; then @@ -104,8 +104,15 @@ function check_volume_status() echo $status } -mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ - /var/run/gluster/shared_storage" +key=`echo $5 | cut -d '=' -f 1` +val=`echo $5 | cut -d '=' -f 2` +if [ "$key" == "transport.address-family" ]; then + mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \ + $local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage" +else + mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \ + /run/gluster/shared_storage" +fi if [ "$option" == "enable" ]; then retry=0; @@ -120,10 +127,10 @@ if [ "$option" == "enable" ]; then status=$(check_volume_status) done # Mount the volume on all the nodes - umount /var/run/gluster/shared_storage - mkdir -p /var/run/gluster/shared_storage + umount /run/gluster/shared_storage + mkdir -p /run/gluster/shared_storage $mount_cmd - cp /etc/fstab /var/run/gluster/fstab.tmp - echo "$local_node_hostname:/gluster_shared_storage /var/run/gluster/shared_storage/ glusterfs defaults 0 0" >> /var/run/gluster/fstab.tmp - mv /var/run/gluster/fstab.tmp /etc/fstab + cp /etc/fstab /run/gluster/fstab.tmp + echo 
"$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp + mv /run/gluster/fstab.tmp /etc/fstab fi diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am index e32546dc999..792019d3c9f 100644 --- a/extras/hook-scripts/start/post/Makefile.am +++ b/extras/hook-scripts/start/post/Makefile.am @@ -1,4 +1,4 @@ -EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh +EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/ if WITH_SERVER diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh index 5d586ee9fae..cac0cbf1464 100755 --- a/extras/hook-scripts/start/post/S30samba-start.sh +++ b/extras/hook-scripts/start/post/S30samba-start.sh @@ -68,14 +68,14 @@ function parse_args () { } function find_config_info () { - cmdout=`smbd -b | grep smb.conf` - if [ $? -ne 0 ];then + cmdout=$(smbd -b 2> /dev/null) + CONFIGFILE=$(echo "$cmdout" | grep CONFIGFILE | awk '{print $2}') + if [ -z "$CONFIGFILE" ]; then echo "Samba is not installed" exit 1 fi - CONFIGFILE=`echo $cmdout | awk '{print $2}'` - PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'` - LOGFILEBASE=`smbd -b | grep 'LOGFILEBASE' | awk '{print $2}'` + PIDDIR=$(echo "$cmdout" | grep PIDDIR | awk '{print $2}') + LOGFILEBASE=$(echo "$cmdout" | grep 'LOGFILEBASE' | awk '{print $2}') } function add_samba_share () { @@ -88,12 +88,12 @@ function add_samba_share () { STRING+="glusterfs:loglevel = 7\n" STRING+="path = /\n" STRING+="read only = no\n" - STRING+="guest ok = yes\n" - printf "$STRING" >> ${CONFIGFILE} + STRING+="kernel share modes = no\n" + printf "$STRING" >> "${CONFIGFILE}" } function sighup_samba () { - pid=`cat ${PIDDIR}/smbd.pid` + pid=$(cat "${PIDDIR}/smbd.pid" 2> /dev/null) if [ "x$pid" != "x" ] then kill -HUP "$pid"; @@ -106,28 +106,40 @@ function get_smb () { volname=$1 uservalue= - usercifsvalue=$(grep user.cifs $GLUSTERD_WORKDIR/vols/"$volname"/info |\ + usercifsvalue=$(grep user.cifs "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\ cut -d"=" -f2) - usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\ + usersmbvalue=$(grep user.smb "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\ cut -d"=" -f2) - if [ $usercifsvalue = "disable" ] || [ $usersmbvalue = "disable" ]; then - uservalue="disable" + if [ -n "$usercifsvalue" ]; then + if [ "$usercifsvalue" = "enable" ] || [ "$usercifsvalue" = "on" ]; then + uservalue="enable" + fi fi + + if [ -n "$usersmbvalue" ]; then + if [ "$usersmbvalue" = "enable" ] || [ "$usersmbvalue" = "on" ]; then + uservalue="enable" + fi + fi + echo "$uservalue" } parse_args "$@" -if [ "$(get_smb "$VOL")" = "disable" ]; then + +value=$(get_smb "$VOL") + +if [ -z "$value" ] || [ "$value" != "enable" ]; then exit 0 fi #Find smb.conf, smbd pid directory and smbd logfile path find_config_info -if ! grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then - add_samba_share $VOL +if ! 
grep --quiet "\[gluster-$VOL\]" "${CONFIGFILE}" ; then + add_samba_share "$VOL" else - sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' ${CONFIGFILE} + sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' "${CONFIGFILE}" fi sighup_samba diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh new file mode 100755 index 00000000000..7ad6f23ad06 --- /dev/null +++ b/extras/hook-scripts/start/post/S31ganesha-start.sh @@ -0,0 +1,122 @@ +#!/bin/bash +PROGNAME="Sganesha-start" +OPTSPEC="volname:,gd-workdir:" +VOL= +declare -i EXPORT_ID +ganesha_key="ganesha.enable" +GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha" +CONF1="$GANESHA_DIR/ganesha.conf" +GLUSTERD_WORKDIR= + +function parse_args () +{ + ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@) + eval set -- "$ARGS" + + while true; do + case $1 in + --volname) + shift + VOL=$1 + ;; + --gd-workdir) + shift + GLUSTERD_WORKDIR=$1 + ;; + *) + shift + break + ;; + esac + shift + done +} + + + +#This function generates a new export entry as export.volume_name.conf +function write_conf() +{ +echo -e "# WARNING : Using Gluster CLI will overwrite manual +# changes made to this file. To avoid it, edit the +# file, copy it over to all the NFS-Ganesha nodes +# and run ganesha-ha.sh --refresh-config." + +echo "EXPORT{" +echo " Export_Id = 2;" +echo " Path = \"/$VOL\";" +echo " FSAL {" +echo " name = \"GLUSTER\";" +echo " hostname=\"localhost\";" +echo " volume=\"$VOL\";" +echo " }" +echo " Access_type = RW;" +echo " Disable_ACL = true;" +echo " Squash=\"No_root_squash\";" +echo " Pseudo=\"/$VOL\";" +echo " Protocols = \"3\", \"4\" ;" +echo " Transports = \"UDP\",\"TCP\";" +echo " SecType = \"sys\";" +echo "}" +} + +#It adds the export dynamically by sending dbus signals +function export_add() +{ + dbus-send --print-reply --system --dest=org.ganesha.nfsd \ +/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \ +string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)" + +} + +# based on src/scripts/ganeshactl/Ganesha/export_mgr.py +function is_exported() +{ + local volume="${1}" + + dbus-send --type=method_call --print-reply --system \ + --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \ + org.ganesha.nfsd.exportmgr.ShowExports \ + | grep -w -q "/${volume}" + + return $? +} + +# Check the info file (contains the volume options) to see if Ganesha is +# enabled for this volume. +function ganesha_enabled() +{ + local volume="${1}" + local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info" + local enabled="off" + + enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2) + + [ "${enabled}" == "on" ] + + return $? +} + +parse_args $@ + +if ganesha_enabled ${VOL} && ! is_exported ${VOL} +then + if [ ! 
-e ${GANESHA_DIR}/exports/export.${VOL}.conf ] + then + #Remove export entry from nfs-ganesha.conf + sed -i /$VOL.conf/d $CONF1 + write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf + EXPORT_ID=`cat $GANESHA_DIR/.export_added` + EXPORT_ID=EXPORT_ID+1 + echo $EXPORT_ID > $GANESHA_DIR/.export_added + sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \ + $GANESHA_DIR/exports/export.$VOL.conf + echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1 + else + EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\ + awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]') + fi + export_add $VOL +fi + +exit 0 diff --git a/extras/identify-hangs.sh b/extras/identify-hangs.sh new file mode 100755 index 00000000000..ebc6bf144aa --- /dev/null +++ b/extras/identify-hangs.sh @@ -0,0 +1,53 @@ +#!/bin/bash +function get_statedump_fnames_without_timestamps +{ + ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u +} + +function get_non_uniq_fields +{ + local statedump_fname_prefix=$1 + print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}' +} + +function print_stack_lkowner_unique_in_one_line +{ + local statedump_fname_prefix=$1 + sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - - +} + +function get_stacks_that_appear_in_multiple_statedumps +{ + #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps + #print the stack + local statedump_fname_prefix=$1 + while read -r non_uniq_stack; + do + if [ -z "$printed" ]; + then + printed="1" + fi + echo "$statedump_fname_prefix" "$non_uniq_stack" + done < <(get_non_uniq_fields "$statedump_fname_prefix") +} + +statedumpdir=${1} +if [ -z "$statedumpdir" ]; +then + echo "Usage: $0 <statedump-dir>" + exit 1 +fi + +if [ ! -d "$statedumpdir" ]; +then + echo "$statedumpdir: Is not a directory" + echo "Usage: $0 <statedump-dir>" + exit 1 +fi + +cd "$statedumpdir" || exit 1 +for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps); +do + get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix" +done | column -t +echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2 diff --git a/extras/init.d/Makefile.am b/extras/init.d/Makefile.am index 25f9145f120..8d8cc69571a 100644 --- a/extras/init.d/Makefile.am +++ b/extras/init.d/Makefile.am @@ -1,7 +1,7 @@ -EXTRA_DIST = glusterd-Debian glusterd-FreeBSD glusterd-Redhat glusterd-SuSE \ - glusterd.plist rhel5-load-fuse.modules \ - glustereventsd-FreeBSD glustereventsd-Redhat glustereventsd-Debian +EXTRA_DIST = glusterd-Debian glusterd-FreeBSD glusterd-Redhat \ + glusterd-SuSE glusterd.plist glustereventsd-FreeBSD \ + glustereventsd-Redhat glustereventsd-Debian CLEANFILES = diff --git a/extras/init.d/rhel5-load-fuse.modules b/extras/init.d/rhel5-load-fuse.modules deleted file mode 100755 index ee194db99b8..00000000000 --- a/extras/init.d/rhel5-load-fuse.modules +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -# -# fusermount-glusterfs requires the /dev/fuse character device. The fuse module -# provides this and is loaded on demand in newer Linux distributions. 
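The new identify-hangs.sh above groups frames by unique/lk-owner/stack across statedumps sharing the same filename prefix and reports the ones that repeat. A rough driving sequence, assuming the default statedump directory /var/run/gluster (the PID selection is illustrative):

    # take two statedumps a few seconds apart; SIGUSR1 makes a gluster
    # process write its state under /var/run/gluster by default
    kill -USR1 "$(pgrep -o glusterfsd)"
    sleep 10
    kill -USR1 "$(pgrep -o glusterfsd)"
    ./extras/identify-hangs.sh /var/run/gluster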
-# - -[ -c /dev/fuse ] || /sbin/modprobe fuse diff --git a/extras/mount-shared-storage.sh b/extras/mount-shared-storage.sh index e99233f7e1e..cc40e13c3e3 100755 --- a/extras/mount-shared-storage.sh +++ b/extras/mount-shared-storage.sh @@ -21,7 +21,7 @@ do continue fi - mount -t glusterfs "${arr[0]}" "${arr[1]}" + mount -t glusterfs -o "${arr[3]}" "${arr[0]}" "${arr[1]}" #wait for few seconds sleep 10 diff --git a/extras/ocf/volume.in b/extras/ocf/volume.in index 46dd20b8ced..76cc649e55f 100755 --- a/extras/ocf/volume.in +++ b/extras/ocf/volume.in @@ -6,6 +6,7 @@ # HA resource # # Authors: Florian Haas (hastexo Professional Services GmbH) +# Jiri Lunacek (Hosting90 Systems s.r.o.) # # License: GNU General Public License (GPL) @@ -54,6 +55,14 @@ must have clone ordering enabled. <shortdesc lang="en">gluster executable</shortdesc> <content type="string" default="$OCF_RESKEY_binary_default"/> </parameter> + <parameter name="peer_map"> + <longdesc lang="en"> + Mapping of hostname - peer name in the gluster cluster + in format hostname1:peername1,hostname2:peername2,... + </longdesc> + <shortdesc lang="en">gluster peer map</shortdesc> + <content type="string" default=""/> + </parameter> </parameters> <actions> <action name="start" timeout="20" /> @@ -68,6 +77,10 @@ EOF } +if [ -n "${OCF_RESKEY_peer_map}" ]; then + SHORTHOSTNAME=`echo "${OCF_RESKEY_peer_map}" | egrep -o "$SHORTHOSTNAME\:[^,]+" | awk -F: '{print $2}'` +fi + volume_getdir() { local voldir voldir="@GLUSTERD_WORKDIR@/vols/${OCF_RESKEY_volname}" @@ -108,6 +121,10 @@ volume_getpids() { volpid_dir=`volume_getpid_dir` bricks=`volume_getbricks` + + if [ -z "$bricks" ]; then + return 1 + fi for brick in ${bricks}; do pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid" @@ -214,6 +231,11 @@ volume_validate_all() { # Test for required binaries check_binary $OCF_RESKEY_binary + + if [ -z "$SHORTHOSTNAME" ]; then + ocf_log err 'Unable to get host in node map' + return $OCF_ERR_CONFIGURED + fi return $OCF_SUCCESS } diff --git a/extras/python/Makefile.am b/extras/python/Makefile.am new file mode 100644 index 00000000000..7d81fa0319b --- /dev/null +++ b/extras/python/Makefile.am @@ -0,0 +1,7 @@ +if HAVE_PYTHON +# Install __init__.py into the Python site-packages area +pypkgdir = @BUILD_PYTHON_SITE_PACKAGES@/gluster +pypkg_PYTHON = __init__.py +endif + +EXTRA_DIST = __init__.py diff --git a/extras/python/__init__.py b/extras/python/__init__.py new file mode 100644 index 00000000000..3ad9513f40e --- /dev/null +++ b/extras/python/__init__.py @@ -0,0 +1,2 @@ +from pkgutil import extend_path +__path__ = extend_path(__path__, __name__) diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py index f03895de114..e62f7fc52a3 100755 --- a/extras/quota/quota_fsck.py +++ b/extras/quota/quota_fsck.py @@ -52,17 +52,17 @@ epilog_msg=''' def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None): if log_type == QUOTA_VERBOSE: - print('%-24s %-60s\nxattr_values: %s\n%s\n' % {"Verbose", path, xattr_dict, stbuf}) + print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf)) elif log_type == QUOTA_META_ABSENT: - print('%-24s %-60s\n%s\n' % {"Quota-Meta Absent", path, xattr_dict}) + print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict)) elif log_type == QUOTA_SIZE_MISMATCH: print("mismatch") if dir_size is not None: - print('%24s %60s %12s %12s' % {"Size Mismatch", path, xattr_dict['contri_size'], - dir_size}) + print('%24s %60s %12s %12s' % ("Size Mismatch", path, + xattr_dict, dir_size)) else: - 
print('%-24s %-60s %-12i %-12i' % {"Size Mismatch", path, xattr_dict['contri_size'], - stbuf.st_size}) + print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict, + stbuf.st_size)) def size_differs_lot(s1, s2): ''' @@ -156,12 +156,10 @@ def get_quota_xattr_brick(dpath): xattr_dict = {} xattr_dict['parents'] = {} - for xattr in pairs: + for xattr in pairs[1:]: + xattr = xattr.decode("utf-8") xattr_key = xattr.split("=")[0] - if re.search("# file:", xattr_key): - # skip the file comment - continue - elif xattr_key is "": + if xattr_key == "": # skip any empty lines continue elif not re.search("quota", xattr_key): diff --git a/extras/rebalance.py b/extras/rebalance.py index 59d7af96d88..37c68ebbb42 100755 --- a/extras/rebalance.py +++ b/extras/rebalance.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 from __future__ import print_function + import atexit import copy import optparse @@ -155,8 +156,8 @@ def get_score (bricks): if __name__ == "__main__": - my_usage = "%prog [options] server volume [directory]" - parser = optparse.OptionParser(usage=my_usage) + my_usage = "%prog [options] server volume [directory]" + parser = optparse.OptionParser(usage=my_usage) parser.add_option("-f", "--free-space", dest="free_space", default=False, action="store_true", help="use free space instead of total space") @@ -166,7 +167,7 @@ if __name__ == "__main__": parser.add_option("-v", "--verbose", dest="verbose", default=False, action="store_true", help="verbose output") - options, args = parser.parse_args() + options, args = parser.parse_args() if len(args) == 3: fix_dir = args[2] diff --git a/extras/snap_scheduler/conf.py.in b/extras/snap_scheduler/conf.py.in index 35838005fc2..6dcca0534a7 100644 --- a/extras/snap_scheduler/conf.py.in +++ b/extras/snap_scheduler/conf.py.in @@ -1,4 +1,3 @@ -#!/usr/bin/python3 # # Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> # This file is part of GlusterFS. 
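The quota_fsck.py hunks above also fix a real formatting bug: the old code applied the % operator to a brace-delimited literal, which Python parses as a set, so the substitution raised TypeError at runtime. A quick illustration (the values are made up):

    # the tuple form substitutes both fields; the set form fails with
    # "not enough arguments for format string"
    python3 -c 'print("%-24s %-60s" % ("Size Mismatch", "/brick/dir"))'
    python3 -c 'print("%-24s %-60s" % {"Size Mismatch", "/brick/dir"})'  # TypeError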
diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py index 1c8569dcd85..0e4df77d481 100755 --- a/extras/snap_scheduler/gcron.py +++ b/extras/snap_scheduler/gcron.py @@ -19,10 +19,10 @@ import logging.handlers import fcntl -GCRON_TASKS = "/var/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" +GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks" GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks" GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag" -LOCK_FILE_DIR = "/var/run/gluster/shared_storage/snaps/lock_files/" +LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/" log = logging.getLogger("gcron-logger") start_time = 0.0 @@ -38,7 +38,8 @@ def initLogger(script_name): sh.setFormatter(formatter) process = subprocess.Popen(["gluster", "--print-logdir"], - stdout=subprocess.PIPE) + stdout=subprocess.PIPE, + universal_newlines=True) out, err = process.communicate() if process.returncode == 0: logfile = os.path.join(out.strip(), script_name[:-3]+".log") @@ -105,11 +106,11 @@ def doJob(name, lockFile, jobFunc, volname): else: log.info("Job %s has been processed already", name) fcntl.flock(f, fcntl.LOCK_UN) - except (OSError, IOError) as (errno, strerror): + except (OSError, IOError): log.info("Job %s is being processed by another agent", name) os.close(f) - except (OSError, IOError) as (errno, strerror): - log.debug("Failed to open lock file %s : %s", lockFile, strerror) + except (OSError, IOError) as e: + log.debug("Failed to open lock file %s : %s", lockFile, e) log.error("Failed to process job %s", name) success = False @@ -130,12 +131,12 @@ def main(): f = os.open(GCRON_RELOAD_FLAG, os.O_CREAT | os.O_NONBLOCK, 0o644) os.close(f) - except OSError as (errno, strerror): + except OSError as e: if errno != EEXIST: log.error("Failed to create %s : %s", - GCRON_RELOAD_FLAG, strerror) + GCRON_RELOAD_FLAG, e) output("Failed to create %s. Error: %s" - % (GCRON_RELOAD_FLAG, strerror)) + % (GCRON_RELOAD_FLAG, e)) return if not os.path.exists(GCRON_CROND_TASK): @@ -154,9 +155,9 @@ def main(): if process.returncode != 0: log.error("Failed to touch %s. Error: %s.", GCRON_CROND_TASK, err) - except (IOError, OSError) as (errno, strerror): + except (IOError, OSError) as e: log.error("Failed to touch %s. Error: %s.", - GCRON_CROND_TASK, strerror) + GCRON_CROND_TASK, e) return if os.lstat(GCRON_TASKS).st_mtime > \ os.lstat(GCRON_CROND_TASK).st_mtime: @@ -168,9 +169,9 @@ def main(): if process.returncode != 0: log.error("Failed to touch %s. Error: %s.", GCRON_CROND_TASK, err) - except IOError as (errno, strerror): + except IOError as e: log.error("Failed to touch %s. 
Error: %s.", - GCRON_CROND_TASK, strerror) + GCRON_CROND_TASK, e) return volname = sys.argv[1] diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py index 09859844efc..e8fcc449a9b 100755 --- a/extras/snap_scheduler/snap_scheduler.py +++ b/extras/snap_scheduler/snap_scheduler.py @@ -67,7 +67,7 @@ except ImportError: SCRIPT_NAME = "snap_scheduler" scheduler_enabled = False log = logging.getLogger(SCRIPT_NAME) -SHARED_STORAGE_DIR="/var/run/gluster/shared_storage" +SHARED_STORAGE_DIR="/run/gluster/shared_storage" GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled" GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled" GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks" @@ -149,7 +149,7 @@ def initLogger(): sh.setFormatter(formatter) process = subprocess.Popen(["gluster", "--print-logdir"], - stdout=subprocess.PIPE) + stdout=subprocess.PIPE, universal_newlines=True) logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log") fh = logging.FileHandler(logfile) @@ -209,9 +209,9 @@ def enable_scheduler(): f = os.open(GCRON_ENABLED, os.O_CREAT | os.O_NONBLOCK, 0o644) os.close(f) - except OSError as (errno, strerror): + except OSError as e: log.error("Failed to open %s. Error: %s.", - GCRON_ENABLED, strerror) + GCRON_ENABLED, e) ret = INTERNAL_ERROR return ret os.symlink(GCRON_ENABLED, GCRON_TASKS) @@ -219,8 +219,9 @@ def enable_scheduler(): log.info("Snapshot scheduling is enabled") output("Snapshot scheduling is enabled") ret = 0 - except OSError as (errno, strerror): - print_str = "Failed to enable snapshot scheduling. Error: "+strerror + except OSError as e: + print_str = ("Failed to enable snapshot scheduling." + "Error: {{}}" + e) log.error(print_str) output(print_str) ret = INTERNAL_ERROR @@ -268,8 +269,9 @@ def disable_scheduler(): log.info("Snapshot scheduling is disabled") output("Snapshot scheduling is disabled") ret = 0 - except OSError as (errno, strerror): - print_str = "Failed to disable snapshot scheduling. Error: "+strerror + except OSError as e: + print_str = ("Failed to disable snapshot scheduling. Error: " + + e) log.error(print_str) output(print_str) ret = INTERNAL_ERROR @@ -308,8 +310,8 @@ def load_tasks_from_file(): tasks[jobname] = schedule+":"+volname f.close() ret = 0 - except IOError as (errno, strerror): - log.error("Failed to open %s. Error: %s.", GCRON_ENABLED, strerror) + except IOError as e: + log.error("Failed to open %s. Error: %s.", GCRON_ENABLED, e) ret = INTERNAL_ERROR return ret @@ -322,8 +324,8 @@ def get_current_scheduler(): current_scheduler = f.readline().rstrip('\n') f.close() ret = 0 - except IOError as (errno, strerror): - log.error("Failed to open %s. Error: %s.", CURRENT_SCHEDULER, strerror) + except IOError as e: + log.error("Failed to open %s. Error: %s.", CURRENT_SCHEDULER, e) ret = INTERNAL_ERROR return ret @@ -376,8 +378,8 @@ def write_tasks_to_file(): f.flush() os.fsync(f.fileno()) f.close() - except IOError as (errno, strerror): - log.error("Failed to open %s. Error: %s.", TMP_FILE, strerror) + except IOError as e: + log.error("Failed to open %s. Error: %s.", TMP_FILE, e) ret = INTERNAL_ERROR return ret @@ -393,8 +395,8 @@ def update_current_scheduler(data): f.flush() os.fsync(f.fileno()) f.close() - except IOError as (errno, strerror): - log.error("Failed to open %s. Error: %s.", TMP_FILE, strerror) + except IOError as e: + log.error("Failed to open %s. 
Error: %s.", TMP_FILE, e) ret = INTERNAL_ERROR return ret @@ -459,9 +461,9 @@ def add_schedules(jobname, schedule, volname): f = os.open(job_lockfile, os.O_CREAT | os.O_NONBLOCK, 0o644) os.close(f) - except OSError as (errno, strerror): + except OSError as e: log.error("Failed to open %s. Error: %s.", - job_lockfile, strerror) + job_lockfile, e) ret = INTERNAL_ERROR return ret log.info("Successfully added snapshot schedule %s" % @@ -489,9 +491,9 @@ def delete_schedules(jobname): job_lockfile = LOCK_FILE_DIR+jobname try: os.remove(job_lockfile) - except OSError as (errno, strerror): + except OSError as e: log.error("Failed to open %s. Error: %s.", - job_lockfile, strerror) + job_lockfile, e) ret = INTERNAL_ERROR return ret log.info("Successfully deleted snapshot schedule %s" @@ -650,8 +652,8 @@ def initialise_scheduler(): f.flush() os.fsync(f.fileno()) f.close() - except IOError as (errno, strerror): - log.error("Failed to open %s. Error: %s.", TMP_FILE, strerror) + except IOError as e: + log.error("Failed to open %s. Error: %s.", TMP_FILE, e) ret = INIT_FAILED return ret @@ -661,8 +663,8 @@ def initialise_scheduler(): try: f = open(GCRON_TASKS, "w", 0o644) f.close() - except IOError as (errno, strerror): - log.error("Failed to open %s. Error: %s.", GCRON_TASKS, strerror) + except IOError as e: + log.error("Failed to open %s. Error: %s.", GCRON_TASKS, e) ret = INIT_FAILED return ret @@ -894,11 +896,11 @@ def main(argv): if not os.path.exists(SHARED_STORAGE_DIR+"/snaps/"): try: os.makedirs(SHARED_STORAGE_DIR+"/snaps/") - except OSError as (errno, strerror): + except OSError as e: if errno != EEXIST: - log.error("Failed to create %s : %s", SHARED_STORAGE_DIR+"/snaps/", strerror) + log.error("Failed to create %s : %s", SHARED_STORAGE_DIR+"/snaps/", e) output("Failed to create %s. Error: %s" - % (SHARED_STORAGE_DIR+"/snaps/", strerror)) + % (SHARED_STORAGE_DIR+"/snaps/", e)) return INTERNAL_ERROR if not os.path.exists(GCRON_ENABLED): @@ -908,11 +910,11 @@ def main(argv): if not os.path.exists(LOCK_FILE_DIR): try: os.makedirs(LOCK_FILE_DIR) - except OSError as (errno, strerror): + except OSError as e: if errno != EEXIST: - log.error("Failed to create %s : %s", LOCK_FILE_DIR, strerror) + log.error("Failed to create %s : %s", LOCK_FILE_DIR, e) output("Failed to create %s. Error: %s" - % (LOCK_FILE_DIR, strerror)) + % (LOCK_FILE_DIR, e)) return INTERNAL_ERROR try: @@ -921,15 +923,15 @@ def main(argv): fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) ret = perform_operation(args) fcntl.flock(f, fcntl.LOCK_UN) - except IOError as (errno, strerror): + except IOError: log.info("%s is being processed by another agent.", LOCK_FILE) output("Another snap_scheduler command is running. " "Please try again after some time.") return ANOTHER_TRANSACTION_IN_PROGRESS os.close(f) - except OSError as (errno, strerror): - log.error("Failed to open %s : %s", LOCK_FILE, strerror) - output("Failed to open %s. Error: %s" % (LOCK_FILE, strerror)) + except OSError as e: + log.error("Failed to open %s : %s", LOCK_FILE, e) + output("Failed to open %s. Error: %s" % (LOCK_FILE, e)) return INTERNAL_ERROR return ret diff --git a/extras/statedumpparse.rb b/extras/statedumpparse.rb new file mode 100755 index 00000000000..1aff43377db --- /dev/null +++ b/extras/statedumpparse.rb @@ -0,0 +1,208 @@ +#!/usr/bin/env ruby + +require 'time' +require 'optparse' + +unless Array.instance_methods.include? 
:to_h + class Array + def to_h + h = {} + each { |k,v| h[k]=v } + h + end + end +end + +# statedump.c:gf_proc_dump_mempool_info uses a five-dash record separator, +# client.c:client_fd_lk_ctx_dump uses a six-dash record separator. +ARRSEP = /^(-{5,6}=-{5,6})?$/ +HEAD = /^\[(.*)\]$/ +INPUT_FORMATS = %w[statedump json] + +format = 'json' +input_format = 'statedump' +tz = '+0000' +memstat_select,memstat_reject = //,/\Z./ +OptionParser.new do |op| + op.banner << " [<] <STATEDUMP>" + op.on("-f", "--format=F", "json/yaml/memstat(-[plain|human|json])") { |s| format = s } + op.on("--input-format=F", INPUT_FORMATS.join(?/)) { |s| input_format = s } + op.on("--timezone=T", + "time zone to apply to zoneless timestamps [default UTC]") { |s| tz = s } + op.on("--memstat-select=RX", "memstat: select memory types matching RX") { |s| + memstat_select = Regexp.new s + } + op.on("--memstat-reject=RX", "memstat: reject memory types matching RX") { |s| + memstat_reject = Regexp.new s + } +end.parse! + + +if format =~ /\Amemstat(?:-(.*))?/ + memstat_type = $1 || 'plain' + unless %w[plain human json].include? memstat_type + raise "unknown memstat type #{memstat_type.dump}" + end + format = 'memstat' +end + +repr, logsep = case format +when 'yaml' + require 'yaml' + + [proc { |e| e.to_yaml }, "\n"] +when 'json', 'memstat' + require 'json' + + [proc { |e| e.to_json }, " "] +else + raise "unknown format '#{format}'" +end +formatter = proc { |e| puts repr.call(e) } + +INPUT_FORMATS.include? input_format or raise "unknown input format '#{input_format}'" + +dumpinfo = {} + +# parse a statedump entry +elem_cbk = proc { |s,&cbk| + arraylike = false + s.grep(/\S/).empty? and next + head = nil + while s.last =~ /^\s*$/ + s.pop + end + body = catch { |misc2| + s[0] =~ HEAD ? (head = $1) : (throw misc2) + body = [[]] + s[1..-1].each { |l| + if l =~ ARRSEP + arraylike = true + body << [] + next + end + body.last << l + } + + body.reject(&:empty?).map { |e| + ea = e.map { |l| + k,v = l.split("=",2) + m = /\A(0|-?[1-9]\d*)(\.\d+)?\Z/.match v + [k, m ? (m[2] ? Float(v) : Integer(v)) : v] + } + begin + ea.to_h + rescue + throw misc2 + end + } + } + + if body + cbk.call [head, arraylike ? body : (body.empty? ? {} : body[0])] + else + STDERR.puts ["WARNING: failed to parse record:", repr.call(s)].join(logsep) + end +} + +# aggregator routine +aggr = case format +when 'memstat' + meminfo = {} + # commit memory-related entries to meminfo + proc { |k,r| + case k + when /memusage/ + (meminfo["GF_MALLOC"]||={})[k] ||= r["size"] if k =~ memstat_select and k !~ memstat_reject + when "mempool" + r.each {|e| + kk = "mempool:#{e['pool-name']}" + (meminfo["mempool"]||={})[kk] ||= e["size"] if kk =~ memstat_select and kk !~ memstat_reject + } + end + } +else + # just format data, don't actually aggregate anything + proc { |pair| formatter.call pair } +end + +# processing the data +case input_format +when 'statedump' + acc = [] + $<.each { |l| + l = l.strip + if l =~ /^(DUMP-(?:START|END)-TIME):\s+(.*)/ + dumpinfo["_meta"]||={} + (dumpinfo["_meta"]["date"]||={})[$1] = Time.parse([$2, tz].join " ") + next + end + + if l =~ HEAD + elem_cbk.call(acc, &aggr) + acc = [l] + next + end + + acc << l + } + elem_cbk.call(acc, &aggr) +when 'json' + $<.each { |l| + r = JSON.load l + case r + when Array + aggr[r] + when Hash + dumpinfo.merge!
r + end + } +end + +# final actions: output aggregated data +case format +when 'memstat' + ma = meminfo.values.map(&:to_a).inject(:+) + totals = meminfo.map { |coll,h| [coll, h.values.inject(:+)] }.to_h + tt = ma.transpose[1].inject(:+) + + summary_sep,showm = case memstat_type + when 'json' + ["", proc { |k,v| puts({type: k, value: v}.to_json) }] + when 'plain', 'human' + # human-friendly number representation + hr = proc { |n| + qa = %w[B kB MB GB] + q = ((1...qa.size).find {|i| n < (1 << i*10)} || qa.size) - 1 + "%.2f%s" % [n.to_f / (1 << q*10), qa[q]] + } + + templ = "%{val} %{key}" + tft = proc { |t| t } + nft = if memstat_type == 'human' + nw = [ma.transpose[1], totals.values, tt].flatten.map{|n| hr[n].size}.max + proc { |n| + hn = hr[n] + " " * (nw - hn.size) + hn + } + else + nw = tt.to_s.size + proc { |n| "%#{nw}d" % n } + end + ## Alternative template, key first: + # templ = "%{key} %{val}" + # tw = ma.transpose[0].map(&:size).max + # tft = proc { |t| t + " " * [tw - t.size, 0].max } + # nft = (memstat_type == 'human') ? hr : proc { |n| n } + ["\n", proc { |k,v| puts templ % {key: tft[k], val: nft[v]} }] + else + raise 'this should be impossible' + end + + ma.sort_by { |k,v| v }.each(&showm) + print summary_sep + totals.each { |coll,t| showm.call "Total #{coll}", t } + showm.call "TOTAL", tt +else + formatter.call dumpinfo +end diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am index d4a3d0bf878..61446a9b84a 100644 --- a/extras/systemd/Makefile.am +++ b/extras/systemd/Makefile.am @@ -1,10 +1,14 @@ -CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service -EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in +CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service gluster-ta-volume.service +EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in gluster-ta-volume.service.in + +if USE_SYSTEMD +systemd_DATA = gluster-ta-volume.service +endif if WITH_SERVER if USE_SYSTEMD # systemddir is already defined through configure.ac -systemd_DATA = glusterd.service glusterfssharedstorage.service +systemd_DATA += glusterd.service glusterfssharedstorage.service if BUILD_EVENTS systemd_DATA += glustereventsd.service diff --git a/extras/systemd/gluster-ta-volume.service.in b/extras/systemd/gluster-ta-volume.service.in new file mode 100644 index 00000000000..2802bca05bf --- /dev/null +++ b/extras/systemd/gluster-ta-volume.service.in @@ -0,0 +1,13 @@ +[Unit] +Description=GlusterFS, Thin-arbiter process to maintain quorum for replica volume +After=network.target + +[Service] +Environment="LOG_LEVEL=WARNING" +ExecStart=@prefix@/sbin/glusterfsd -N --volfile-id ta -f @GLUSTERD_WORKDIR@/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-server.transport.socket.listen-port=24007 -LWARNING +Restart=always +KillMode=process +SuccessExitStatus=15 + +[Install] +WantedBy=multi-user.target diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in index 78f8e40b0e4..abb0d82911f 100644 --- a/extras/systemd/glusterd.service.in +++ b/extras/systemd/glusterd.service.in @@ -1,7 +1,10 @@ [Unit] Description=GlusterFS, a clustered file-system server -Requires=rpcbind.service -After=network.target rpcbind.service +Documentation=man:glusterd(8) +StartLimitBurst=6 +StartLimitIntervalSec=3600 +Requires=@RPCBIND_SERVICE@ +After=network.target @RPCBIND_SERVICE@ Before=network-online.target [Service] @@ -9,10 +12,15 @@ Type=forking 
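The gluster-ta-volume unit shown above is installed by setup-thin-arbiter.sh (later in this diff); once the unit file is in place, bringing it up is ordinary systemd housekeeping, roughly what the script itself runs:

    # register and start the thin-arbiter service on the arbiter node
    systemctl daemon-reload
    systemctl enable gluster-ta-volume
    systemctl start gluster-ta-volume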
PIDFile=@localstatedir@/run/glusterd.pid LimitNOFILE=65536 Environment="LOG_LEVEL=INFO" -EnvironmentFile=-@sysconfdir@/sysconfig/glusterd +EnvironmentFile=-@SYSCONF_DIR@/sysconfig/glusterd ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS KillMode=process +TimeoutSec=300 SuccessExitStatus=15 +Restart=on-abnormal +RestartSec=60 +StartLimitBurst=6 +StartLimitInterval=3600 [Install] WantedBy=multi-user.target diff --git a/extras/systemd/glustereventsd.service.in b/extras/systemd/glustereventsd.service.in index 4bfcf42f386..f80b78199f6 100644 --- a/extras/systemd/glustereventsd.service.in +++ b/extras/systemd/glustereventsd.service.in @@ -1,6 +1,8 @@ [Unit] Description=Gluster Events Notifier -After=syslog.target network.target +After=network.target +Documentation=man:glustereventsd(8) + [Service] Environment=PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES_EXPANDED@:$PYTHONPATH diff --git a/extras/thin-arbiter/gluster-ta-volume.service b/extras/thin-arbiter/gluster-ta-volume.service deleted file mode 100644 index 19be1757555..00000000000 --- a/extras/thin-arbiter/gluster-ta-volume.service +++ /dev/null @@ -1,13 +0,0 @@ -[Unit] -Description = Thin-arbiter process to maintain quorum for replica volume -After = network.target - -[Service] -Environment = "LOG_LEVEL=WARNING" -ExecStart = /usr/local/sbin/glusterfsd -N --volfile-id ta-vol -f /var/lib/glusterd/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-vol-server.transport.socket.listen-port=24007 -Restart = always -KillMode=process -SuccessExitStatus=15 - -[Install] -WantedBy = multi-user.target diff --git a/extras/thin-arbiter/setup-thin-arbiter.sh b/extras/thin-arbiter/setup-thin-arbiter.sh index 32fe7fc0cdd..0681b30ef3f 100755 --- a/extras/thin-arbiter/setup-thin-arbiter.sh +++ b/extras/thin-arbiter/setup-thin-arbiter.sh @@ -1,17 +1,62 @@ -#! /bin/bash +#!/bin/bash +# Copyright (c) 2018-2019 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + + +# This tool has been developed to setup thin-arbiter process on a node. +# Seting up a thin arbiter process involves following files - +# 1 - thin-arbiter.vol +# Thin-arbiter (TA) process will use the graph in this file to load the +# required translators. +# 2 - gluster-ta-volume.service (generated by gluster-ta-volume.service.in) +# TA process would be running as systemd service. +# +# TA process uses a location to save TA id files for every subvolume. +# This location can be taken as input from user. Once provided and the +# TA process is started on a node, it can not be changed using this +# script or by any other mean. The same location should be used in +# the gluster CLI when creating thin-arbiter volumes. + +MYPATH=`dirname $0` volloc="/var/lib/glusterd/thin-arbiter" mkdir -p $volloc -cp -f extras/thin-arbiter/thin-arbiter.vol $volloc/thin-arbiter.vol +if [ -f /etc/glusterfs/thin-arbiter.vol ]; then + volfile=/etc/glusterfs/thin-arbiter.vol +else + volfile=$MYPATH/thin-arbiter.vol +fi + tafile="$volloc/thin-arbiter.vol" + +help () { + echo " " + echo ' This tool helps to setup thin-arbiter (TA) process on a node. + TA process uses a location to save TA id files for every subvolume. + This location can be taken as input from user. 
Once provided and the + TA process is started on a node, it can not be changed using this script + or by any other mean. The same location should be used in gluster CLI + when creating thin-arbiter volumes. + + usage: setup-thin-arbiter.sh [-s] [-h] + options: + -s - Setup thin-arbiter file path and start process + -h - Show this help message and exit +' +} + volfile_set_brick_path () { while read -r line do dir=`echo "$line" | cut -d' ' -f 2` - if [ "$dir" = "directory" ] - then + if [ "$dir" = "directory" ]; then bpath=`echo "$line" | cut -d' ' -f 3` sed -i -- 's?'$bpath'?'$1'?g' $tafile return @@ -19,50 +64,121 @@ volfile_set_brick_path () { done < $tafile } -tapath="/mnt/thin-arbiter" -echo "Volume file to be used to start thin-arbiter process is :" -echo "$tafile" -echo " " -echo "Default thin-arbiter path is : $tapath" -echo -n "Do you want to change path for thin arbiter volumes. (y/N): " -echo " " -read moveon - -if [ "${moveon}" = 'N' ] || [ "${moveon}" = 'n' ]; then - echo "Default brick path, $tapath, has been set" - echo "for all thin arbiter volumes using this node" +check_ta_proc () { + pro=`ps aux | grep thin-arbiter.vol | grep "volfile-id"` + if [ "${pro}" = '' ]; then + echo "" + else + curr_loc=`cat $volloc/thin-arbiter.vol | grep option | grep directory` + loc=`echo "${curr_loc##* }"` + echo "******************************************************" + echo "Error:" + echo "Thin-arbiter process is running with thin-arbiter path = $loc" + echo "Can not change TA path on this host now." + echo "$pro" + echo "******************************************************" + exit 1 + fi +} + +getpath () { + check_ta_proc + echo "******************************************************" + echo "User will be required to enter a path/folder for arbiter volume." + echo "Please note that this path will be used for ALL VOLUMES using this" + echo "node to host thin-arbiter. After setting, if a volume" + echo "has been created using this host and path then path for" + echo "thin-arbiter can not be changed " + echo "******************************************************" echo " " -else - echo -n "Enter brick path for thin arbiter volumes: " - read tapath - echo "Entered brick path : $tapath " - echo "Please note that this brick path will be used for ALL" - echo "VOLUMES using this node to host thin-arbiter brick" + while true; + do + echo -n "Enter brick path for thin arbiter volumes: " + echo " " + read tapath + if [ "${tapath}" = '' ]; then + echo "Please enter valid path" + continue + else + echo "Entered brick path : $tapath " + echo "Please note that this brick path will be used for ALL" + echo "VOLUMES using this node to host thin-arbiter brick" + echo -n "Want to continue? (y/N): " + echo " " + read cont + + if [ "${cont}" = 'N' ] || [ "${cont}" = 'n' ]; then + exit 0 + else + break + fi + fi + done +} + +setup () { + getpath + mkdir -p $tapath/.glusterfs/indices + if [ -d $tapath/.glusterfs/indices ]; then + echo " " + else + echo "Could not create $tapath/.glusterfs/indices directory, check provided ta path." 
+ exit 1 + fi + + cp -f --backup --suffix=_old $volfile $volloc/thin-arbiter.vol + volfile_set_brick_path "$tapath" + + echo "Directory path to be used for thin-arbiter volume is: $tapath" echo " " -fi + echo "========================================================" -mkdir -p $tapath/.glusterfs/indices -volfile_set_brick_path "$tapath" + if [ -f /usr/lib/systemd/system/gluster-ta-volume.service ]; then + echo "Starting thin-arbiter process" + else + cp $MYPATH/../systemd/gluster-ta-volume.service /etc/systemd/system/ + echo "Starting thin-arbiter process" + chmod 0644 /etc/systemd/system/gluster-ta-volume.service + fi -echo "Directory path to be used for thin-arbiter volume is: $tapath" -echo " " + systemctl daemon-reload + systemctl enable gluster-ta-volume + systemctl stop gluster-ta-volume + systemctl start gluster-ta-volume -echo "========================================================" + if [ $? == 0 ]; then + echo "thin-arbiter process has been setup and running" + else + echo "Failed to setup thin arbiter" + exit 1 + fi -echo "Installing and starting service for thin-arbiter process" +} -cp extras/thin-arbiter/gluster-ta-volume.service /etc/systemd/system/ +main() +{ -chmod 0777 /etc/systemd/system/gluster-ta-volume.service + if [ "$#" -ne 1 ]; then + help + exit 0 + fi -systemctl daemon-reload -systemctl enable gluster-ta-volume -systemctl stop gluster-ta-volume -systemctl start gluster-ta-volume + while getopts "sh" opt; do + case $opt in + h) + help + exit 0 + ;; + s) + setup + exit 0 + ;; + *) + help + exit 0 + ;; + esac + done +} -if [ $? == 0 ] -then - echo "thin-arbiter process is setup and running" -else - echo "Failed to setup thin arbiter" -fi +main "$@" diff --git a/extras/thin-arbiter/thin-arbiter.vol b/extras/thin-arbiter/thin-arbiter.vol index 244a4caf485..c76babc7b3c 100644 --- a/extras/thin-arbiter/thin-arbiter.vol +++ b/extras/thin-arbiter/thin-arbiter.vol @@ -33,11 +33,10 @@ volume ta-index subvolumes ta-io-threads end-volume -volume ta-io-stats +volume /mnt/thin-arbiter type debug/io-stats option count-fop-hits off option latency-measurement off - option log-level WARNING option unique-id /mnt/thin-arbiter subvolumes ta-index end-volume @@ -54,5 +53,5 @@ volume ta-server option auth-path /mnt/thin-arbiter option transport.address-family inet option transport-type tcp - subvolumes ta-io-stats + subvolumes /mnt/thin-arbiter end-volume diff --git a/extras/who-wrote-glusterfs/gitdm.domain-map b/extras/who-wrote-glusterfs/gitdm.domain-map index 315355b08b8..7cd2bbd605b 100644 --- a/extras/who-wrote-glusterfs/gitdm.domain-map +++ b/extras/who-wrote-glusterfs/gitdm.domain-map @@ -4,6 +4,7 @@ active.by ActiveCloud appeartv.com Appear TV cern.ch CERN +cmss.chinamobile.com China Mobile(Suzhou) Software Technology datalab.es DataLab S.L. fb.com Facebook fedoraproject.org Fedora Project |
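With the volfile and service changes above in place, a two-way replica can consume the dedicated node at create time. A sketch with illustrative host and brick names (/mnt/thin-arbiter matches the default path used in thin-arbiter.vol):

    # two data bricks plus one thin-arbiter brick on a third node
    gluster volume create tavol replica 2 thin-arbiter 1 \
        server1:/bricks/b1 server2:/bricks/b2 ta-node:/mnt/thin-arbiter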

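As for the group-* option files earlier in this section (db-workload, distributed-virt, gluster-block, samba, virt): each is installed under $GLUSTERD_WORKDIR/groups and applied to a volume as a named profile in one shot, e.g. (the volume name is illustrative):

    # apply every option listed in the new distributed-virt profile
    gluster volume set myvol group distributed-virt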