From 02979e4663ea9e8a925f785f206d1dba02f253c4 Mon Sep 17 00:00:00 2001 From: Aravinda VK Date: Fri, 2 Dec 2016 15:04:49 +0530 Subject: geo-rep: Rsync tunables for performance improvements Flag: --ignore-missing-args This Rsync flag reduces sync failures if the source file is unlinked but present in --files-from list. This reduces Rsync retries in Geo-rep and improves the performance Flag: --existing Rsync in Geo-rep never creates target files. Using RPC Geo-rep creates entry in Slave and rsync --inplace used to prevent creating temporary file and rename.(To avoid different GFID in Slave). If the entry is missing in Slave then Geo-rep Rsync gets Permission denied errors when it tries to create file with name as GFID inside .gfid dir.(Geo-rep rsync syncs data using GFIDS with aux-gfid-mount) To disable these flags, gluster volume geo-replication config \ rsync-opt-ignore-missing-args false gluster volume geo-replication config \ rsync-opt-existing false Thanks Kotresh for finding these awesome tunables. BUG: 1400924 Change-Id: I6a84fb86a589bf6edc8dfd1086456a84b05a64fc Signed-off-by: Aravinda VK Reviewed-on: https://review.gluster.org/16010 Smoke: Gluster Build System Reviewed-by: Amar Tumballi NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Kotresh HR --- geo-replication/syncdaemon/gsyncd.py | 2 ++ geo-replication/syncdaemon/resource.py | 12 +++++++++++- geo-replication/syncdaemon/syncdutils.py | 18 ++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'geo-replication') diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index ac39a79128b..45b5ff546fa 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -298,6 +298,8 @@ def main_i(): op.add_option('--rsync-command', metavar='CMD', default='rsync') op.add_option('--rsync-options', metavar='OPTS', default='') op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress') + op.add_option('--rsync-opt-ignore-missing-args', default="true") + op.add_option('--rsync-opt-existing', default="true") op.add_option('--timeout', metavar='SEC', type=int, default=120) op.add_option('--connection-timeout', metavar='SEC', type=int, default=60, help=SUPPRESS_HELP) diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 2fb6d5f1cdf..9cbe92a93c5 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -37,7 +37,7 @@ from syncdutils import GsyncdError, select, privileged, boolify, funcode from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat from syncdutils import NoStimeAvailable, PartialHistoryAvailable from syncdutils import ChangelogException, ChangelogHistoryNotAvailable -from syncdutils import get_changelog_log_level +from syncdutils import get_changelog_log_level, get_rsync_version from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION from gsyncdstatus import GeorepStatus from syncdutils import get_master_and_slave_data_from_args @@ -1050,12 +1050,22 @@ class SlaveRemote(object): if not files: raise GsyncdError("no files to sync") logging.debug("files: " + ", ".join(files)) + + extra_rsync_flags = [] + # Performance flag, --ignore-missing-args, if rsync version is + # greater than 3.1.0 then include this flag. + if boolify(gconf.rsync_opt_ignore_missing_args) and \ + get_rsync_version(gconf.rsync_command) >= "3.1.0": + extra_rsync_flags = ["--ignore-missing-args"] + argv = gconf.rsync_command.split() + \ ['-aR0', '--inplace', '--files-from=-', '--super', '--stats', '--numeric-ids', '--no-implied-dirs'] + \ + (boolify(gconf.rsync_opt_existing) and ['--existing'] or []) + \ gconf.rsync_options.split() + \ (boolify(gconf.sync_xattrs) and ['--xattrs'] or []) + \ (boolify(gconf.sync_acls) and ['--acls'] or []) + \ + extra_rsync_flags + \ ['.'] + list(args) log_rsync_performance = boolify(gconf.configinterface.get_realtime( diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py index c6acfbb0500..321e0d32ccc 100644 --- a/geo-replication/syncdaemon/syncdutils.py +++ b/geo-replication/syncdaemon/syncdutils.py @@ -23,6 +23,7 @@ from errno import EINTR, ENOENT, EPERM, ESTALE, EBUSY, errorcode from signal import signal, SIGTERM import select as oselect from os import waitpid as owaitpid +import subprocess from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE sys.path.insert(1, GLUSTERFS_LIBEXECDIR) @@ -68,6 +69,7 @@ GF_OP_RETRIES = 10 CHANGELOG_AGENT_SERVER_VERSION = 1.0 CHANGELOG_AGENT_CLIENT_VERSION = 1.0 NodeID = None +rsync_version = None def escape(s): @@ -554,3 +556,19 @@ def get_master_and_slave_data_from_args(args): slave_data = arg.replace("ssh://", "") return (master_name, slave_data) + + +def get_rsync_version(rsync_cmd): + global rsync_version + if rsync_version is not None: + return rsync_version + + rsync_version = "0" + p = subprocess.Popen([rsync_cmd, "--version"], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE) + out, err = p.communicate() + if p.returncode == 0: + rsync_version = out.split(" ", 4)[3] + + return rsync_version -- cgit