diff options
author | Aravinda VK <avishwan@redhat.com> | 2016-12-02 15:04:49 +0530 |
---|---|---|
committer | Aravinda VK <avishwan@redhat.com> | 2017-05-23 11:15:47 +0000 |
commit | 02979e4663ea9e8a925f785f206d1dba02f253c4 (patch) | |
tree | d155a570877ed1bffa056776470a971ef18af545 /geo-replication/syncdaemon | |
parent | acf8cc3a2283d209337ba9f5da6b03396daac359 (diff) |
geo-rep: Rsync tunables for performance improvements
Flag: --ignore-missing-args
This Rsync flag reduces sync failures when a source file has been
unlinked but is still present in the --files-from list. This reduces
Rsync retries in Geo-rep and improves performance.
Flag: --existing
Rsync in Geo-rep never creates target files. Geo-rep creates the entry
in the Slave using RPC, and rsync --inplace is used to avoid creating a
temporary file and renaming it (to avoid a different GFID in the Slave).
If the entry is missing in the Slave, Geo-rep Rsync gets Permission denied
errors when it tries to create a file named after the GFID inside the
.gfid dir. (Geo-rep rsync syncs data using GFIDs with aux-gfid-mount.)
To disable these flags, run:
gluster volume geo-replication <session> config \
rsync-opt-ignore-missing-args false
gluster volume geo-replication <session> config \
rsync-opt-existing false
Thanks Kotresh for finding these awesome tunables.
BUG: 1400924
Change-Id: I6a84fb86a589bf6edc8dfd1086456a84b05a64fc
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://review.gluster.org/16010
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Kotresh HR <khiremat@redhat.com>
Diffstat (limited to 'geo-replication/syncdaemon')
-rw-r--r-- | geo-replication/syncdaemon/gsyncd.py | 2 | ||||
-rw-r--r-- | geo-replication/syncdaemon/resource.py | 12 | ||||
-rw-r--r-- | geo-replication/syncdaemon/syncdutils.py | 18 |
3 files changed, 31 insertions, 1 deletions
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index ac39a79128b..45b5ff546fa 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -298,6 +298,8 @@ def main_i(): op.add_option('--rsync-command', metavar='CMD', default='rsync') op.add_option('--rsync-options', metavar='OPTS', default='') op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress') + op.add_option('--rsync-opt-ignore-missing-args', default="true") + op.add_option('--rsync-opt-existing', default="true") op.add_option('--timeout', metavar='SEC', type=int, default=120) op.add_option('--connection-timeout', metavar='SEC', type=int, default=60, help=SUPPRESS_HELP) diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 2fb6d5f1cdf..9cbe92a93c5 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -37,7 +37,7 @@ from syncdutils import GsyncdError, select, privileged, boolify, funcode from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat from syncdutils import NoStimeAvailable, PartialHistoryAvailable from syncdutils import ChangelogException, ChangelogHistoryNotAvailable -from syncdutils import get_changelog_log_level +from syncdutils import get_changelog_log_level, get_rsync_version from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION from gsyncdstatus import GeorepStatus from syncdutils import get_master_and_slave_data_from_args @@ -1050,12 +1050,22 @@ class SlaveRemote(object): if not files: raise GsyncdError("no files to sync") logging.debug("files: " + ", ".join(files)) + + extra_rsync_flags = [] + # Performance flag, --ignore-missing-args, if rsync version is + # greater than 3.1.0 then include this flag. 
+ if boolify(gconf.rsync_opt_ignore_missing_args) and \ + get_rsync_version(gconf.rsync_command) >= "3.1.0": + extra_rsync_flags = ["--ignore-missing-args"] + argv = gconf.rsync_command.split() + \ ['-aR0', '--inplace', '--files-from=-', '--super', '--stats', '--numeric-ids', '--no-implied-dirs'] + \ + (boolify(gconf.rsync_opt_existing) and ['--existing'] or []) + \ gconf.rsync_options.split() + \ (boolify(gconf.sync_xattrs) and ['--xattrs'] or []) + \ (boolify(gconf.sync_acls) and ['--acls'] or []) + \ + extra_rsync_flags + \ ['.'] + list(args) log_rsync_performance = boolify(gconf.configinterface.get_realtime( diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py index c6acfbb0500..321e0d32ccc 100644 --- a/geo-replication/syncdaemon/syncdutils.py +++ b/geo-replication/syncdaemon/syncdutils.py @@ -23,6 +23,7 @@ from errno import EINTR, ENOENT, EPERM, ESTALE, EBUSY, errorcode from signal import signal, SIGTERM import select as oselect from os import waitpid as owaitpid +import subprocess from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE sys.path.insert(1, GLUSTERFS_LIBEXECDIR) @@ -68,6 +69,7 @@ GF_OP_RETRIES = 10 CHANGELOG_AGENT_SERVER_VERSION = 1.0 CHANGELOG_AGENT_CLIENT_VERSION = 1.0 NodeID = None +rsync_version = None def escape(s): @@ -554,3 +556,19 @@ def get_master_and_slave_data_from_args(args): slave_data = arg.replace("ssh://", "") return (master_name, slave_data) + + +def get_rsync_version(rsync_cmd): + global rsync_version + if rsync_version is not None: + return rsync_version + + rsync_version = "0" + p = subprocess.Popen([rsync_cmd, "--version"], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE) + out, err = p.communicate() + if p.returncode == 0: + rsync_version = out.split(" ", 4)[3] + + return rsync_version |