summaryrefslogtreecommitdiffstats
path: root/geo-replication/syncdaemon
diff options
context:
space:
mode:
authorAravinda VK <avishwan@redhat.com>2016-12-02 15:04:49 +0530
committerAravinda VK <avishwan@redhat.com>2017-05-23 11:15:47 +0000
commit02979e4663ea9e8a925f785f206d1dba02f253c4 (patch)
treed155a570877ed1bffa056776470a971ef18af545 /geo-replication/syncdaemon
parentacf8cc3a2283d209337ba9f5da6b03396daac359 (diff)
geo-rep: Rsync tunables for performance improvements
Flag: --ignore-missing-args This Rsync flag reduces sync failures if the source file is unlinked but present in --files-from list. This reduces Rsync retries in Geo-rep and improves the performance Flag: --existing Rsync in Geo-rep never creates target files. Using RPC Geo-rep creates entry in Slave and rsync --inplace used to prevent creating temporary file and rename.(To avoid different GFID in Slave). If the entry is missing in Slave then Geo-rep Rsync gets Permission denied errors when it tries to create file with name as GFID inside .gfid dir.(Geo-rep rsync syncs data using GFIDS with aux-gfid-mount) To disable these flags, gluster volume geo-replication <session> config \ rsync-opt-ignore-missing-args false gluster volume geo-replication <session> config \ rsync-opt-existing false Thanks Kotresh for finding these awesome tunables. BUG: 1400924 Change-Id: I6a84fb86a589bf6edc8dfd1086456a84b05a64fc Signed-off-by: Aravinda VK <avishwan@redhat.com> Reviewed-on: https://review.gluster.org/16010 Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Amar Tumballi <amarts@redhat.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Kotresh HR <khiremat@redhat.com>
Diffstat (limited to 'geo-replication/syncdaemon')
-rw-r--r--geo-replication/syncdaemon/gsyncd.py2
-rw-r--r--geo-replication/syncdaemon/resource.py12
-rw-r--r--geo-replication/syncdaemon/syncdutils.py18
3 files changed, 31 insertions, 1 deletions
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index ac39a79128b..45b5ff546fa 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -298,6 +298,8 @@ def main_i():
op.add_option('--rsync-command', metavar='CMD', default='rsync')
op.add_option('--rsync-options', metavar='OPTS', default='')
op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress')
+ op.add_option('--rsync-opt-ignore-missing-args', default="true")
+ op.add_option('--rsync-opt-existing', default="true")
op.add_option('--timeout', metavar='SEC', type=int, default=120)
op.add_option('--connection-timeout', metavar='SEC',
type=int, default=60, help=SUPPRESS_HELP)
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index 2fb6d5f1cdf..9cbe92a93c5 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -37,7 +37,7 @@ from syncdutils import GsyncdError, select, privileged, boolify, funcode
from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat
from syncdutils import NoStimeAvailable, PartialHistoryAvailable
from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
-from syncdutils import get_changelog_log_level
+from syncdutils import get_changelog_log_level, get_rsync_version
from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
from gsyncdstatus import GeorepStatus
from syncdutils import get_master_and_slave_data_from_args
@@ -1050,12 +1050,22 @@ class SlaveRemote(object):
if not files:
raise GsyncdError("no files to sync")
logging.debug("files: " + ", ".join(files))
+
+ extra_rsync_flags = []
+ # Performance flag, --ignore-missing-args, if rsync version is
+ # greater than 3.1.0 then include this flag.
+ if boolify(gconf.rsync_opt_ignore_missing_args) and \
+ get_rsync_version(gconf.rsync_command) >= "3.1.0":
+ extra_rsync_flags = ["--ignore-missing-args"]
+
argv = gconf.rsync_command.split() + \
['-aR0', '--inplace', '--files-from=-', '--super',
'--stats', '--numeric-ids', '--no-implied-dirs'] + \
+ (boolify(gconf.rsync_opt_existing) and ['--existing'] or []) + \
gconf.rsync_options.split() + \
(boolify(gconf.sync_xattrs) and ['--xattrs'] or []) + \
(boolify(gconf.sync_acls) and ['--acls'] or []) + \
+ extra_rsync_flags + \
['.'] + list(args)
log_rsync_performance = boolify(gconf.configinterface.get_realtime(
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index c6acfbb0500..321e0d32ccc 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -23,6 +23,7 @@ from errno import EINTR, ENOENT, EPERM, ESTALE, EBUSY, errorcode
from signal import signal, SIGTERM
import select as oselect
from os import waitpid as owaitpid
+import subprocess
from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE
sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
@@ -68,6 +69,7 @@ GF_OP_RETRIES = 10
CHANGELOG_AGENT_SERVER_VERSION = 1.0
CHANGELOG_AGENT_CLIENT_VERSION = 1.0
NodeID = None
+rsync_version = None
def escape(s):
@@ -554,3 +556,19 @@ def get_master_and_slave_data_from_args(args):
slave_data = arg.replace("ssh://", "")
return (master_name, slave_data)
+
+
+def get_rsync_version(rsync_cmd):
+ global rsync_version
+ if rsync_version is not None:
+ return rsync_version
+
+ rsync_version = "0"
+ p = subprocess.Popen([rsync_cmd, "--version"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ rsync_version = out.split(" ", 4)[3]
+
+ return rsync_version