Diffstat (limited to 'geo-replication/syncdaemon')
-rw-r--r--  geo-replication/syncdaemon/Makefile.am            |    9
-rw-r--r--  geo-replication/syncdaemon/README.md               |    5
-rw-r--r--  geo-replication/syncdaemon/__codecheck.py          |    3
-rw-r--r--  geo-replication/syncdaemon/argsupgrade.py          |  359
-rw-r--r--  geo-replication/syncdaemon/changelogagent.py       |   78
-rw-r--r--  geo-replication/syncdaemon/conf.py.in              |   17
-rw-r--r--  geo-replication/syncdaemon/configinterface.py.in   |  352
-rw-r--r--  geo-replication/syncdaemon/gsyncd.py               |  930
-rw-r--r--  geo-replication/syncdaemon/gsyncdconfig.py         |  485
-rw-r--r--  geo-replication/syncdaemon/gsyncdstatus.py         |  419
-rw-r--r--  geo-replication/syncdaemon/libcxattr.py            |   54
-rw-r--r--  geo-replication/syncdaemon/libgfchangelog.py       |  247
-rw-r--r--  geo-replication/syncdaemon/logutils.py             |   77
-rw-r--r--  geo-replication/syncdaemon/master.py               | 1624
-rw-r--r--  geo-replication/syncdaemon/monitor.py              |  496
-rw-r--r--  geo-replication/syncdaemon/py2py3.py               |  184
-rw-r--r--  geo-replication/syncdaemon/rconf.py (renamed from geo-replication/syncdaemon/gconf.py) |   12
-rw-r--r--  geo-replication/syncdaemon/repce.py                |   36
-rw-r--r--  geo-replication/syncdaemon/resource.py             | 2038
-rw-r--r--  geo-replication/syncdaemon/subcmds.py              |  335
-rw-r--r--  geo-replication/syncdaemon/syncdutils.py           |  890
21 files changed, 5466 insertions, 3184 deletions
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
index 885963eae2b..d70e3368faf 100644
--- a/geo-replication/syncdaemon/Makefile.am
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -1,7 +1,8 @@
-syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
+syncdaemondir = $(GLUSTERFS_LIBEXECDIR)/python/syncdaemon
-syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py \
- resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \
- $(top_builddir)/contrib/ipaddr-py/ipaddr.py libgfchangelog.py changelogagent.py
+syncdaemon_PYTHON = rconf.py gsyncd.py __init__.py master.py README.md repce.py \
+ resource.py syncdutils.py monitor.py libcxattr.py gsyncdconfig.py \
+ libgfchangelog.py gsyncdstatus.py conf.py logutils.py \
+ subcmds.py argsupgrade.py py2py3.py
CLEANFILES =
diff --git a/geo-replication/syncdaemon/README.md b/geo-replication/syncdaemon/README.md
index 67f346ace5a..5ab785ae669 100644
--- a/geo-replication/syncdaemon/README.md
+++ b/geo-replication/syncdaemon/README.md
@@ -12,14 +12,13 @@ Requirements are categorized according to this.
* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave)
* rsync (both)
* glusterfs: with marker and changelog support (master & slave);
-* FUSE: glusterfs fuse module with auxilary gfid based access support
+* FUSE: glusterfs fuse module with auxiliary gfid based access support
INSTALLATION
------------
As of now, the supported way of operation is running from the source directory or using the RPMs given.
-If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
CONFIGURATION
-------------
@@ -45,7 +44,7 @@ in the source tree.
USAGE
-----
-gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally.
+gsyncd is a utility for continuous mirroring, i.e. it mirrors master to slave incrementally.
Assume we have a gluster volume _pop_ at localhost. We try to set up the mirroring for volume
_pop_ using gsyncd for gluster volume _moz_ on remote machine/cluster @ example.com. The
respective gsyncd invocations are (demoing some syntax sugaring):
diff --git a/geo-replication/syncdaemon/__codecheck.py b/geo-replication/syncdaemon/__codecheck.py
index 45dbd26bb64..9437147f7d9 100644
--- a/geo-replication/syncdaemon/__codecheck.py
+++ b/geo-replication/syncdaemon/__codecheck.py
@@ -8,6 +8,7 @@
# cases as published by the Free Software Foundation.
#
+from __future__ import print_function
import os
import os.path
import sys
@@ -45,7 +46,7 @@ class IPNetwork(list):
gsyncd = sys.modules['gsyncd']
for a in [['--help'], ['--version'],
['--canonicalize-escape-url', '/foo']]:
- print('>>> invoking program with args: %s' % ' '.join(a))
+ print(('>>> invoking program with args: %s' % ' '.join(a)))
pid = os.fork()
if not pid:
sys_argv_set(a)
diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py
new file mode 100644
index 00000000000..7af40633ef8
--- /dev/null
+++ b/geo-replication/syncdaemon/argsupgrade.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+# Converts old style args into new style args
+
+from __future__ import print_function
+import sys
+from argparse import ArgumentParser
+import socket
+import os
+
+from syncdutils import GsyncdError
+from conf import GLUSTERD_WORKDIR
+
+
+def gethostbyname(hnam):
+ """gethostbyname wrapper"""
+ try:
+ return socket.gethostbyname(hnam)
+ except socket.gaierror:
+ ex = sys.exc_info()[1]
+ raise GsyncdError("failed to resolve %s: %s" %
+ (hnam, ex.strerror))
+
+
+def slave_url(urldata):
+ urldata = urldata.replace("ssh://", "")
+ host, vol = urldata.split("::")
+ vol = vol.split(":")[0]
+ return "%s::%s" % (host, vol)
+
+
+def init_gsyncd_template_conf():
+ path = GLUSTERD_WORKDIR + "/geo-replication/gsyncd_template.conf"
+ dname = os.path.dirname(path)
+ if not os.path.exists(dname):
+ try:
+ os.mkdir(dname)
+ except OSError:
+ pass
+
+ if not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def init_gsyncd_session_conf(master, slave):
+ slave = slave_url(slave)
+ master = master.strip(":")
+ slavehost, slavevol = slave.split("::")
+ slavehost = slavehost.split("@")[-1]
+
+ # Session Config File
+ path = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+ GLUSTERD_WORKDIR, master, slavehost, slavevol)
+
+ if os.path.exists(os.path.dirname(path)) and not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def init_gsyncd_conf(path):
+ dname = os.path.dirname(path)
+ if not os.path.exists(dname):
+ try:
+ os.mkdir(dname)
+ except OSError:
+ pass
+
+ if os.path.exists(dname) and not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def upgrade():
+ # Create a dummy (empty) template conf; a hack to avoid glusterd
+ # failing when it stats the file to check for its existence.
+ init_gsyncd_template_conf()
+
+ inet6 = False
+ if "--inet6" in sys.argv:
+ inet6 = True
+
+ if "--monitor" in sys.argv:
+ # python gsyncd.py --path=/bricks/b1
+ # --monitor -c gsyncd.conf
+ # --iprefix=/var :gv1
+ # --glusterd-uuid=f26ac7a8-eb1b-4ea7-959c-80b27d3e43d0
+ # f241::gv2
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--glusterd-uuid")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ p.add_argument("--path", action="append")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # Overwrite the sys.argv after rearrange
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+ sys.argv = [
+ sys.argv[0],
+ "monitor",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--local-node-id",
+ pargs.glusterd_uuid
+ ]
+ elif "--status-get" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2
+ # --status-get --path /bricks/b1
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--path")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ sys.argv = [
+ sys.argv[0],
+ "status",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--local-path",
+ pargs.path
+ ]
+ elif "--canonicalize-url" in sys.argv:
+ # This can accept multiple URLs and converts each URL to the
+ # format ssh://USER@IP:gluster://127.0.0.1:VOLUME
+ # This format is not used in gsyncd, but is added for glusterd compatibility
+ p = ArgumentParser()
+ p.add_argument("--canonicalize-url", nargs="+")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ for url in pargs.canonicalize_url:
+ host, vol = url.split("::")
+ host = host.replace("ssh://", "")
+ remote_addr = host
+ if "@" not in remote_addr:
+ remote_addr = "root@" + remote_addr
+
+ user, hname = remote_addr.split("@")
+
+ if not inet6:
+ hname = gethostbyname(hname)
+
+ print(("ssh://%s@%s:gluster://127.0.0.1:%s" % (
+ user, hname, vol)))
+
+ sys.exit(0)
+ elif "--normalize-url" in sys.argv:
+ # Adds the scheme prefix ssh://
+ # This format is not used in gsyncd, but is added for glusterd compatibility
+ p = ArgumentParser()
+ p.add_argument("--normalize-url")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+ print(("ssh://%s" % slave_url(pargs.normalize_url)))
+ sys.exit(0)
+ elif "--config-get-all" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get-all
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ sys.argv = [
+ sys.argv[0],
+ "config-get",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--show-defaults",
+ "--use-underscore"
+ ]
+ elif "--verify" in sys.argv and "spawning" in sys.argv:
+ # Just checks whether gsyncd can be spawned or not
+ sys.exit(0)
+ elif "--slavevoluuid-get" in sys.argv:
+ # --slavevoluuid-get f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--slavevoluuid-get")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+ host, vol = pargs.slavevoluuid_get.split("::")
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "voluuidget",
+ host,
+ vol
+ ]
+ elif "--config-set-rx" in sys.argv:
+ # Not required since default conf is not generated
+ # and custom conf generated only when required
+ # -c gsyncd.conf --config-set-rx remote-gsyncd
+ # /usr/local/libexec/glusterfs/gsyncd . .
+ # Touch the gsyncd.conf file and create session
+ # directory if required
+ p = ArgumentParser()
+ p.add_argument("-c", dest="config_file")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # If it is not the template conf then it is trying to create a
+ # session config; create an empty file instead
+ if pargs.config_file.endswith("gsyncd.conf"):
+ init_gsyncd_conf(pargs.config_file)
+ sys.exit(0)
+ elif "--create" in sys.argv:
+ # To update monitor status file
+ # --create Created -c gsyncd.conf
+ # --iprefix=/var :gv1 f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--create")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "monitor-status",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ pargs.create
+ ]
+ elif "--config-get" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get pid-file
+ p = ArgumentParser()
+ p.add_argument("--config-get")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-get",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--only-value",
+ "--show-defaults",
+ "--name",
+ pargs.config_get.replace("_", "-")
+ ]
+ elif "--config-set" in sys.argv:
+ # ignore session-owner
+ if "session-owner" in sys.argv:
+ sys.exit(0)
+
+ # --path=/bricks/b1 -c gsyncd.conf :gv1 f241::gv2
+ # --config-set log_level DEBUG
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--config-set", action='store_true')
+ p.add_argument("name")
+ p.add_argument("--value")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-set",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--name=%s" % pargs.name,
+ "--value=%s" % pargs.value
+ ]
+ elif "--config-check" in sys.argv:
+ # --config-check georep_session_working_dir
+ p = ArgumentParser()
+ p.add_argument("--config-check")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-check",
+ pargs.config_check.replace("_", "-")
+ ]
+ elif "--config-del" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-del log_level
+ p = ArgumentParser()
+ p.add_argument("--config-del")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-reset",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ pargs.config_del.replace("_", "-")
+ ]
+ elif "--delete" in sys.argv:
+ # --delete -c gsyncd.conf --iprefix=/var
+ # --path-list=--path=/bricks/b1 :gv1 f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--reset-sync-time", action="store_true")
+ p.add_argument("--path-list")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--iprefix")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ paths = pargs.path_list.split("--path=")
+ paths = ["--path=%s" % x.strip() for x in paths if x.strip() != ""]
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "delete",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave)
+ ]
+ sys.argv += paths
+
+ if pargs.reset_sync_time:
+ sys.argv.append("--reset-sync-time")
+
+ if inet6:
+ # Add `--inet6` as first argument
+ sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:]
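To make the conversion above concrete, here is a minimal sketch (not part of the commit) of how upgrade() rewrites sys.argv for the old --status-get invocation, using the example values quoted in the comments of the hunk:

    # Old-style invocation sent by a legacy glusterd (from the comment above):
    #   gsyncd.py -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 \
    #             --status-get --path /bricks/b1
    old_argv = ["gsyncd.py", "-c", "gsyncd.conf", "--iprefix=/var",
                ":gv1", "f241::gv2", "--status-get", "--path", "/bricks/b1"]

    # After argsupgrade.upgrade(), sys.argv carries the new subcommand form
    # parsed by the rewritten gsyncd.py:
    new_argv = ["gsyncd.py", "status", "gv1", "f241::gv2",
                "--local-path", "/bricks/b1"]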
diff --git a/geo-replication/syncdaemon/changelogagent.py b/geo-replication/syncdaemon/changelogagent.py
deleted file mode 100644
index 54d82cefcd2..00000000000
--- a/geo-replication/syncdaemon/changelogagent.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
-# This file is part of GlusterFS.
-
-# This file is licensed to you under your choice of the GNU Lesser
-# General Public License, version 3 or any later version (LGPLv3 or
-# later), or the GNU General Public License, version 2 (GPLv2), in all
-# cases as published by the Free Software Foundation.
-#
-
-import os
-import logging
-import syncdutils
-from syncdutils import select, CHANGELOG_AGENT_SERVER_VERSION
-from repce import RepceServer
-
-
-class _MetaChangelog(object):
-
- def __getattr__(self, meth):
- from libgfchangelog import Changes as LChanges
- xmeth = [m for m in dir(LChanges) if m[0] != '_']
- if meth not in xmeth:
- return
- for m in xmeth:
- setattr(self, m, getattr(LChanges, m))
- return getattr(self, meth)
-
-Changes = _MetaChangelog()
-
-
-class Changelog(object):
- def version(self):
- return CHANGELOG_AGENT_SERVER_VERSION
-
- def register(self, cl_brick, cl_dir, cl_log, cl_level, retries=0):
- return Changes.cl_register(cl_brick, cl_dir, cl_log, cl_level, retries)
-
- def scan(self):
- return Changes.cl_scan()
-
- def getchanges(self):
- return Changes.cl_getchanges()
-
- def done(self, clfile):
- return Changes.cl_done(clfile)
-
- def history(self, changelog_path, start, end, num_parallel):
- return Changes.cl_history_changelog(changelog_path, start, end,
- num_parallel)
-
- def history_scan(self):
- return Changes.cl_history_scan()
-
- def history_getchanges(self):
- return Changes.cl_history_getchanges()
-
- def history_done(self, clfile):
- return Changes.cl_history_done(clfile)
-
-
-class ChangelogAgent(object):
- def __init__(self, obj, fd_tup):
- (inf, ouf, rw, ww) = fd_tup.split(',')
- os.close(int(rw))
- os.close(int(ww))
- repce = RepceServer(obj, int(inf), int(ouf), 1)
- t = syncdutils.Thread(target=lambda: (repce.service_loop(),
- syncdutils.finalize()))
- t.start()
- logging.info('Agent listining...')
-
- select((), (), ())
-
-
-def agent(obj, fd_tup):
- return ChangelogAgent(obj, fd_tup)
diff --git a/geo-replication/syncdaemon/conf.py.in b/geo-replication/syncdaemon/conf.py.in
new file mode 100644
index 00000000000..2042fa9cdfb
--- /dev/null
+++ b/geo-replication/syncdaemon/conf.py.in
@@ -0,0 +1,17 @@
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@'
+GLUSTERD_WORKDIR = "@GLUSTERD_WORKDIR@"
+
+LOCALSTATEDIR = "@localstatedir@"
+UUID_FILE = "@GLUSTERD_WORKDIR@/glusterd.info"
+GLUSTERFS_CONFDIR = "@SYSCONF_DIR@/glusterfs"
+GCONF_VERSION = 4.0
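The @...@ tokens are filled in by configure at build time. A sketch of the generated conf.py on a typical installation follows (the concrete paths are assumptions based on common build defaults, not values taken from this commit):

    # conf.py as generated from conf.py.in (illustrative paths only)
    GLUSTERFS_LIBEXECDIR = '/usr/libexec/glusterfs'
    GLUSTERD_WORKDIR = "/var/lib/glusterd"

    LOCALSTATEDIR = "/var"
    UUID_FILE = "/var/lib/glusterd/glusterd.info"
    GLUSTERFS_CONFDIR = "/etc/glusterfs"
    GCONF_VERSION = 4.0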
diff --git a/geo-replication/syncdaemon/configinterface.py.in b/geo-replication/syncdaemon/configinterface.py.in
deleted file mode 100644
index acb51486a10..00000000000
--- a/geo-replication/syncdaemon/configinterface.py.in
+++ /dev/null
@@ -1,352 +0,0 @@
-#
-# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
-# This file is part of GlusterFS.
-
-# This file is licensed to you under your choice of the GNU Lesser
-# General Public License, version 3 or any later version (LGPLv3 or
-# later), or the GNU General Public License, version 2 (GPLv2), in all
-# cases as published by the Free Software Foundation.
-#
-
-try:
- import ConfigParser
-except ImportError:
- # py 3
- import configparser as ConfigParser
-import re
-from string import Template
-import os
-import errno
-import sys
-from stat import ST_DEV, ST_INO, ST_MTIME
-import tempfile
-import shutil
-
-from syncdutils import escape, unescape, norm, update_file, GsyncdError
-
-SECT_ORD = '__section_order__'
-SECT_META = '__meta__'
-config_version = 2.0
-
-re_type = type(re.compile(''))
-
-
-# (SECTION, OPTION, OLD VALUE, NEW VALUE)
-CONFIGS = (
- ("peersrx . .",
- "georep_session_working_dir",
- "",
- "@GLUSTERD_WORKDIR@/geo-replication/${mastervol}_${remotehost}_"
- "${slavevol}/"),
- ("peersrx .",
- "gluster_params",
- "aux-gfid-mount xlator-option=\*-dht.assert-no-child-down=true",
- "aux-gfid-mount"),
- ("peersrx . .",
- "ssh_command_tar",
- "",
- "ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no "
- "-i @GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"),
- ("peersrx . .",
- "changelog_log_file",
- "",
- "${iprefix}/log/glusterfs/geo-replication/${mastervol}"
- "/${eSlave}${local_id}-changes.log"),
- ("peersrx . .",
- "working_dir",
- "@localstatedir@/run/gluster/${mastervol}/${eSlave}",
- "${iprefix}/lib/misc/glusterfsd/${mastervol}/${eSlave}"),
-)
-
-
-def upgrade_config_file(path):
- config_change = False
- config = ConfigParser.RawConfigParser()
- config.read(path)
-
- for sec, opt, oldval, newval in CONFIGS:
- try:
- val = config.get(sec, opt)
- except ConfigParser.NoOptionError:
- # if new config opt not exists
- config_change = True
- config.set(sec, opt, newval)
- continue
- except ConfigParser.Error:
- """
- When gsyncd invoked at the time of create, config file
- will not be their. Ignore any ConfigParser errors
- """
- continue
-
- if val == newval:
- # value is same as new val
- continue
-
- if val == oldval:
- # config value needs update
- config_change = True
- config.set(sec, opt, newval)
-
- if config_change:
- tempConfigFile = tempfile.NamedTemporaryFile(mode="wb", delete=False)
- with open(tempConfigFile.name, 'wb') as configFile:
- config.write(configFile)
-
- # If src and dst are two different file system, then os.rename
- # fails, In this case if temp file created in /tmp and if /tmp is
- # seperate fs then os.rename gives following error, so use shutil
- # OSError: [Errno 18] Invalid cross-device link
- # mail.python.org/pipermail/python-list/2005-February/342893.html
- shutil.move(tempConfigFile.name, path)
-
-
-class MultiDict(object):
-
- """a virtual dict-like class which functions as the union
- of underlying dicts"""
-
- def __init__(self, *dd):
- self.dicts = dd
-
- def __getitem__(self, key):
- val = None
- for d in self.dicts:
- if d.get(key) is not None:
- val = d[key]
- if val is None:
- raise KeyError(key)
- return val
-
-
-class GConffile(object):
-
- """A high-level interface to ConfigParser which flattens the two-tiered
- config layout by implenting automatic section dispatch based on initial
- parameters.
-
- Also ensure section ordering in terms of their time of addition -- a compat
- hack for Python < 2.7.
- """
-
- def _normconfig(self):
- """normalize config keys by s/-/_/g"""
- for n, s in self.config._sections.items():
- if n.find('__') == 0:
- continue
- s2 = type(s)()
- for k, v in s.items():
- if k.find('__') != 0:
- k = norm(k)
- s2[k] = v
- self.config._sections[n] = s2
-
- def __init__(self, path, peers, *dd):
- """
- - .path: location of config file
- - .config: underlying ConfigParser instance
- - .peers: on behalf of whom we flatten .config
- (master, or master-slave url pair)
- - .auxdicts: template subtituents
- """
- self.peers = peers
- self.path = path
- self.auxdicts = dd
- self.config = ConfigParser.RawConfigParser()
- self.config.read(path)
- self.dev, self.ino, self.mtime = -1, -1, -1
- self._normconfig()
-
- def _load(self):
- try:
- sres = os.stat(self.path)
- self.dev = sres[ST_DEV]
- self.ino = sres[ST_INO]
- self.mtime = sres[ST_MTIME]
- except (OSError, IOError):
- if sys.exc_info()[1].errno == errno.ENOENT:
- sres = None
-
- self.config = ConfigParser.RawConfigParser()
- self.config.read(self.path)
- self._normconfig()
-
- def get_realtime(self, opt):
- try:
- sres = os.stat(self.path)
- except (OSError, IOError):
- if sys.exc_info()[1].errno == errno.ENOENT:
- sres = None
- else:
- raise
-
- # compare file system stat with that of our stream file handle
- if not sres or sres[ST_DEV] != self.dev or \
- sres[ST_INO] != self.ino or self.mtime != sres[ST_MTIME]:
- self._load()
-
- return self.get(opt, printValue=False)
-
- def section(self, rx=False):
- """get the section name of the section representing .peers
- in .config"""
- peers = self.peers
- if not peers:
- peers = ['.', '.']
- rx = True
- if rx:
- st = 'peersrx'
- else:
- st = 'peers'
- return ' '.join([st] + [escape(u) for u in peers])
-
- @staticmethod
- def parse_section(section):
- """retrieve peers sequence encoded by section name
- (as urls or regexen, depending on section type)
- """
- sl = section.split()
- st = sl.pop(0)
- sl = [unescape(u) for u in sl]
- if st == 'peersrx':
- sl = [re.compile(u) for u in sl]
- return sl
-
- def ord_sections(self):
- """Return an ordered list of sections.
-
- Ordering happens based on the auxiliary
- SECT_ORD section storing indices for each
- section added through the config API.
-
- To not to go corrupt in case of manually
- written config files, we take care to append
- also those sections which are not registered
- in SECT_ORD.
-
- Needed for python 2.{4,5,6} where ConfigParser
- cannot yet order sections/options internally.
- """
- so = {}
- if self.config.has_section(SECT_ORD):
- so = self.config._sections[SECT_ORD]
- so2 = {}
- for k, v in so.items():
- if k != '__name__':
- so2[k] = int(v)
- tv = 0
- if so2:
- tv = max(so2.values()) + 1
- ss = [s for s in self.config.sections() if s.find('__') != 0]
- for s in ss:
- if s in so.keys():
- continue
- so2[s] = tv
- tv += 1
-
- def scmp(x, y):
- return cmp(*(so2[s] for s in (x, y)))
- ss.sort(scmp)
- return ss
-
- def update_to(self, dct, allow_unresolved=False):
- """update @dct from key/values of ours.
-
- key/values are collected from .config by filtering the regexp sections
- according to match, and from .section. The values are treated as
- templates, which are substituted from .auxdicts and (in case of regexp
- sections) match groups.
- """
- if not self.peers:
- raise GsyncdError('no peers given, cannot select matching options')
-
- def update_from_sect(sect, mud):
- for k, v in self.config._sections[sect].items():
- if k == '__name__':
- continue
- if allow_unresolved:
- dct[k] = Template(v).safe_substitute(mud)
- else:
- dct[k] = Template(v).substitute(mud)
- for sect in self.ord_sections():
- sp = self.parse_section(sect)
- if isinstance(sp[0], re_type) and len(sp) == len(self.peers):
- match = True
- mad = {}
- for i in range(len(sp)):
- m = sp[i].search(self.peers[i])
- if not m:
- match = False
- break
- for j in range(len(m.groups())):
- mad['match%d_%d' % (i + 1, j + 1)] = m.groups()[j]
- if match:
- update_from_sect(sect, MultiDict(dct, mad, *self.auxdicts))
- if self.config.has_section(self.section()):
- update_from_sect(self.section(), MultiDict(dct, *self.auxdicts))
-
- def get(self, opt=None, printValue=True):
- """print the matching key/value pairs from .config,
- or if @opt given, the value for @opt (according to the
- logic described in .update_to)
- """
- d = {}
- self.update_to(d, allow_unresolved=True)
- if opt:
- opt = norm(opt)
- v = d.get(opt)
- if v:
- if printValue:
- print(v)
- else:
- return v
- else:
- for k, v in d.iteritems():
- if k == '__name__':
- continue
- print("%s: %s" % (k, v))
-
- def write(self, trfn, opt, *a, **kw):
- """update on-disk config transactionally
-
- @trfn is the transaction function
- """
- def mergeconf(f):
- self.config = ConfigParser.RawConfigParser()
- self.config.readfp(f)
- self._normconfig()
- if not self.config.has_section(SECT_META):
- self.config.add_section(SECT_META)
- self.config.set(SECT_META, 'version', config_version)
- return trfn(norm(opt), *a, **kw)
-
- def updateconf(f):
- self.config.write(f)
- update_file(self.path, updateconf, mergeconf)
-
- def _set(self, opt, val, rx=False):
- """set @opt to @val in .section"""
- sect = self.section(rx)
- if not self.config.has_section(sect):
- self.config.add_section(sect)
- # regarding SECT_ORD, cf. ord_sections
- if not self.config.has_section(SECT_ORD):
- self.config.add_section(SECT_ORD)
- self.config.set(
- SECT_ORD, sect, len(self.config._sections[SECT_ORD]))
- self.config.set(sect, opt, val)
- return True
-
- def set(self, opt, *a, **kw):
- """perform ._set transactionally"""
- self.write(self._set, opt, *a, **kw)
-
- def _delete(self, opt, rx=False):
- """delete @opt from .section"""
- sect = self.section(rx)
- if self.config.has_section(sect):
- return self.config.remove_option(sect, opt)
-
- def delete(self, opt, *a, **kw):
- """perform ._delete transactionally"""
- self.write(self._delete, opt, *a, **kw)
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index cb29903bf4f..257ed72c6ae 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -1,647 +1,325 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
#
-# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
-# This file is part of GlusterFS.
-
-# This file is licensed to you under your choice of the GNU Lesser
-# General Public License, version 3 or any later version (LGPLv3 or
-# later), or the GNU General Public License, version 2 (GPLv2), in all
-# cases as published by the Free Software Foundation.
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
#
+from argparse import ArgumentParser
+import time
import os
-import os.path
-import glob
+from errno import EEXIST
import sys
-import time
import logging
-import shutil
-import optparse
-import fcntl
-import fnmatch
-from optparse import OptionParser, SUPPRESS_HELP
-from logging import Logger, handlers
-from errno import ENOENT
-
-from ipaddr import IPAddress, IPNetwork
-
-from gconf import gconf
-from syncdutils import FreeObject, norm, grabpidfile, finalize
-from syncdutils import log_raise_exception, privileged, update_file
-from syncdutils import GsyncdError, select, set_term_handler
-from configinterface import GConffile, upgrade_config_file
-import resource
-from monitor import monitor
-from changelogagent import agent, Changelog
-
-
-class GLogger(Logger):
-
- """Logger customizations for gsyncd.
-
- It implements a log format similar to that of glusterfs.
- """
-
- def makeRecord(self, name, level, *a):
- rv = Logger.makeRecord(self, name, level, *a)
- rv.nsecs = (rv.created - int(rv.created)) * 1000000
- fr = sys._getframe(4)
- callee = fr.f_locals.get('self')
- if callee:
- ctx = str(type(callee)).split("'")[1].split('.')[-1]
- else:
- ctx = '<top>'
- if not hasattr(rv, 'funcName'):
- rv.funcName = fr.f_code.co_name
- rv.lvlnam = logging.getLevelName(level)[0]
- rv.ctx = ctx
- return rv
-
- @classmethod
- def setup(cls, **kw):
- lbl = kw.get('label', "")
- if lbl:
- lbl = '(' + lbl + ')'
- lprm = {'datefmt': "%Y-%m-%d %H:%M:%S",
- 'format': "[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" +
- lbl + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"}
- lprm.update(kw)
- lvl = kw.get('level', logging.INFO)
- lprm['level'] = lvl
- logging.root = cls("root", lvl)
- logging.setLoggerClass(cls)
- logging.getLogger().handlers = []
- logging.getLogger().setLevel(lprm['level'])
-
- if 'filename' in lprm:
- try:
- logging_handler = handlers.WatchedFileHandler(lprm['filename'])
- formatter = logging.Formatter(fmt=lprm['format'],
- datefmt=lprm['datefmt'])
- logging_handler.setFormatter(formatter)
- logging.getLogger().addHandler(logging_handler)
- except AttributeError:
- # Python version < 2.6 will not have WatchedFileHandler
- # so fallback to logging without any handler.
- # Note: logrotate will not work if Python version is < 2.6
- logging.basicConfig(**lprm)
- else:
- # If filename not passed(not available in lprm) then it may be
- # streaming.(Ex: {"stream": "/dev/stdout"})
- logging.basicConfig(**lprm)
-
- @classmethod
- def _gsyncd_loginit(cls, **kw):
- lkw = {}
- if gconf.log_level:
- lkw['level'] = gconf.log_level
- if kw.get('log_file'):
- if kw['log_file'] in ('-', '/dev/stderr'):
- lkw['stream'] = sys.stderr
- elif kw['log_file'] == '/dev/stdout':
- lkw['stream'] = sys.stdout
- else:
- lkw['filename'] = kw['log_file']
-
- cls.setup(label=kw.get('label'), **lkw)
-
- lkw.update({'saved_label': kw.get('label')})
- gconf.log_metadata = lkw
- gconf.log_exit = True
-
-
-def startup(**kw):
- """set up logging, pidfile grabbing, daemonization"""
- if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn':
- if not grabpidfile():
- sys.stderr.write("pidfile is taken, exiting.\n")
- sys.exit(2)
- gconf.pid_file_owned = True
-
- if kw.get('go_daemon') == 'should':
- x, y = os.pipe()
- gconf.cpid = os.fork()
- if gconf.cpid:
- os.close(x)
- sys.exit()
- os.close(y)
- os.setsid()
- dn = os.open(os.devnull, os.O_RDWR)
- for f in (sys.stdin, sys.stdout, sys.stderr):
- os.dup2(dn, f.fileno())
- if getattr(gconf, 'pid_file', None):
- if not grabpidfile(gconf.pid_file + '.tmp'):
- raise GsyncdError("cannot grab temporary pidfile")
- os.rename(gconf.pid_file + '.tmp', gconf.pid_file)
- # wait for parent to terminate
- # so we can start up with
- # no messing from the dirty
- # ol' bustard
- select((x,), (), ())
- os.close(x)
-
- GLogger._gsyncd_loginit(**kw)
-
-
-def _unlink(path):
- try:
- os.unlink(path)
- except (OSError, IOError):
- if sys.exc_info()[1].errno == ENOENT:
- pass
- else:
- raise GsyncdError('Unlink error: %s' % path)
+
+from logutils import setup_logging
+import gsyncdconfig as gconf
+from rconf import rconf
+import subcmds
+from conf import GLUSTERD_WORKDIR, GLUSTERFS_CONFDIR, GCONF_VERSION
+from syncdutils import (set_term_handler, finalize, lf,
+ log_raise_exception, FreeObject, escape)
+import argsupgrade
+
+
+GSYNCD_VERSION = "gsyncd.py %s.0" % GCONF_VERSION
def main():
- """main routine, signal/exception handling boilerplates"""
- gconf.starttime = time.time()
+ rconf.starttime = time.time()
+
+ # If an old Glusterd sends commands in the old format, the function
+ # below converts sys.argv to the new format. This conversion is added
+ # temporarily for backward compatibility and can be removed
+ # once integrated with Glusterd2.
+ # It modifies sys.argv globally, so the rest of the code works as usual.
+ argsupgrade.upgrade()
+
+ # The default argparse version handler prints to stderr; this is fixed
+ # in the Python 3.x series but not in 2.x, so handle --version here
+ # before invoking the parser
+ if "--version" in sys.argv:
+ print(GSYNCD_VERSION)
+ sys.exit(0)
+
+ parser = ArgumentParser()
+ parser.add_argument("--inet6", action="store_true")
+ sp = parser.add_subparsers(dest="subcmd")
+
+ # Monitor Status File update
+ p = sp.add_parser("monitor-status")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("status", help="Update Monitor Status")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Monitor
+ p = sp.add_parser("monitor")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--pause-on-start",
+ action="store_true",
+ help="Start with Paused state")
+ p.add_argument("--local-node-id", help="Local Node ID")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--use-gconf-volinfo", action="store_true")
+
+ # Worker
+ p = sp.add_parser("worker")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("--local-path", help="Local Brick Path")
+ p.add_argument("--feedback-fd", type=int,
+ help="feedback fd between monitor and worker")
+ p.add_argument("--local-node", help="Local master node")
+ p.add_argument("--local-node-id", help="Local Node ID")
+ p.add_argument("--subvol-num", type=int, help="Subvolume number")
+ p.add_argument("--is-hottier", action="store_true",
+ help="Is this brick part of hot tier")
+ p.add_argument("--resource-remote",
+ help="Remote node to connect to Slave Volume")
+ p.add_argument("--resource-remote-id",
+ help="Remote node ID to connect to Slave Volume")
+ p.add_argument("--slave-id", help="Slave Volume ID")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Slave
+ p = sp.add_parser("slave")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("--session-owner")
+ p.add_argument("--master-brick",
+ help="Master brick which is connected to the Slave")
+ p.add_argument("--master-node",
+ help="Master node which is connected to the Slave")
+ p.add_argument("--master-node-id",
+ help="Master node ID which is connected to the Slave")
+ p.add_argument("--local-node", help="Local Slave node")
+ p.add_argument("--local-node-id", help="Local Slave ID")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # All configurations which are configured via "slave-" options
+ # DO NOT add default values for these configurations, default values
+ # will be picked from template config file
+ p.add_argument("--slave-timeout", type=int,
+ help="Timeout to end gsyncd at Slave side")
+ p.add_argument("--use-rsync-xattrs", action="store_true")
+ p.add_argument("--slave-log-level", help="Slave Gsyncd Log level")
+ p.add_argument("--slave-gluster-log-level",
+ help="Slave Gluster mount Log level")
+ p.add_argument("--slave-gluster-command-dir",
+ help="Directory where Gluster binaries exist on slave")
+ p.add_argument("--slave-access-mount", action="store_true",
+ help="Do not lazy umount the slave volume")
+ p.add_argument("--master-dist-count", type=int,
+ help="Master Distribution count")
+
+ # Status
+ p = sp.add_parser("status")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--local-path", help="Local Brick Path")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--json", action="store_true")
+
+ # Config-check
+ p = sp.add_parser("config-check")
+ p.add_argument("name", help="Config Name")
+ p.add_argument("--value", help="Config Value")
+ p.add_argument("--debug", action="store_true")
+
+ # Config-get
+ p = sp.add_parser("config-get")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("--name", help="Config Name")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--show-defaults", action="store_true")
+ p.add_argument("--only-value", action="store_true")
+ p.add_argument("--use-underscore", action="store_true")
+ p.add_argument("--json", action="store_true")
+
+ # Config-set
+ p = sp.add_parser("config-set")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-n", "--name", help="Config Name")
+ p.add_argument("-v", "--value", help="Config Value")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Config-reset
+ p = sp.add_parser("config-reset")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("name", help="Config Name")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # voluuidget
+ p = sp.add_parser("voluuidget")
+ p.add_argument("host", help="Hostname")
+ p.add_argument("volname", help="Volume Name")
+ p.add_argument("--debug", action="store_true")
+
+ # Delete
+ p = sp.add_parser("delete")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument('--path', dest='paths', action="append")
+ p.add_argument("--reset-sync-time", action="store_true",
+ help="Reset Sync Time")
+ p.add_argument("--debug", action="store_true")
+
+ # Parse arguments
+ args = parser.parse_args()
+
+ # Extra template values. All arguments are already part of the template
+ # variables; use this for adding extra variables
+ extra_tmpl_args = {}
+
+ # Add First/Primary Slave host, user and volume
+ if getattr(args, "slave", None) is not None:
+ hostdata, slavevol = args.slave.split("::")
+ hostdata = hostdata.split("@")
+ slavehost = hostdata[-1]
+ slaveuser = "root"
+ if len(hostdata) == 2:
+ slaveuser = hostdata[0]
+ extra_tmpl_args["primary_slave_host"] = slavehost
+ extra_tmpl_args["slaveuser"] = slaveuser
+ extra_tmpl_args["slavevol"] = slavevol
+
+ # Add Bricks encoded path
+ if getattr(args, "local_path", None) is not None:
+ extra_tmpl_args["local_id"] = escape(args.local_path)
+
+ # Add Master Bricks encoded path(For Slave)
+ if getattr(args, "master_brick", None) is not None:
+ extra_tmpl_args["master_brick_id"] = escape(args.master_brick)
+
+ # Load configurations
+ config_file = getattr(args, "config_file", None)
+
+ # If a subcmd accepts a config file argument but none was passed,
+ # set the default path for the config file in that case.
+ # A subcmd that accepts a config file also accepts
+ # master and Slave arguments.
+ if config_file is None and hasattr(args, "config_file") \
+ and args.subcmd != "slave":
+ config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+ GLUSTERD_WORKDIR,
+ args.master,
+ extra_tmpl_args["primary_slave_host"],
+ extra_tmpl_args["slavevol"])
+
+ # If the config file path does not exist, log an error and continue using the default conf
+ config_file_error_msg = None
+ if config_file is not None and not os.path.exists(config_file):
+ # Logging not yet initialized, create the error message to
+ # log later and reset the config_file to None
+ config_file_error_msg = lf(
+ "Session config file not exists, using the default config",
+ path=config_file)
+ config_file = None
+
+ rconf.config_file = config_file
+
+ # Override gconf values from argument values only if it is slave gsyncd
+ override_from_args = False
+ if args.subcmd == "slave":
+ override_from_args = True
+
+ if config_file is not None and \
+ args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]:
+ ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"])
+ if ret is not None:
+ gconf.config_upgrade(config_file, ret)
+
+ # Load Config file
+ gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf",
+ config_file,
+ vars(args),
+ extra_tmpl_args,
+ override_from_args)
+
+ # Default label to print in log file
+ label = args.subcmd
+ if args.subcmd in ("worker"):
+ # If Worker, then add brick path also to label
+ label = "%s %s" % (args.subcmd, args.local_path)
+ elif args.subcmd == "slave":
+ # If Slave add Master node and Brick details
+ label = "%s %s%s" % (args.subcmd, args.master_node, args.master_brick)
+
+ # Setup Logger
+ # Default log file
+ log_file = gconf.get("cli-log-file")
+ log_level = gconf.get("cli-log-level")
+ if getattr(args, "master", None) is not None and \
+ getattr(args, "slave", None) is not None:
+ log_file = gconf.get("log-file")
+ log_level = gconf.get("log-level")
+
+ # Use different log file location for Slave log file
+ if args.subcmd == "slave":
+ log_file = gconf.get("slave-log-file")
+ log_level = gconf.get("slave-log-level")
+
+ if args.debug:
+ log_file = "-"
+ log_level = "DEBUG"
+
+ # Create the log dir if it does not exist
+ try:
+ if log_file != "-":
+ os.mkdir(os.path.dirname(log_file))
+ except OSError as e:
+ if e.errno != EEXIST:
+ raise
+
+ setup_logging(
+ log_file=log_file,
+ level=log_level,
+ label=label
+ )
+
+ if config_file_error_msg is not None:
+ logging.warn(config_file_error_msg)
+
+ # Log message for loaded config file
+ if config_file is not None:
+ logging.debug(lf("Using session config file", path=config_file))
+
set_term_handler()
- GLogger.setup()
excont = FreeObject(exval=0)
+
+ # Get the handler function based on the subcommand. For example,
+ # if the subcommand passed as argument is monitor, then it looks for
+ # a function named "subcmd_monitor" in the subcmds module
+ func = getattr(subcmds, "subcmd_" + args.subcmd.replace("-", "_"), None)
+
try:
try:
- main_i()
+ if func is not None:
+ rconf.args = args
+ func(args)
except:
log_raise_exception(excont)
finally:
finalize(exval=excont.exval)
-def main_i():
- """internal main routine
-
- parse command line, decide what action will be taken;
- we can either:
- - query/manipulate configuration
- - format gsyncd urls using gsyncd's url parsing engine
- - start service in following modes, in given stages:
- - agent: startup(), ChangelogAgent()
- - monitor: startup(), monitor()
- - master: startup(), connect_remote(), connect(), service_loop()
- - slave: startup(), connect(), service_loop()
- """
- rconf = {'go_daemon': 'should'}
-
- def store_abs(opt, optstr, val, parser):
- if val and val != '-':
- val = os.path.abspath(val)
- setattr(parser.values, opt.dest, val)
-
- def store_local(opt, optstr, val, parser):
- rconf[opt.dest] = val
-
- def store_local_curry(val):
- return lambda o, oo, vx, p: store_local(o, oo, val, p)
-
- def store_local_obj(op, dmake):
- return lambda o, oo, vx, p: store_local(
- o, oo, FreeObject(op=op, **dmake(vx)), p)
-
- op = OptionParser(
- usage="%prog [options...] <master> <slave>", version="%prog 0.0.1")
- op.add_option('--gluster-command-dir', metavar='DIR', default='')
- op.add_option('--gluster-log-file', metavar='LOGF',
- default=os.devnull, type=str, action='callback',
- callback=store_abs)
- op.add_option('--gluster-log-level', metavar='LVL')
- op.add_option('--gluster-params', metavar='PRMS', default='')
- op.add_option(
- '--glusterd-uuid', metavar='UUID', type=str, default='',
- help=SUPPRESS_HELP)
- op.add_option(
- '--gluster-cli-options', metavar='OPTS', default='--log-file=-')
- op.add_option('--mountbroker', metavar='LABEL')
- op.add_option('-p', '--pid-file', metavar='PIDF', type=str,
- action='callback', callback=store_abs)
- op.add_option('-l', '--log-file', metavar='LOGF', type=str,
- action='callback', callback=store_abs)
- op.add_option('--iprefix', metavar='LOGD', type=str,
- action='callback', callback=store_abs)
- op.add_option('--changelog-log-file', metavar='LOGF', type=str,
- action='callback', callback=store_abs)
- op.add_option('--log-file-mbr', metavar='LOGF', type=str,
- action='callback', callback=store_abs)
- op.add_option('--state-file', metavar='STATF', type=str,
- action='callback', callback=store_abs)
- op.add_option('--state-detail-file', metavar='STATF',
- type=str, action='callback', callback=store_abs)
- op.add_option('--georep-session-working-dir', metavar='STATF',
- type=str, action='callback', callback=store_abs)
- op.add_option('--ignore-deletes', default=False, action='store_true')
- op.add_option('--isolated-slave', default=False, action='store_true')
- op.add_option('--use-rsync-xattrs', default=False, action='store_true')
- op.add_option('--pause-on-start', default=False, action='store_true')
- op.add_option('-L', '--log-level', metavar='LVL')
- op.add_option('-r', '--remote-gsyncd', metavar='CMD',
- default=os.path.abspath(sys.argv[0]))
- op.add_option('--volume-id', metavar='UUID')
- op.add_option('--slave-id', metavar='ID')
- op.add_option('--session-owner', metavar='ID')
- op.add_option('--local-id', metavar='ID', help=SUPPRESS_HELP, default='')
- op.add_option(
- '--local-path', metavar='PATH', help=SUPPRESS_HELP, default='')
- op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh')
- op.add_option('--ssh-command-tar', metavar='CMD', default='ssh')
- op.add_option('--rsync-command', metavar='CMD', default='rsync')
- op.add_option('--rsync-options', metavar='OPTS', default='')
- op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress')
- op.add_option('--timeout', metavar='SEC', type=int, default=120)
- op.add_option('--connection-timeout', metavar='SEC',
- type=int, default=60, help=SUPPRESS_HELP)
- op.add_option('--sync-jobs', metavar='N', type=int, default=3)
- op.add_option('--replica-failover-interval', metavar='N',
- type=int, default=1)
- op.add_option(
- '--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP)
- op.add_option('--allow-network', metavar='IPS', default='')
- op.add_option('--socketdir', metavar='DIR')
- op.add_option('--state-socket-unencoded', metavar='SOCKF',
- type=str, action='callback', callback=store_abs)
- op.add_option('--checkpoint', metavar='LABEL', default='')
-
- # tunables for failover/failback mechanism:
- # None - gsyncd behaves as normal
- # blind - gsyncd works with xtime pairs to identify
- # candidates for synchronization
- # wrapup - same as normal mode but does not assign
- # xtimes to orphaned files
- # see crawl() for usage of the above tunables
- op.add_option('--special-sync-mode', type=str, help=SUPPRESS_HELP)
-
- # changelog or xtime? (TODO: Change the default)
- op.add_option(
- '--change-detector', metavar='MODE', type=str, default='xtime')
- # sleep interval for change detection (xtime crawl uses a hardcoded 1
- # second sleep time)
- op.add_option('--change-interval', metavar='SEC', type=int, default=3)
- # working directory for changelog based mechanism
- op.add_option('--working-dir', metavar='DIR', type=str,
- action='callback', callback=store_abs)
- op.add_option('--use-tarssh', default=False, action='store_true')
-
- op.add_option('-c', '--config-file', metavar='CONF',
- type=str, action='callback', callback=store_local)
- # duh. need to specify dest or value will be mapped to None :S
- op.add_option('--monitor', dest='monitor', action='callback',
- callback=store_local_curry(True))
- op.add_option('--agent', dest='agent', action='callback',
- callback=store_local_curry(True))
- op.add_option('--resource-local', dest='resource_local',
- type=str, action='callback', callback=store_local)
- op.add_option('--resource-remote', dest='resource_remote',
- type=str, action='callback', callback=store_local)
- op.add_option('--feedback-fd', dest='feedback_fd', type=int,
- help=SUPPRESS_HELP, action='callback', callback=store_local)
- op.add_option('--rpc-fd', dest='rpc_fd', type=str, help=SUPPRESS_HELP)
- op.add_option('--listen', dest='listen', help=SUPPRESS_HELP,
- action='callback', callback=store_local_curry(True))
- op.add_option('-N', '--no-daemon', dest="go_daemon",
- action='callback', callback=store_local_curry('dont'))
- op.add_option('--verify', type=str, dest="verify",
- action='callback', callback=store_local)
- op.add_option('--create', type=str, dest="create",
- action='callback', callback=store_local)
- op.add_option('--delete', dest='delete', action='callback',
- callback=store_local_curry(True))
- op.add_option('--debug', dest="go_daemon", action='callback',
- callback=lambda *a: (store_local_curry('dont')(*a),
- setattr(
- a[-1].values, 'log_file', '-'),
- setattr(a[-1].values, 'log_level',
- 'DEBUG'),
- setattr(a[-1].values,
- 'changelog_log_file', '-')))
- op.add_option('--path', type=str, action='append')
-
- for a in ('check', 'get'):
- op.add_option('--config-' + a, metavar='OPT', type=str, dest='config',
- action='callback',
- callback=store_local_obj(a, lambda vx: {'opt': vx}))
- op.add_option('--config-get-all', dest='config', action='callback',
- callback=store_local_obj('get', lambda vx: {'opt': None}))
- for m in ('', '-rx', '-glob'):
- # call this code 'Pythonic' eh?
- # have to define a one-shot local function to be able
- # to inject (a value depending on the)
- # iteration variable into the inner lambda
- def conf_mod_opt_regex_variant(rx):
- op.add_option('--config-set' + m, metavar='OPT VAL', type=str,
- nargs=2, dest='config', action='callback',
- callback=store_local_obj('set', lambda vx: {
- 'opt': vx[0], 'val': vx[1], 'rx': rx}))
- op.add_option('--config-del' + m, metavar='OPT', type=str,
- dest='config', action='callback',
- callback=store_local_obj('del', lambda vx: {
- 'opt': vx, 'rx': rx}))
- conf_mod_opt_regex_variant(m and m[1:] or False)
-
- op.add_option('--normalize-url', dest='url_print',
- action='callback', callback=store_local_curry('normal'))
- op.add_option('--canonicalize-url', dest='url_print',
- action='callback', callback=store_local_curry('canon'))
- op.add_option('--canonicalize-escape-url', dest='url_print',
- action='callback', callback=store_local_curry('canon_esc'))
-
- tunables = [norm(o.get_opt_string()[2:])
- for o in op.option_list
- if (o.callback in (store_abs, 'store_true', None) and
- o.get_opt_string() not in ('--version', '--help'))]
- remote_tunables = ['listen', 'go_daemon', 'timeout',
- 'session_owner', 'config_file', 'use_rsync_xattrs']
- rq_remote_tunables = {'listen': True}
-
- # precedence for sources of values: 1) commandline, 2) cfg file, 3)
- # defaults for this to work out we need to tell apart defaults from
- # explicitly set options... so churn out the defaults here and call
- # the parser with virgin values container.
- defaults = op.get_default_values()
- opts, args = op.parse_args(values=optparse.Values())
- args_orig = args[:]
- r = rconf.get('resource_local')
- if r:
- if len(args) == 0:
- args.append(None)
- args[0] = r
- r = rconf.get('resource_remote')
- if r:
- if len(args) == 0:
- raise GsyncdError('local resource unspecfied')
- elif len(args) == 1:
- args.append(None)
- args[1] = r
- confdata = rconf.get('config')
- if not (len(args) == 2 or
- (len(args) == 1 and rconf.get('listen')) or
- (len(args) <= 2 and confdata) or
- rconf.get('url_print')):
- sys.stderr.write("error: incorrect number of arguments\n\n")
- sys.stderr.write(op.get_usage() + "\n")
- sys.exit(1)
-
- verify = rconf.get('verify')
- if verify:
- logging.info(verify)
- logging.info("Able to spawn gsyncd.py")
- return
-
- restricted = os.getenv('_GSYNCD_RESTRICTED_')
-
- if restricted:
- allopts = {}
- allopts.update(opts.__dict__)
- allopts.update(rconf)
- bannedtuns = set(allopts.keys()) - set(remote_tunables)
- if bannedtuns:
- raise GsyncdError('following tunables cannot be set with '
- 'restricted SSH invocaton: ' +
- ', '.join(bannedtuns))
- for k, v in rq_remote_tunables.items():
- if not k in allopts or allopts[k] != v:
- raise GsyncdError('tunable %s is not set to value %s required '
- 'for restricted SSH invocaton' %
- (k, v))
-
- confrx = getattr(confdata, 'rx', None)
-
- def makersc(aa, check=True):
- if not aa:
- return ([], None, None)
- ra = [resource.parse_url(u) for u in aa]
- local = ra[0]
- remote = None
- if len(ra) > 1:
- remote = ra[1]
- if check and not local.can_connect_to(remote):
- raise GsyncdError("%s cannot work with %s" %
- (local.path, remote and remote.path))
- return (ra, local, remote)
- if confrx:
- # peers are regexen, don't try to parse them
- if confrx == 'glob':
- args = ['\A' + fnmatch.translate(a) for a in args]
- canon_peers = args
- namedict = {}
- else:
- dc = rconf.get('url_print')
- rscs, local, remote = makersc(args_orig, not dc)
- if dc:
- for r in rscs:
- print(r.get_url(**{'normal': {},
- 'canon': {'canonical': True},
- 'canon_esc': {'canonical': True,
- 'escaped': True}}[dc]))
- return
- pa = ([], [], [])
- urlprms = (
- {}, {'canonical': True}, {'canonical': True, 'escaped': True})
- for x in rscs:
- for i in range(len(pa)):
- pa[i].append(x.get_url(**urlprms[i]))
- _, canon_peers, canon_esc_peers = pa
- # creating the namedict, a dict representing various ways of referring
- # to / repreenting peers to be fillable in config templates
- mods = (lambda x: x, lambda x: x[
- 0].upper() + x[1:], lambda x: 'e' + x[0].upper() + x[1:])
- if remote:
- rmap = {local: ('local', 'master'), remote: ('remote', 'slave')}
- else:
- rmap = {local: ('local', 'slave')}
- namedict = {}
- for i in range(len(rscs)):
- x = rscs[i]
- for name in rmap[x]:
- for j in range(3):
- namedict[mods[j](name)] = pa[j][i]
- namedict[name + 'vol'] = x.volume
- if name == 'remote':
- namedict['remotehost'] = x.remotehost
- if not 'config_file' in rconf:
- rconf['config_file'] = os.path.join(
- os.path.dirname(sys.argv[0]), "conf/gsyncd_template.conf")
-
- upgrade_config_file(rconf['config_file'])
- gcnf = GConffile(
- rconf['config_file'], canon_peers,
- defaults.__dict__, opts.__dict__, namedict)
-
- checkpoint_change = False
- if confdata:
- opt_ok = norm(confdata.opt) in tunables + [None]
- if confdata.op == 'check':
- if opt_ok:
- sys.exit(0)
- else:
- sys.exit(1)
- elif not opt_ok:
- raise GsyncdError("not a valid option: " + confdata.opt)
- if confdata.op == 'get':
- gcnf.get(confdata.opt)
- elif confdata.op == 'set':
- gcnf.set(confdata.opt, confdata.val, confdata.rx)
- elif confdata.op == 'del':
- gcnf.delete(confdata.opt, confdata.rx)
- # when modifying checkpoint, it's important to make a log
- # of that, so in that case we go on to set up logging even
- # if its just config invocation
- if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \
- not confdata.rx:
- checkpoint_change = True
- if not checkpoint_change:
- return
-
- gconf.__dict__.update(defaults.__dict__)
- gcnf.update_to(gconf.__dict__)
- gconf.__dict__.update(opts.__dict__)
- gconf.configinterface = gcnf
-
- delete = rconf.get('delete')
- if delete:
- logging.info('geo-replication delete')
- # Delete pid file, status file, socket file
- cleanup_paths = []
- if getattr(gconf, 'pid_file', None):
- cleanup_paths.append(gconf.pid_file)
-
- if getattr(gconf, 'state_file', None):
- cleanup_paths.append(gconf.state_file)
-
- if getattr(gconf, 'state_detail_file', None):
- cleanup_paths.append(gconf.state_detail_file)
-
- if getattr(gconf, 'state_socket_unencoded', None):
- cleanup_paths.append(gconf.state_socket_unencoded)
-
- cleanup_paths.append(rconf['config_file'][:-11] + "*")
-
- # Cleanup changelog working dirs
- if getattr(gconf, 'working_dir', None):
- try:
- shutil.rmtree(gconf.working_dir)
- except (IOError, OSError):
- if sys.exc_info()[1].errno == ENOENT:
- pass
- else:
- raise GsyncdError(
- 'Error while removing working dir: %s' %
- gconf.working_dir)
-
- for path in cleanup_paths:
- # To delete temp files
- for f in glob.glob(path + "*"):
- _unlink(f)
- return
-
- if restricted and gconf.allow_network:
- ssh_conn = os.getenv('SSH_CONNECTION')
- if not ssh_conn:
- # legacy env var
- ssh_conn = os.getenv('SSH_CLIENT')
- if ssh_conn:
- allowed_networks = [IPNetwork(a)
- for a in gconf.allow_network.split(',')]
- client_ip = IPAddress(ssh_conn.split()[0])
- allowed = False
- for nw in allowed_networks:
- if client_ip in nw:
- allowed = True
- break
- if not allowed:
- raise GsyncdError("client IP address is not allowed")
-
- ffd = rconf.get('feedback_fd')
- if ffd:
- fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
-
- # normalize loglevel
- lvl0 = gconf.log_level
- if isinstance(lvl0, str):
- lvl1 = lvl0.upper()
- lvl2 = logging.getLevelName(lvl1)
- # I have _never_ _ever_ seen such an utterly braindead
- # error condition
- if lvl2 == "Level " + lvl1:
- raise GsyncdError('cannot recognize log level "%s"' % lvl0)
- gconf.log_level = lvl2
-
- if not privileged() and gconf.log_file_mbr:
- gconf.log_file = gconf.log_file_mbr
-
- if checkpoint_change:
- try:
- GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
- if confdata.op == 'set':
- logging.info('checkpoint %s set' % confdata.val)
- gcnf.delete('checkpoint_completed')
- gcnf.delete('checkpoint_target')
- elif confdata.op == 'del':
- logging.info('checkpoint info was reset')
- # if it is removing 'checkpoint' then we need
- # to remove 'checkpoint_completed' and 'checkpoint_target' too
- gcnf.delete('checkpoint_completed')
- gcnf.delete('checkpoint_target')
-
- except IOError:
- if sys.exc_info()[1].errno == ENOENT:
- # directory of log path is not present,
- # which happens if we get here from
- # a peer-multiplexed "config-set checkpoint"
- # (as that directory is created only on the
- # original node)
- pass
- else:
- raise
- return
-
- create = rconf.get('create')
- if create:
- if getattr(gconf, 'state_file', None):
- update_file(gconf.state_file, lambda f: f.write(create + '\n'))
- return
-
- go_daemon = rconf['go_daemon']
- be_monitor = rconf.get('monitor')
- be_agent = rconf.get('agent')
-
- rscs, local, remote = makersc(args)
- if not be_monitor and isinstance(remote, resource.SSH) and \
- go_daemon == 'should':
- go_daemon = 'postconn'
- log_file = None
- else:
- log_file = gconf.log_file
- if be_monitor:
- label = 'monitor'
- elif be_agent:
- label = 'agent'
- elif remote:
- # master
- label = gconf.local_path
- else:
- label = 'slave'
- startup(go_daemon=go_daemon, log_file=log_file, label=label)
- resource.Popen.init_errhandler()
-
- if be_agent:
- os.setsid()
- logging.debug('rpc_fd: %s' % repr(gconf.rpc_fd))
- return agent(Changelog(), gconf.rpc_fd)
-
- if be_monitor:
- return monitor(*rscs)
-
- logging.info("syncing: %s" % " -> ".join(r.url for r in rscs))
- if remote:
- go_daemon = remote.connect_remote(go_daemon=go_daemon)
- if go_daemon:
- startup(go_daemon=go_daemon, log_file=gconf.log_file)
- # complete remote connection in child
- remote.connect_remote(go_daemon='done')
- local.connect()
- if ffd:
- os.close(ffd)
- local.service_loop(*[r for r in [remote] if r])
-
-
if __name__ == "__main__":
main()
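
For context on the allow-network check removed above: it matched the SSH client address (taken from SSH_CONNECTION, falling back to the legacy SSH_CLIENT) against a comma-separated list of allowed networks using the bundled ipaddr module. Below is a minimal sketch of the same idea with Python 3's standard ipaddress module; the function name and its use are illustrative, not part of gsyncd.

import os
import ipaddress

def ssh_client_allowed(allow_network):
    # SSH_CONNECTION is "client_ip client_port server_ip server_port";
    # SSH_CLIENT is the legacy equivalent.
    ssh_conn = os.getenv('SSH_CONNECTION') or os.getenv('SSH_CLIENT')
    if not ssh_conn:
        return True   # not reached over SSH, nothing to restrict
    client_ip = ipaddress.ip_address(ssh_conn.split()[0])
    networks = [ipaddress.ip_network(n.strip())
                for n in allow_network.split(',')]
    return any(client_ip in nw for nw in networks)

# e.g. ssh_client_allowed("10.0.0.0/8,192.168.122.0/24")
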
diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
new file mode 100644
index 00000000000..8848071997a
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncdconfig.py
@@ -0,0 +1,485 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+try:
+ from ConfigParser import RawConfigParser, NoSectionError
+except ImportError:
+ from configparser import RawConfigParser, NoSectionError
+import os
+import shutil
+from string import Template
+from datetime import datetime
+from threading import Lock
+
+
+# Global object which can be used in other modules
+# once load_config is called
+_gconf = {}
+
+
+class GconfNotConfigurable(Exception):
+ pass
+
+
+class GconfInvalidValue(Exception):
+ pass
+
+
+class Gconf(object):
+ def __init__(self, default_conf_file, custom_conf_file=None,
+ args={}, extra_tmpl_args={}, override_from_args=False):
+ self.lock = Lock()
+ self.default_conf_file = default_conf_file
+ self.custom_conf_file = custom_conf_file
+ self.tmp_conf_file = None
+ self.gconf = {}
+ self.gconfdata = {}
+ self.gconf_typecast = {}
+ self.template_conf = []
+ self.non_configurable_configs = []
+ self.prev_mtime = 0
+ if custom_conf_file is not None:
+ self.tmp_conf_file = custom_conf_file + ".tmp"
+
+ self.session_conf_items = []
+ self.args = args
+ self.extra_tmpl_args = extra_tmpl_args
+ self.override_from_args = override_from_args
+        # Store default values only if overwritten; used only for JSON/CLI output
+ self.default_values = {}
+ self._load()
+
+ def _tmpl_substitute(self):
+ tmpl_values = {}
+ for k, v in self.gconf.items():
+ tmpl_values[k.replace("-", "_")] = v
+
+        # override the config file values with the ones the user passed
+ for k, v in self.args.items():
+ # override the existing value only if set by user
+ if v is not None:
+ tmpl_values[k] = v
+
+ for k, v in self.extra_tmpl_args.items():
+ tmpl_values[k] = v
+
+ for k, v in self.gconf.items():
+ if k in self.template_conf and \
+ (isinstance(v, str) or isinstance(v, unicode)):
+ self.gconf[k] = Template(v).safe_substitute(tmpl_values)
+
+ def _do_typecast(self):
+ for k, v in self.gconf.items():
+ cast_func = globals().get(
+ "to_" + self.gconf_typecast.get(k, "string"), None)
+ if cast_func is not None:
+ self.gconf[k] = cast_func(v)
+ if self.default_values.get(k, None) is not None:
+ self.default_values[k] = cast_func(v)
+
+ def reset(self, name):
+        # If the custom conf file is not set, all configs are read-only
+ if self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ # If a config can not be modified
+ if name != "all" and not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+ # Nothing to Reset, Not configured
+ if name != "all":
+ if not cnf.has_option("vars", name):
+ return True
+
+ # Remove option from custom conf file
+ cnf.remove_option("vars", name)
+ else:
+ # Remove and add empty section, do not disturb if config file
+ # already has any other section
+ try:
+ cnf.remove_section("vars")
+ except NoSectionError:
+ pass
+
+ cnf.add_section("vars")
+
+ with open(self.tmp_conf_file, "w") as fw:
+ cnf.write(fw)
+
+ os.rename(self.tmp_conf_file, self.custom_conf_file)
+
+ self.reload()
+
+ return True
+
+ def set(self, name, value):
+ if self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ if not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ if not self._is_valid_value(name, value):
+ raise GconfInvalidValue()
+
+ curr_val = self.gconf.get(name, None)
+ if curr_val == value:
+ return True
+
+ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+ if not cnf.has_section("vars"):
+ cnf.add_section("vars")
+
+ cnf.set("vars", name, value)
+ with open(self.tmp_conf_file, "w") as fw:
+ cnf.write(fw)
+
+ os.rename(self.tmp_conf_file, self.custom_conf_file)
+
+ self.reload()
+
+ return True
+
+ def check(self, name, value=None, with_conffile=True):
+ if with_conffile and self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ if not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ if value is not None and not self._is_valid_value(name, value):
+ raise GconfInvalidValue()
+
+
+ def _load_with_lock(self):
+ with self.lock:
+ self._load()
+
+ def _load(self):
+ self.gconf = {}
+ self.template_conf = []
+ self.gconf_typecast = {}
+ self.non_configurable_configs = []
+ self.session_conf_items = []
+ self.default_values = {}
+
+ conf = RawConfigParser()
+ # Default Template config file
+ with open(self.default_conf_file) as f:
+ conf.readfp(f)
+
+ # Custom Config file
+ if self.custom_conf_file is not None:
+ with open(self.custom_conf_file) as f:
+ conf.readfp(f)
+
+ # Get version from default conf file
+ self.version = conf.get("__meta__", "version")
+
+ # Populate default values
+ for sect in conf.sections():
+ if sect in ["__meta__", "vars"]:
+ continue
+
+ # Collect list of available options with help details
+ self.gconfdata[sect] = {}
+ for k, v in conf.items(sect):
+ self.gconfdata[sect][k] = v.strip()
+
+ # Collect the Type cast information
+ if conf.has_option(sect, "type"):
+ self.gconf_typecast[sect] = conf.get(sect, "type")
+
+ # Prepare list of configurable conf
+ if conf.has_option(sect, "configurable"):
+ if conf.get(sect, "configurable").lower() == "false":
+ self.non_configurable_configs.append(sect)
+
+ # if it is a template conf value which needs to be substituted
+ if conf.has_option(sect, "template"):
+ if conf.get(sect, "template").lower().strip() == "true":
+ self.template_conf.append(sect)
+
+ # Set default values
+ if conf.has_option(sect, "value"):
+ self.gconf[sect] = conf.get(sect, "value").strip()
+
+ # Load the custom conf elements and overwrite
+ if conf.has_section("vars"):
+ for k, v in conf.items("vars"):
+ self.session_conf_items.append(k)
+ self.default_values[k] = self.gconf.get(k, "")
+ self.gconf[k] = v.strip()
+
+ # Overwrite the Slave configurations which are sent as
+ # arguments to gsyncd slave
+ if self.override_from_args:
+ for k, v in self.args.items():
+ k = k.replace("_", "-")
+ if k.startswith("slave-") and k in self.gconf:
+ self.gconf[k] = v
+
+ self._tmpl_substitute()
+ self._do_typecast()
+
+ def reload(self, with_lock=True):
+ if self._is_config_changed():
+ if with_lock:
+ self._load_with_lock()
+ else:
+ self._load()
+
+ def get(self, name, default_value=None, with_lock=True):
+ if with_lock:
+ with self.lock:
+ return self.gconf.get(name, default_value)
+ else:
+ return self.gconf.get(name, default_value)
+
+ def getall(self, show_defaults=False, show_non_configurable=False):
+ cnf = {}
+ if not show_defaults:
+ for k in self.session_conf_items:
+ if k not in self.non_configurable_configs:
+ dv = self.default_values.get(k, "")
+ cnf[k] = {
+ "value": self.get(k),
+ "default": dv,
+ "configurable": True,
+ "modified": False if dv == "" else True
+ }
+ return cnf
+
+ # Show all configs including defaults
+ for k, v in self.gconf.items():
+ configurable = False if k in self.non_configurable_configs \
+ else True
+ dv = self.default_values.get(k, "")
+ modified = False if dv == "" else True
+ if show_non_configurable:
+ cnf[k] = {
+ "value": v,
+ "default": dv,
+ "configurable": configurable,
+ "modified": modified
+ }
+ else:
+ if k not in self.non_configurable_configs:
+ cnf[k] = {
+ "value": v,
+ "default": dv,
+ "configurable": configurable,
+ "modified": modified
+ }
+
+ return cnf
+
+ def getr(self, name, default_value=None):
+ with self.lock:
+ self.reload(with_lock=False)
+ return self.get(name, default_value, with_lock=False)
+
+ def get_help(self, name=None):
+ pass
+
+ def _is_configurable(self, name):
+ item = self.gconfdata.get(name, None)
+ if item is None:
+ return False
+
+ return item.get("configurable", True)
+
+ def _is_valid_value(self, name, value):
+ item = self.gconfdata.get(name, None)
+ if item is None:
+ return False
+
+ # If validation func not defined
+ if item.get("validation", None) is None:
+ return True
+
+ # minmax validation
+ if item["validation"] == "minmax":
+ return validate_minmax(value, item["min"], item["max"])
+
+ if item["validation"] == "choice":
+ return validate_choice(value, item["allowed_values"])
+
+ if item["validation"] == "bool":
+ return validate_bool(value)
+
+ if item["validation"] == "execpath":
+ return validate_execpath(value)
+
+ if item["validation"] == "unixtime":
+ return validate_unixtime(value)
+
+ if item["validation"] == "int":
+ return validate_int(value)
+
+ return False
+
+ def _is_config_changed(self):
+ if self.custom_conf_file is not None and \
+ os.path.exists(self.custom_conf_file):
+ st = os.lstat(self.custom_conf_file)
+ if st.st_mtime > self.prev_mtime:
+ self.prev_mtime = st.st_mtime
+ return True
+
+ return False
+
+def is_config_file_old(config_file, mastervol, slavevol):
+ cnf = RawConfigParser()
+ cnf.read(config_file)
+ session_section = "peers %s %s" % (mastervol, slavevol)
+ try:
+ return dict(cnf.items(session_section))
+ except NoSectionError:
+ return None
+
+def config_upgrade(config_file, ret):
+ config_file_backup = os.path.join(os.path.dirname(config_file), "gsyncd.conf.bkp")
+
+    # copy the old config file to a backup file
+ shutil.copyfile(config_file, config_file_backup)
+
+ #write a new config file
+ config = RawConfigParser()
+ config.add_section('vars')
+
+ for key, value in ret.items():
+ #handle option name changes
+ if key == "use_tarssh":
+ new_key = "sync-method"
+ if value == "true":
+ new_value = "tarssh"
+ else:
+ new_value = "rsync"
+ config.set('vars', new_key, new_value)
+ elif key == "timeout":
+ new_key = "slave-timeout"
+ config.set('vars', new_key, value)
+ #for changes like: ignore_deletes to ignore-deletes
+ else:
+ new_key = key.replace("_", "-")
+ config.set('vars', new_key, value)
+
+ with open(config_file, 'w') as configfile:
+ config.write(configfile)
+
+
+def validate_int(value):
+ try:
+ _ = int(value)
+ return True
+ except ValueError:
+ return False
+
+
+def validate_unixtime(value):
+ try:
+ y = datetime.fromtimestamp(int(value)).strftime("%Y")
+ if y == "1970":
+ return False
+
+ return True
+ except ValueError:
+ return False
+
+
+def validate_minmax(value, minval, maxval):
+ try:
+ value = int(value)
+ minval = int(minval)
+ maxval = int(maxval)
+ return value >= minval and value <= maxval
+ except ValueError:
+ return False
+
+
+def validate_choice(value, allowed_values):
+ allowed_values = allowed_values.split(",")
+ allowed_values = [v.strip() for v in allowed_values]
+
+ return value in allowed_values
+
+
+def validate_bool(value):
+ return value in ["true", "false"]
+
+
+def validate_execpath(value):
+ return os.path.isfile(value) and os.access(value, os.X_OK)
+
+
+def validate_filepath(value):
+ return os.path.isfile(value)
+
+
+def validate_path(value):
+ return os.path.exists(value)
+
+
+def to_int(value):
+ return int(value)
+
+
+def to_float(value):
+ return float(value)
+
+
+def to_bool(value):
+ if isinstance(value, bool):
+ return value
+ return True if value in ["true", "True"] else False
+
+
+def get(name, default_value=None):
+ return _gconf.get(name, default_value)
+
+
+def getall(show_defaults=False, show_non_configurable=False):
+ return _gconf.getall(show_defaults=show_defaults,
+ show_non_configurable=show_non_configurable)
+
+
+def getr(name, default_value=None):
+ return _gconf.getr(name, default_value)
+
+
+def load(default_conf, custom_conf=None, args={}, extra_tmpl_args={},
+ override_from_args=False):
+ global _gconf
+ _gconf = Gconf(default_conf, custom_conf, args, extra_tmpl_args,
+ override_from_args)
+
+
+def setconfig(name, value):
+ global _gconf
+ _gconf.set(name, value)
+
+
+def resetconfig(name):
+ global _gconf
+ _gconf.reset(name)
+
+
+def check(name, value=None, with_conffile=True):
+ global _gconf
+ _gconf.check(name, value=value, with_conffile=with_conffile)
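
A brief usage sketch of the module-level API defined above. The file paths are illustrative; in practice gsyncd passes the installed gsyncd.conf template and the per-session config file.

import gsyncdconfig as gconf

# Populate the module-level singleton from the default template plus
# the session's custom overrides.
gconf.load("/path/to/gsyncd.conf",
           custom_conf="/path/to/session/gsyncd.conf")

interval = gconf.get("replica-failover-interval", 1)  # plain lookup
timeout = gconf.getr("slave-timeout", 0)              # reloads first if the file changed
gconf.setconfig("sync-method", "tarssh")              # validates, persists, reloads
gconf.resetconfig("sync-method")                      # back to the template default
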
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
new file mode 100644
index 00000000000..1a655ff8887
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -0,0 +1,419 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import fcntl
+import os
+import tempfile
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import json
+import time
+from datetime import datetime
+from errno import EACCES, EAGAIN, ENOENT
+import logging
+
+from syncdutils import (EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event,
+ EVENT_GEOREP_CHECKPOINT_COMPLETED, lf)
+
+DEFAULT_STATUS = "N/A"
+MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped")
+STATUS_VALUES = (DEFAULT_STATUS,
+ "Initializing...",
+ "Active",
+ "Passive",
+ "Faulty")
+
+CRAWL_STATUS_VALUES = (DEFAULT_STATUS,
+ "Hybrid Crawl",
+ "History Crawl",
+ "Changelog Crawl")
+
+
+def human_time(ts):
+ try:
+ return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ return DEFAULT_STATUS
+
+
+def human_time_utc(ts):
+ try:
+ return datetime.utcfromtimestamp(
+ float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ return DEFAULT_STATUS
+
+
+def get_default_values():
+ return {
+ "slave_node": DEFAULT_STATUS,
+ "worker_status": DEFAULT_STATUS,
+ "last_synced": 0,
+ "last_synced_entry": 0,
+ "crawl_status": DEFAULT_STATUS,
+ "entry": 0,
+ "data": 0,
+ "meta": 0,
+ "failures": 0,
+ "checkpoint_completed": DEFAULT_STATUS,
+ "checkpoint_time": 0,
+ "checkpoint_completion_time": 0}
+
+
+class LockedOpen(object):
+
+ def __init__(self, filename, *args, **kwargs):
+ self.filename = filename
+ self.open_args = args
+ self.open_kwargs = kwargs
+ self.fileobj = None
+
+ def __enter__(self):
+ """
+        If two processes compete to update a file, the first process
+        gets the lock and the second process blocks in the fcntl.flock()
+        call. When the first process replaces the file and releases the
+        lock, the already open file descriptor in the second process now
+        points to a "ghost" file (not reachable by any path name) with the
+        old contents. To avoid that, check whether the fd we hold still
+        refers to the current file, and reopen it if not.
+ """
+ f = open(self.filename, *self.open_args, **self.open_kwargs)
+ while True:
+ fcntl.flock(f, fcntl.LOCK_EX)
+ fnew = open(self.filename, *self.open_args, **self.open_kwargs)
+ if os.path.sameopenfile(f.fileno(), fnew.fileno()):
+ fnew.close()
+ break
+ else:
+ f.close()
+ f = fnew
+ self.fileobj = f
+ return f
+
+ def __exit__(self, _exc_type, _exc_value, _traceback):
+ fcntl.flock(self.fileobj, fcntl.LOCK_UN)
+ self.fileobj.close()
+
+
+def set_monitor_status(status_file, status):
+ fd = os.open(status_file, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ with LockedOpen(status_file, 'r+'):
+ with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(status_file),
+ delete=False) as tf:
+ tf.write(status)
+ tempname = tf.name
+
+ os.rename(tempname, status_file)
+ dirfd = os.open(os.path.dirname(os.path.abspath(status_file)),
+ os.O_DIRECTORY)
+ os.fsync(dirfd)
+ os.close(dirfd)
+
+
+class GeorepStatus(object):
+ def __init__(self, monitor_status_file, master_node, brick, master_node_id,
+ master, slave, monitor_pid_file=None):
+ self.master = master
+ slv_data = slave.split("::")
+ self.slave_host = slv_data[0]
+ self.slave_volume = slv_data[1].split(":")[0] # Remove Slave UUID
+ self.work_dir = os.path.dirname(monitor_status_file)
+ self.monitor_status_file = monitor_status_file
+ self.filename = os.path.join(self.work_dir,
+ "brick_%s.status"
+ % urllib.quote_plus(brick))
+
+ fd = os.open(self.filename, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ fd = os.open(self.monitor_status_file, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ self.master_node = master_node
+ self.master_node_id = master_node_id
+ self.brick = brick
+ self.default_values = get_default_values()
+ self.monitor_pid_file = monitor_pid_file
+
+ def send_event(self, event_type, **kwargs):
+ gf_event(event_type,
+ master_volume=self.master,
+ master_node=self.master_node,
+ master_node_id=self.master_node_id,
+ slave_host=self.slave_host,
+ slave_volume=self.slave_volume,
+ brick_path=self.brick,
+ **kwargs)
+
+ def _update(self, mergerfunc):
+ data = self.default_values
+ with LockedOpen(self.filename, 'r+') as f:
+ try:
+ data.update(json.load(f))
+ except ValueError:
+ pass
+
+ data = mergerfunc(data)
+            # If data is not changed by the merger func
+ if not data:
+ return False
+
+ with tempfile.NamedTemporaryFile(
+ 'w',
+ dir=os.path.dirname(self.filename),
+ delete=False) as tf:
+ tf.write(data)
+ tempname = tf.name
+
+ os.rename(tempname, self.filename)
+ dirfd = os.open(os.path.dirname(os.path.abspath(self.filename)),
+ os.O_DIRECTORY)
+ os.fsync(dirfd)
+ os.close(dirfd)
+ return True
+
+ def reset_on_worker_start(self):
+ def merger(data):
+ data["slave_node"] = DEFAULT_STATUS
+ data["crawl_status"] = DEFAULT_STATUS
+ data["entry"] = 0
+ data["data"] = 0
+ data["meta"] = 0
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_field(self, key, value):
+ def merger(data):
+            # Current data and prev data are the same
+ if data[key] == value:
+ return {}
+
+ data[key] = value
+ return json.dumps(data)
+
+ return self._update(merger)
+
+ def trigger_gf_event_checkpoint_completion(self, checkpoint_time,
+ checkpoint_completion_time):
+ self.send_event(EVENT_GEOREP_CHECKPOINT_COMPLETED,
+ checkpoint_time=checkpoint_time,
+ checkpoint_completion_time=checkpoint_completion_time)
+
+ def set_last_synced(self, value, checkpoint_time):
+ def merger(data):
+ data["last_synced"] = value[0]
+
+ # If checkpoint is not set or reset
+ # or if last set checkpoint is changed
+ if checkpoint_time == 0 or \
+ checkpoint_time != data["checkpoint_time"]:
+ data["checkpoint_time"] = 0
+ data["checkpoint_completion_time"] = 0
+ data["checkpoint_completed"] = "No"
+
+ # If checkpoint is completed and not marked as completed
+ # previously then update the checkpoint completed time
+ if checkpoint_time > 0 and checkpoint_time <= value[0]:
+ if data["checkpoint_completed"] == "No":
+ curr_time = int(time.time())
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = curr_time
+ data["checkpoint_completed"] = "Yes"
+ logging.info(lf("Checkpoint completed",
+ checkpoint_time=human_time_utc(
+ checkpoint_time),
+ completion_time=human_time_utc(curr_time)))
+ self.trigger_gf_event_checkpoint_completion(
+ checkpoint_time, curr_time)
+
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_worker_status(self, status):
+ if self.set_field("worker_status", status):
+ logging.info(lf("Worker Status Change",
+ status=status))
+
+ def set_worker_crawl_status(self, status):
+ if self.set_field("crawl_status", status):
+ logging.info(lf("Crawl Status Change",
+ status=status))
+
+ def set_slave_node(self, slave_node):
+ def merger(data):
+ data["slave_node"] = slave_node
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def inc_value(self, key, value):
+ def merger(data):
+ data[key] = data.get(key, 0) + value
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def dec_value(self, key, value):
+ def merger(data):
+ data[key] = data.get(key, 0) - value
+ if data[key] < 0:
+ data[key] = 0
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_active(self):
+ if self.set_field("worker_status", "Active"):
+ logging.info(lf("Worker Status Change",
+ status="Active"))
+ self.send_event(EVENT_GEOREP_ACTIVE)
+
+ def set_passive(self):
+ if self.set_field("worker_status", "Passive"):
+ logging.info(lf("Worker Status Change",
+ status="Passive"))
+ self.send_event(EVENT_GEOREP_PASSIVE)
+
+ def get_monitor_status(self):
+ data = ""
+ with open(self.monitor_status_file, "r") as f:
+ data = f.read().strip()
+ return data
+
+ def get_status(self, checkpoint_time=0):
+ """
+ Monitor Status ---> Created Started Paused Stopped
+ ----------------------------------------------------------------------
+ slave_node N/A VALUE VALUE N/A
+ status Created VALUE Paused Stopped
+ last_synced N/A VALUE VALUE VALUE
+ last_synced_entry N/A VALUE VALUE VALUE
+ crawl_status N/A VALUE N/A N/A
+ entry N/A VALUE N/A N/A
+ data N/A VALUE N/A N/A
+ meta N/A VALUE N/A N/A
+ failures N/A VALUE VALUE VALUE
+ checkpoint_completed N/A VALUE VALUE VALUE
+ checkpoint_time N/A VALUE VALUE VALUE
+ checkpoint_completed_time N/A VALUE VALUE VALUE
+ """
+ data = self.default_values
+ with open(self.filename) as f:
+ try:
+ data.update(json.load(f))
+ except ValueError:
+ pass
+ monitor_status = self.get_monitor_status()
+
+        # Verify whether the monitor process is running and adjust the status
+ if monitor_status in ["Started", "Paused"]:
+ try:
+ with open(self.monitor_pid_file, "r+") as f:
+ fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ monitor_status = "Stopped"
+ except (IOError, OSError) as e:
+                # If the pid file does not exist, either the monitor died or
+                # Geo-rep was never started
+ if e.errno == ENOENT:
+ monitor_status = "Stopped"
+ elif e.errno in (EACCES, EAGAIN):
+                    # cannot grab the lock; monitor still running, move on
+ pass
+ else:
+ raise
+
+ if monitor_status in ["Created", "Paused", "Stopped"]:
+ data["worker_status"] = monitor_status
+
+ if monitor_status == "":
+ data["worker_status"] = "Stopped"
+
+ # Checkpoint adjustments
+ if checkpoint_time == 0:
+ data["checkpoint_completed"] = DEFAULT_STATUS
+ data["checkpoint_time"] = DEFAULT_STATUS
+ data["checkpoint_completion_time"] = DEFAULT_STATUS
+ else:
+ if checkpoint_time != data["checkpoint_time"]:
+ if checkpoint_time <= data["last_synced"]:
+ data["checkpoint_completed"] = "Yes"
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = data["last_synced"]
+ else:
+ data["checkpoint_completed"] = "No"
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = DEFAULT_STATUS
+
+ if data["checkpoint_time"] not in [0, DEFAULT_STATUS]:
+ chkpt_time = data["checkpoint_time"]
+ data["checkpoint_time"] = human_time(chkpt_time)
+ data["checkpoint_time_utc"] = human_time_utc(chkpt_time)
+
+ if data["checkpoint_completion_time"] not in [0, DEFAULT_STATUS]:
+ chkpt_completion_time = data["checkpoint_completion_time"]
+ data["checkpoint_completion_time"] = human_time(
+ chkpt_completion_time)
+ data["checkpoint_completion_time_utc"] = human_time_utc(
+ chkpt_completion_time)
+
+ if data["last_synced"] == 0:
+ data["last_synced"] = DEFAULT_STATUS
+ data["last_synced_utc"] = DEFAULT_STATUS
+ else:
+ last_synced = data["last_synced"]
+ data["last_synced"] = human_time(last_synced)
+ data["last_synced_utc"] = human_time_utc(last_synced)
+
+ if data["worker_status"] != "Active":
+ data["last_synced"] = DEFAULT_STATUS
+ data["last_synced_utc"] = DEFAULT_STATUS
+ data["crawl_status"] = DEFAULT_STATUS
+ data["entry"] = DEFAULT_STATUS
+ data["data"] = DEFAULT_STATUS
+ data["meta"] = DEFAULT_STATUS
+ data["failures"] = DEFAULT_STATUS
+ data["checkpoint_completed"] = DEFAULT_STATUS
+ data["checkpoint_time"] = DEFAULT_STATUS
+ data["checkpoint_completed_time"] = DEFAULT_STATUS
+ data["checkpoint_time_utc"] = DEFAULT_STATUS
+ data["checkpoint_completion_time_utc"] = DEFAULT_STATUS
+
+ if data["worker_status"] not in ["Active", "Passive"]:
+ data["slave_node"] = DEFAULT_STATUS
+
+ if data.get("last_synced_utc", 0) == 0:
+ data["last_synced_utc"] = DEFAULT_STATUS
+
+ if data.get("checkpoint_completion_time_utc", 0) == 0:
+ data["checkpoint_completion_time_utc"] = DEFAULT_STATUS
+
+ if data.get("checkpoint_time_utc", 0) == 0:
+ data["checkpoint_time_utc"] = DEFAULT_STATUS
+
+ return data
+
+ def print_status(self, checkpoint_time=0, json_output=False):
+ status_out = self.get_status(checkpoint_time)
+ if json_output:
+ out = {}
+ # Convert all values as string
+            # Convert all values to strings
+ out[k] = str(v)
+ print(json.dumps(out))
+ return
+
+ for key, value in status_out.items():
+ print(("%s: %s" % (key, value)))
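
A hedged sketch of how a worker process might drive GeorepStatus above. The paths, host names, and volume names are made up for illustration, and the module must be importable together with syncdutils.

from gsyncdstatus import GeorepStatus, set_monitor_status

work_dir = "/var/lib/glusterd/geo-replication/demo-session"   # illustrative
status = GeorepStatus(work_dir + "/monitor.status",
                      "master-node1", "/bricks/b1", "MASTER-NODE-UUID",
                      "pop", "example.com::moz",
                      monitor_pid_file=work_dir + "/monitor.pid")

set_monitor_status(status.monitor_status_file, "Started")
status.reset_on_worker_start()
status.set_worker_crawl_status("Changelog Crawl")   # logs only on change
status.inc_value("entry", 5)                        # five pending entry ops
status.set_field("slave_node", "slave-node2")

# Without a live monitor holding the pid-file lock, the reported
# worker status is downgraded to "Stopped".
print(status.get_status(checkpoint_time=0)["worker_status"])
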
diff --git a/geo-replication/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py
index 74d120fa196..e6406c36bd7 100644
--- a/geo-replication/syncdaemon/libcxattr.py
+++ b/geo-replication/syncdaemon/libcxattr.py
@@ -9,13 +9,14 @@
#
import os
-from ctypes import CDLL, c_int, create_string_buffer
-from ctypes.util import find_library
+from ctypes import CDLL, get_errno
+from py2py3 import (bytearray_to_str, gr_create_string_buffer,
+ gr_query_xattr, gr_lsetxattr, gr_lremovexattr)
class Xattr(object):
- """singleton that wraps the extended attribues system
+ """singleton that wraps the extended attributes system
interface for python using ctypes
Just implement it to the degree we need it, in particular
@@ -25,11 +26,11 @@ class Xattr(object):
sizes we expect
"""
- libc = CDLL(find_library("libc"))
+ libc = CDLL("libc.so.6", use_errno=True)
@classmethod
def geterrno(cls):
- return c_int.in_dll(cls.libc, 'errno').value
+ return get_errno()
@classmethod
def raise_oserr(cls):
@@ -39,20 +40,23 @@ class Xattr(object):
@classmethod
def _query_xattr(cls, path, siz, syscall, *a):
if siz:
- buf = create_string_buffer('\0' * siz)
+ buf = gr_create_string_buffer(siz)
else:
buf = None
ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
if ret == -1:
cls.raise_oserr()
if siz:
- return buf.raw[:ret]
+ # py2 and py3 compatibility. Convert bytes array
+ # to string
+ result = bytearray_to_str(buf.raw)
+ return result[:ret]
else:
return ret
@classmethod
def lgetxattr(cls, path, attr, siz=0):
- return cls._query_xattr(path, siz, 'lgetxattr', attr)
+ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
@classmethod
def lgetxattr_buf(cls, path, attr):
@@ -66,29 +70,43 @@ class Xattr(object):
@classmethod
def llistxattr(cls, path, siz=0):
- ret = cls._query_xattr(path, siz, 'llistxattr')
+ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
if isinstance(ret, str):
- ret = ret.split('\0')
+ ret = ret.strip('\0')
+ ret = ret.split('\0') if ret else []
return ret
@classmethod
def lsetxattr(cls, path, attr, val):
- ret = cls.libc.lsetxattr(path, attr, val, len(val), 0)
+ ret = gr_lsetxattr(cls, path, attr, val)
if ret == -1:
cls.raise_oserr()
@classmethod
def lremovexattr(cls, path, attr):
- ret = cls.libc.lremovexattr(path, attr)
+ ret = gr_lremovexattr(cls, path, attr)
if ret == -1:
cls.raise_oserr()
@classmethod
def llistxattr_buf(cls, path):
"""listxattr variant with size discovery"""
- size = cls.llistxattr(path)
- if size == -1:
- cls.raise_oserr()
- if size == 0:
- return []
- return cls.llistxattr(path, size)
+ try:
+            # Assuming no more than 100 xattrs in a file/directory and
+            # that each xattr key is shorter than 256 bytes,
+            # llistxattr is called with this larger size so that
+            # listxattr does not fail with ERANGE. OSError is raised
+            # if it fails even with the large size specified.
+ size = 256 * 100
+ return cls.llistxattr(path, size)
+ except OSError:
+ # If fixed length failed for getting list of xattrs then
+            # If the fixed size was not enough to get the list of xattrs,
+            # fall back to llistxattr size discovery and query again with
+            # the discovered size.
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return []
+
+ return cls.llistxattr(path, size)
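
The llistxattr_buf() change above first tries a generous fixed buffer (100 keys of at most 256 bytes each) and only falls back to explicit size discovery on error, saving a syscall in the common case. For comparison only (the syncdaemon keeps its ctypes wrapper for py2/py3 reasons), Python 3's os module exposes the same xattr syscalls with built-in size handling; the path below is illustrative and the filesystem must support user xattrs.

import os

path = "/tmp/xattr-demo"          # illustrative
open(path, "a").close()

os.setxattr(path, "user.comment", b"hello", follow_symlinks=False)
print(os.listxattr(path, follow_symlinks=False))                  # ['user.comment', ...]
print(os.getxattr(path, "user.comment", follow_symlinks=False))   # b'hello'
os.removexattr(path, "user.comment", follow_symlinks=False)
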
diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py
index 89ae032016f..a3bda7282c0 100644
--- a/geo-replication/syncdaemon/libgfchangelog.py
+++ b/geo-replication/syncdaemon/libgfchangelog.py
@@ -9,120 +9,135 @@
#
import os
-from ctypes import CDLL, create_string_buffer, get_errno, byref, c_ulong
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong
from ctypes.util import find_library
-from syncdutils import ChangelogException
-
-
-class Changes(object):
- libgfc = CDLL(find_library("gfchangelog"), use_errno=True)
-
- @classmethod
- def geterrno(cls):
- return get_errno()
-
- @classmethod
- def raise_changelog_err(cls):
- errn = cls.geterrno()
- raise ChangelogException(errn, os.strerror(errn))
-
- @classmethod
- def _get_api(cls, call):
- return getattr(cls.libgfc, call)
-
- @classmethod
- def cl_register(cls, brick, path, log_file, log_level, retries=0):
- ret = cls._get_api('gf_changelog_register')(brick, path,
- log_file,
- log_level, retries)
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_scan(cls):
- ret = cls._get_api('gf_changelog_scan')()
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_startfresh(cls):
- ret = cls._get_api('gf_changelog_start_fresh')()
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_getchanges(cls):
- """ remove hardcoding for path name length """
- def clsort(f):
- return f.split('.')[-1]
- changes = []
- buf = create_string_buffer('\0', 4096)
- call = cls._get_api('gf_changelog_next_change')
-
- while True:
- ret = call(buf, 4096)
- if ret in (0, -1):
- break
- changes.append(buf.raw[:ret - 1])
- if ret == -1:
- cls.raise_changelog_err()
- # cleanup tracker
- cls.cl_startfresh()
- return sorted(changes, key=clsort)
-
- @classmethod
- def cl_done(cls, clfile):
- ret = cls._get_api('gf_changelog_done')(clfile)
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_history_scan(cls):
- ret = cls._get_api('gf_history_changelog_scan')()
- if ret == -1:
- cls.raise_changelog_err()
-
- return ret
-
- @classmethod
- def cl_history_changelog(cls, changelog_path, start, end, num_parallel):
- actual_end = c_ulong()
- ret = cls._get_api('gf_history_changelog')(changelog_path, start, end,
- num_parallel,
- byref(actual_end))
- if ret == -1:
- cls.raise_changelog_err()
-
- return (ret, actual_end.value)
-
- @classmethod
- def cl_history_startfresh(cls):
- ret = cls._get_api('gf_history_changelog_start_fresh')()
- if ret == -1:
- cls.raise_changelog_err()
-
- @classmethod
- def cl_history_getchanges(cls):
- """ remove hardcoding for path name length """
- def clsort(f):
- return f.split('.')[-1]
-
- changes = []
- buf = create_string_buffer('\0', 4096)
- call = cls._get_api('gf_history_changelog_next_change')
-
- while True:
- ret = call(buf, 4096)
- if ret in (0, -1):
- break
- changes.append(buf.raw[:ret - 1])
- if ret == -1:
- cls.raise_changelog_err()
-
- return sorted(changes, key=clsort)
-
- @classmethod
- def cl_history_done(cls, clfile):
- ret = cls._get_api('gf_history_changelog_done')(clfile)
- if ret == -1:
- cls.raise_changelog_err()
+from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
+from py2py3 import (gr_cl_history_changelog, gr_cl_done,
+ gr_create_string_buffer, gr_cl_register,
+ gr_cl_history_done, bytearray_to_str)
+
+
+libgfc = CDLL(
+ find_library("gfchangelog"),
+ mode=RTLD_GLOBAL,
+ use_errno=True
+)
+
+
+def _raise_changelog_err():
+ errn = get_errno()
+ raise ChangelogException(errn, os.strerror(errn))
+
+
+def _init():
+ if libgfc.gf_changelog_init(None) == -1:
+ _raise_changelog_err()
+
+
+def register(brick, path, log_file, log_level, retries=0):
+ _init()
+
+ ret = gr_cl_register(libgfc, brick, path, log_file, log_level, retries)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def scan():
+ ret = libgfc.gf_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def startfresh():
+ ret = libgfc.gf_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ # cleanup tracker
+ startfresh()
+
+ return sorted(changes, key=clsort)
+
+
+def done(clfile):
+ ret = gr_cl_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_scan():
+ ret = libgfc.gf_history_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+ return ret
+
+
+def history_changelog(changelog_path, start, end, num_parallel):
+ actual_end = c_ulong()
+ ret = gr_cl_history_changelog(libgfc, changelog_path, start, end,
+ num_parallel, byref(actual_end))
+ if ret == -1:
+ _raise_changelog_err()
+
+ if ret == -2:
+ raise ChangelogHistoryNotAvailable()
+
+ return (ret, actual_end.value)
+
+
+def history_startfresh():
+ ret = libgfc.gf_history_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_history_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ return sorted(changes, key=clsort)
+
+
+def history_done(clfile):
+ ret = gr_cl_history_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
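
The refactor above turns the Changes class into plain module-level functions. A minimal consumer loop against that API might look like the sketch below; the brick path, scratch directory, and process() callback are illustrative, and the gfchangelog shared library plus a brick with the changelog xlator enabled are assumed.

import time
import libgfchangelog

brick = "/bricks/b1"                         # illustrative brick path
scratch_dir = "/var/lib/misc/gsyncd-demo"    # illustrative working directory

# register() calls gf_changelog_init() and then gf_changelog_register()
libgfchangelog.register(brick, scratch_dir,
                        scratch_dir + "/changes.log", 7)

def process(changelog_file):
    # hypothetical consumer: parse the CHANGELOG.<ts> file and replay it
    pass

while True:
    libgfchangelog.scan()                        # publish pending changelogs
    for change in libgfchangelog.getchanges():   # sorted by timestamp suffix
        process(change)
        libgfchangelog.done(change)              # ack; not delivered again
    time.sleep(15)
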
diff --git a/geo-replication/syncdaemon/logutils.py b/geo-replication/syncdaemon/logutils.py
new file mode 100644
index 00000000000..01ae7852f23
--- /dev/null
+++ b/geo-replication/syncdaemon/logutils.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import logging
+from logging import Logger, handlers
+import sys
+import time
+
+
+class GLogger(Logger):
+
+ """Logger customizations for gsyncd.
+
+ It implements a log format similar to that of glusterfs.
+ """
+
+ def makeRecord(self, name, level, *a):
+ rv = Logger.makeRecord(self, name, level, *a)
+ rv.nsecs = (rv.created - int(rv.created)) * 1000000
+ fr = sys._getframe(4)
+ callee = fr.f_locals.get('self')
+ if callee:
+ ctx = str(type(callee)).split("'")[1].split('.')[-1]
+ else:
+ ctx = '<top>'
+ if not hasattr(rv, 'funcName'):
+ rv.funcName = fr.f_code.co_name
+ rv.lvlnam = logging.getLevelName(level)[0]
+ rv.ctx = ctx
+ return rv
+
+
+LOGFMT = ("[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s{0}"
+ ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s")
+
+
+def setup_logging(level="INFO", label="", log_file=""):
+ if label:
+ label = "(" + label + ")"
+
+ filename = None
+ stream = None
+ if log_file:
+ if log_file in ('-', '/dev/stderr'):
+ stream = sys.stderr
+ elif log_file == '/dev/stdout':
+ stream = sys.stdout
+ else:
+ filename = log_file
+
+ datefmt = "%Y-%m-%d %H:%M:%S"
+ fmt = LOGFMT.format(label)
+ logging.root = GLogger("root", level)
+ logging.setLoggerClass(GLogger)
+ logging.Formatter.converter = time.gmtime # Log in GMT/UTC time
+ logging.getLogger().handlers = []
+ logging.getLogger().setLevel(level)
+
+ if filename is not None:
+ logging_handler = handlers.WatchedFileHandler(filename)
+ formatter = logging.Formatter(fmt=fmt,
+ datefmt=datefmt)
+ logging_handler.setFormatter(formatter)
+ logging.getLogger().addHandler(logging_handler)
+ else:
+ logging.basicConfig(stream=stream,
+ format=fmt,
+ datefmt=datefmt,
+ level=level)
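
A short sketch of how setup_logging() above is meant to be called; the label and log path are illustrative, and the commented output line only approximates the resulting format.

import logging
from logutils import setup_logging

setup_logging(level="INFO",
              label="worker /bricks/b1",
              log_file="/var/log/glusterfs/geo-replication/demo.gsyncd.log")
# Passing log_file="-" or "/dev/stderr" logs to stderr instead of a file.

logging.info("worker spawned")
# roughly: [2018-09-03 10:15:00.123456] I [demo(worker /bricks/b1):9:<module>] <top>: worker spawned
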
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 51c1e571e53..9501aeae6b5 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -12,20 +12,23 @@ import os
import sys
import time
import stat
-import json
import logging
-import socket
+import fcntl
import string
import errno
-from errno import ENOENT, ENODATA, EPIPE, EEXIST
+import tarfile
+from errno import ENOENT, ENODATA, EEXIST, EACCES, EAGAIN, ESTALE, EINTR
from threading import Condition, Lock
from datetime import datetime
-from gconf import gconf
-from tempfile import NamedTemporaryFile
-from syncdutils import Thread, GsyncdError, boolify, escape
-from syncdutils import unescape, select, gauxpfx, md5hex, selfkill
-from syncdutils import lstat, errno_wrap
-from syncdutils import NoPurgeTimeAvailable, PartialHistoryAvailable
+
+import gsyncdconfig as gconf
+import libgfchangelog
+from rconf import rconf
+from syncdutils import (Thread, GsyncdError, escape_space_newline,
+ unescape_space_newline, gauxpfx, escape,
+ lstat, errno_wrap, FreeObject, lf, matching_disk_gfid,
+ NoStimeAvailable, PartialHistoryAvailable,
+ host_brick_split)
URXTIME = (-1, 0)
@@ -54,36 +57,75 @@ def _volinfo_hook_relax_foreign(self):
fgn_vi = volinfo_sys[self.KFGN]
if fgn_vi:
expiry = fgn_vi['timeout'] - int(time.time()) + 1
- logging.info('foreign volume info found, waiting %d sec for expiry' %
- expiry)
+ logging.info(lf('foreign volume info found, waiting for expiry',
+ expiry=expiry))
time.sleep(expiry)
volinfo_sys = self.get_sys_volinfo()
return volinfo_sys
+def edct(op, **ed):
+ dct = {}
+ dct['op'] = op
+ # This is used in automatic gfid conflict resolution.
+ # When marked True, it's skipped during re-processing.
+ dct['skip_entry'] = False
+ for k in ed:
+ if k == 'stat':
+ st = ed[k]
+ dst = dct['stat'] = {}
+ if st:
+ dst['uid'] = st.st_uid
+ dst['gid'] = st.st_gid
+ dst['mode'] = st.st_mode
+ dst['atime'] = st.st_atime
+ dst['mtime'] = st.st_mtime
+ else:
+ dct[k] = ed[k]
+ return dct
+
+
# The API!
def gmaster_builder(excrawl=None):
"""produce the GMaster class variant corresponding
to sync mode"""
this = sys.modules[__name__]
- modemixin = gconf.special_sync_mode
+ modemixin = gconf.get("special-sync-mode")
if not modemixin:
modemixin = 'normal'
- changemixin = 'xsync' if gconf.change_detector == 'xsync' \
- else excrawl or gconf.change_detector
- logging.info('setting up %s change detection mode' % changemixin)
+
+ if gconf.get("change-detector") == 'xsync':
+ changemixin = 'xsync'
+ elif excrawl:
+ changemixin = excrawl
+ else:
+ changemixin = gconf.get("change-detector")
+
+ logging.debug(lf('setting up change detection mode',
+ mode=changemixin))
modemixin = getattr(this, modemixin.capitalize() + 'Mixin')
crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
- sendmarkmixin = boolify(
- gconf.use_rsync_xattrs) and SendmarkRsyncMixin or SendmarkNormalMixin
- purgemixin = boolify(
- gconf.ignore_deletes) and PurgeNoopMixin or PurgeNormalMixin
- syncengine = boolify(gconf.use_tarssh) and TarSSHEngine or RsyncEngine
+
+ if gconf.get("use-rsync-xattrs"):
+ sendmarkmixin = SendmarkRsyncMixin
+ else:
+ sendmarkmixin = SendmarkNormalMixin
+
+ if gconf.get("ignore-deletes"):
+ purgemixin = PurgeNoopMixin
+ else:
+ purgemixin = PurgeNormalMixin
+
+ if gconf.get("sync-method") == "tarssh":
+ syncengine = TarSSHEngine
+ else:
+ syncengine = RsyncEngine
class _GMaster(crawlmixin, modemixin, sendmarkmixin,
purgemixin, syncengine):
pass
+
return _GMaster
@@ -120,9 +162,9 @@ class NormalMixin(object):
return xt0 >= xt1
def make_xtime_opts(self, is_master, opts):
- if not 'create' in opts:
+ if 'create' not in opts:
opts['create'] = is_master
- if not 'default_xtime' in opts:
+ if 'default_xtime' not in opts:
opts['default_xtime'] = URXTIME
def xtime_low(self, rsc, path, **opts):
@@ -141,7 +183,9 @@ class NormalMixin(object):
xt = _xtime_now()
rsc.server.aggregated.set_xtime(path, self.uuid, xt)
else:
- xt = opts['default_xtime']
+ zero_zero = (0, 0)
+ if xt != zero_zero:
+ xt = opts['default_xtime']
return xt
def keepalive_payload_hook(self, timo, gap):
@@ -153,7 +197,7 @@ class NormalMixin(object):
vi = vi.copy()
vi['timeout'] = int(time.time()) + timo
else:
- # send keep-alives more frequently to
+ # send keep-alive more frequently to
# avoid a delay in announcing our volume info
# to slave if it becomes established in the
# meantime
@@ -168,10 +212,7 @@ class NormalMixin(object):
raise GsyncdError("timestamp corruption for " + path)
def need_sync(self, e, xte, xtrd):
- if self.xsync_upper_limit:
- return xte > xtrd and xte <= self.xsync_upper_limit
- else:
- return xte > xtrd
+ return xte > xtrd
def set_slave_xtime(self, path, mark):
self.slave.server.set_stime(path, self.uuid, mark)
@@ -194,9 +235,9 @@ class RecoverMixin(NormalMixin):
@staticmethod
def make_xtime_opts(is_master, opts):
- if not 'create' in opts:
+ if 'create' not in opts:
opts['create'] = False
- if not 'default_xtime' in opts:
+ if 'default_xtime' not in opts:
opts['default_xtime'] = URXTIME
def keepalive_payload_hook(self, timo, gap):
@@ -239,28 +280,24 @@ class TarSSHEngine(object):
"""
def a_syncdata(self, files):
- self.unlinked_gfids = []
- logging.debug('files: %s' % (files))
- self.current_files_skipped_count = 0
- del self.skipped_gfid_list[:]
+ logging.debug(lf("Files", files=files))
+
for f in files:
pb = self.syncer.add(f)
def regjob(se, xte, pb):
rv = pb.wait()
if rv[0]:
- logging.debug('synced ' + se)
+ logging.debug(lf('synced', file=se))
return True
else:
# stat check for file presence
st = lstat(se)
if isinstance(st, int):
- self.unlinked_gfids.append(se)
+ # file got unlinked in the interim
+ self.unlinked_gfids.add(se)
return True
- logging.warn('tar+ssh: %s [errcode: %d]' % (se, rv[1]))
- se_list = se.split('/');
- self.current_files_skipped_count += 1
- self.skipped_gfid_list.append(se_list[1])
+
self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
def syncdata_wait(self):
@@ -277,31 +314,25 @@ class RsyncEngine(object):
"""Sync engine that uses rsync(1) for data transfers"""
def a_syncdata(self, files):
- self.unlinked_gfids = []
- logging.debug('files: %s' % (files))
- self.current_files_skipped_count = 0
- del self.skipped_gfid_list[:]
+ logging.debug(lf("files", files=files))
+
for f in files:
- logging.debug('candidate for syncing %s' % f)
+ logging.debug(lf('candidate for syncing', file=f))
pb = self.syncer.add(f)
def regjob(se, xte, pb):
rv = pb.wait()
if rv[0]:
- logging.debug('synced ' + se)
+ logging.debug(lf('synced', file=se))
return True
else:
- if rv[1] in [23, 24]:
- # stat to check if the file exist
- st = lstat(se)
- if isinstance(st, int):
- # file got unlinked in the interim
- self.unlinked_gfids.append(se)
- return True
- logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1]))
- se_list = se.split('/');
- self.current_files_skipped_count += 1
- self.skipped_gfid_list.append(se_list[1])
+ # stat to check if the file exist
+ st = lstat(se)
+ if isinstance(st, int):
+ # file got unlinked in the interim
+ self.unlinked_gfids.add(se)
+ return True
+
self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
def syncdata_wait(self):
@@ -345,6 +376,18 @@ class GMasterCommon(object):
if self.volinfo:
return self.volinfo['volume_mark']
+ def get_entry_stime(self):
+ data = self.slave.server.entry_stime(".", self.uuid)
+ if isinstance(data, int):
+ data = None
+ return data
+
+ def get_data_stime(self):
+ data = self.slave.server.stime(".", self.uuid)
+ if isinstance(data, int):
+ data = None
+ return data
+
def xtime(self, path, *a, **opts):
"""get amended xtime
@@ -361,56 +404,13 @@ class GMasterCommon(object):
self.make_xtime_opts(rsc == self.master, opts)
return self.xtime_low(rsc, path, **opts)
- def get_initial_crawl_data(self):
- # while persisting only 'files_syncd' is non-zero, rest of
- # the stats are nulls. lets keep it that way in case they
- # are needed to be used some day...
- default_data = {'files_syncd': 0,
- 'files_remaining': 0,
- 'bytes_remaining': 0,
- 'purges_remaining': 0,
- 'total_files_skipped': 0}
- if getattr(gconf, 'state_detail_file', None):
- try:
- with open(gconf.state_detail_file, 'r+') as f:
- loaded_data = json.load(f)
- diff_data = set(default_data) - set(loaded_data)
- if len(diff_data):
- for i in diff_data:
- loaded_data[i] = default_data[i]
- return loaded_data
- except IOError:
- logging.warn('Creating new gconf.state_detail_file.')
- # Create file with initial data
- try:
- with open(gconf.state_detail_file, 'wb') as f:
- json.dump(default_data, f)
- return default_data
- except:
- raise
- return default_data
-
- def update_crawl_data(self):
- if getattr(gconf, 'state_detail_file', None):
- try:
- same_dir = os.path.dirname(gconf.state_detail_file)
- with NamedTemporaryFile(dir=same_dir, delete=False) as tmp:
- json.dump(self.total_crawl_stats, tmp)
- tmp.flush()
- os.fsync(tmp.fileno())
- os.rename(tmp.name, gconf.state_detail_file)
- except (IOError, OSError):
- raise
-
def __init__(self, master, slave):
self.master = master
self.slave = slave
self.jobtab = {}
- if boolify(gconf.use_tarssh):
- logging.info("using 'tar over ssh' as the sync engine")
- self.syncer = Syncer(slave, self.slave.tarssh)
+ if gconf.get("sync-method") == "tarssh":
+ self.syncer = Syncer(slave, self.slave.tarssh, [2])
else:
- logging.info("using 'rsync' as the sync engine")
# partial transfer (cf. rsync(1)), that's normal
self.syncer = Syncer(slave, self.slave.rsync, [23, 24])
# crawls vs. turns:
@@ -424,24 +424,20 @@ class GMasterCommon(object):
# 0.
self.crawls = 0
self.turns = 0
- self.total_turns = int(gconf.turns)
+ self.total_turns = rconf.turns
self.crawl_start = datetime.now()
self.lastreport = {'crawls': 0, 'turns': 0, 'time': 0}
- self.total_crawl_stats = None
self.start = None
self.change_seen = None
# the actual volinfo we make use of
self.volinfo = None
self.terminate = False
self.sleep_interval = 1
- self.checkpoint_thread = None
- self.current_files_skipped_count = 0
- self.skipped_gfid_list = []
- self.unlinked_gfids = []
+ self.unlinked_gfids = set()
def init_keep_alive(cls):
"""start the keep-alive thread """
- timo = int(gconf.timeout or 0)
+ timo = gconf.get("slave-timeout", 0)
if timo > 0:
def keep_alive():
while True:
@@ -451,14 +447,69 @@ class GMasterCommon(object):
t = Thread(target=keep_alive)
t.start()
- def should_crawl(cls):
- return gconf.glusterd_uuid in cls.master.server.node_uuid()
+ def mgmt_lock(self):
+
+ """Take management volume lock """
+ if rconf.mgmt_lock_fd:
+ try:
+ fcntl.lockf(rconf.mgmt_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ return True
+ except:
+ ex = sys.exc_info()[1]
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ return False
+ raise
+
+ fd = None
+ bname = str(self.uuid) + "_" + rconf.args.slave_id + "_subvol_" \
+ + str(rconf.args.subvol_num) + ".lock"
+ mgmt_lock_dir = os.path.join(gconf.get("meta-volume-mnt"), "geo-rep")
+ path = os.path.join(mgmt_lock_dir, bname)
+ logging.debug(lf("lock file path", path=path))
+ try:
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ logging.info("Creating geo-rep directory in meta volume...")
+ try:
+ os.makedirs(mgmt_lock_dir)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST:
+ pass
+ else:
+ raise
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ else:
+ raise
+ try:
+ fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ # Save latest FD for future use
+ rconf.mgmt_lock_fd = fd
+ except:
+ ex = sys.exc_info()[1]
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ # cannot grab, it's taken
+ rconf.mgmt_lock_fd = fd
+ return False
+ raise
+
+ return True
+
+ def should_crawl(self):
+ if not gconf.get("use-meta-volume"):
+ return rconf.args.local_node_id in self.master.server.node_uuid()
+
+ if not os.path.ismount(gconf.get("meta-volume-mnt")):
+ logging.error("Meta-volume is not mounted. Worker Exiting...")
+ sys.exit(1)
+ return self.mgmt_lock()
def register(self):
self.register()
- def crawlwrap(self, oneshot=False, no_stime_update=False,
- register_time=None):
+ def crawlwrap(self, oneshot=False, register_time=None):
if oneshot:
# it's important to do this during the oneshot crawl as
# for a passive gsyncd (ie. in a replicate scenario)
@@ -467,38 +518,35 @@ class GMasterCommon(object):
# If crawlwrap is called when partial history available,
# then it sets register_time which is the time when geo-rep
- # worker registerd to changelog consumption. Since nsec is
- # not considered in register time, their are chances of skipping
- # changes detection in xsync crawl. Add 1 sec to upper_limit.
- # This limit will be reset when crawlwrap is called again.
- self.xsync_upper_limit = None
+ # worker registered to changelog consumption. Since nsec is
+ # not considered in register time, there are chances of skipping
+ # changes detection in xsync crawl. This limit will be reset when
+ # crawlwrap is called again.
+ self.live_changelog_start_time = None
if register_time:
- self.xsync_upper_limit = (register_time + 1, 0)
+ self.live_changelog_start_time = (register_time, 0)
# no need to maintain volinfo state machine.
# in a cascading setup, each geo-replication session is
# independent (ie. 'volume-mark' and 'xtime' are not
- # propogated). This is beacuse the slave's xtime is now
+ # propagated). This is because the slave's xtime is now
# stored on the master itself. 'volume-mark' just identifies
# that we are in a cascading setup and need to enable
# 'geo-replication.ignore-pid-check' option.
volinfo_sys = self.volinfo_hook()
self.volinfo = volinfo_sys[self.KNAT]
inter_master = volinfo_sys[self.KFGN]
- logging.info("%s master with volume id %s ..." %
- (inter_master and "intermediate" or "primary",
- self.uuid))
- gconf.configinterface.set('volume_id', self.uuid)
+ logging.debug("%s master with volume id %s ..." %
+ (inter_master and "intermediate" or "primary",
+ self.uuid))
+ rconf.volume_id = self.uuid
if self.volinfo:
if self.volinfo['retval']:
- logging.warn("master cluster's info may not be valid %d" %
- self.volinfo['retval'])
- self.start_checkpoint_thread()
+ logging.warn(lf("master cluster's info may not be valid",
+ error=self.volinfo['retval']))
else:
raise GsyncdError("master volinfo unavailable")
- self.total_crawl_stats = self.get_initial_crawl_data()
self.lastreport['time'] = time.time()
- logging.info('crawl interval: %d seconds' % self.sleep_interval)
t0 = time.time()
crawl = self.should_crawl()
@@ -509,61 +557,56 @@ class GMasterCommon(object):
self.start = time.time()
should_display_info = self.start - self.lastreport['time'] >= 60
if should_display_info:
- logging.info("%d crawls, %d turns",
- self.crawls - self.lastreport['crawls'],
- self.turns - self.lastreport['turns'])
+ logging.debug("%d crawls, %d turns",
+ self.crawls - self.lastreport['crawls'],
+ self.turns - self.lastreport['turns'])
self.lastreport.update(crawls=self.crawls,
turns=self.turns,
time=self.start)
t1 = time.time()
- if int(t1 - t0) >= int(gconf.replica_failover_interval):
+ if int(t1 - t0) >= gconf.get("replica-failover-interval"):
crawl = self.should_crawl()
t0 = t1
self.update_worker_remote_node()
if not crawl:
- self.update_worker_health("Passive")
+ self.status.set_passive()
# bring up _this_ brick to the cluster stime
# which is min of cluster (but max of the replicas)
brick_stime = self.xtime('.', self.slave)
cluster_stime = self.master.server.aggregated.stime_mnt(
- '.', '.'.join([str(self.uuid), str(gconf.slave_id)]))
- logging.debug("Cluster stime: %s | Brick stime: %s" %
- (repr(cluster_stime), repr(brick_stime)))
+ '.', '.'.join([str(self.uuid), rconf.args.slave_id]))
+ logging.debug(lf("Crawl info",
+ cluster_stime=cluster_stime,
+ brick_stime=brick_stime))
+
if not isinstance(cluster_stime, int):
if brick_stime < cluster_stime:
self.slave.server.set_stime(
self.FLAT_DIR_HIERARCHY, self.uuid, cluster_stime)
+ self.upd_stime(cluster_stime)
+ # Purge all changelogs available in processing dir
+ # less than cluster_stime
+ proc_dir = os.path.join(self.tempdir,
+ ".processing")
+
+ if os.path.exists(proc_dir):
+ to_purge = [f for f in os.listdir(proc_dir)
+ if (f.startswith("CHANGELOG.") and
+ int(f.split('.')[-1]) <
+ cluster_stime[0])]
+ for f in to_purge:
+ os.remove(os.path.join(proc_dir, f))
+
time.sleep(5)
continue
- self.update_worker_health("Active")
- self.crawl(no_stime_update=no_stime_update)
+
+ self.status.set_active()
+ self.crawl()
+
if oneshot:
return
time.sleep(self.sleep_interval)
- @classmethod
- def _checkpt_param(cls, chkpt, prm, xtimish=True):
- """use config backend to lookup a parameter belonging to
- checkpoint @chkpt"""
- cprm = gconf.configinterface.get_realtime('checkpoint_' + prm)
- if not cprm:
- return
- chkpt_mapped, val = cprm.split(':', 1)
- if unescape(chkpt_mapped) != chkpt:
- return
- if xtimish:
- val = cls.deserialize_xtime(val)
- return val
-
- @classmethod
- def _set_checkpt_param(cls, chkpt, prm, val, xtimish=True):
- """use config backend to store a parameter associated
- with checkpoint @chkpt"""
- if xtimish:
- val = cls.serialize_xtime(val)
- gconf.configinterface.set(
- 'checkpoint_' + prm, "%s:%s" % (escape(chkpt), val))
-
@staticmethod
def humantime(*tpair):
"""format xtime-like (sec, nsec) pair to human readable format"""
@@ -592,116 +635,6 @@ class GMasterCommon(object):
string.zfill(m, 2), string.zfill(s, 2))
return date
- def checkpt_service(self, chan, chkpt):
- """checkpoint service loop
-
- monitor and verify checkpoint status for @chkpt, and listen
- for incoming requests for whom we serve a pretty-formatted
- status report"""
- while True:
- chkpt = gconf.configinterface.get_realtime("checkpoint")
- if not chkpt:
- gconf.configinterface.delete("checkpoint_completed")
- gconf.configinterface.delete("checkpoint_target")
- # dummy loop for the case when there is no checkpt set
- select([chan], [], [])
- conn, _ = chan.accept()
- conn.send('\0')
- conn.close()
- continue
-
- checkpt_tgt = self._checkpt_param(chkpt, 'target')
- if not checkpt_tgt:
- checkpt_tgt = self.xtime('.')
- if isinstance(checkpt_tgt, int):
- raise GsyncdError("master root directory is "
- "unaccessible (%s)",
- os.strerror(checkpt_tgt))
- self._set_checkpt_param(chkpt, 'target', checkpt_tgt)
- logging.debug("checkpoint target %s has been determined "
- "for checkpoint %s" %
- (repr(checkpt_tgt), chkpt))
-
- # check if the label is 'now'
- chkpt_lbl = chkpt
- try:
- x1, x2 = chkpt.split(':')
- if x1 == 'now':
- chkpt_lbl = "as of " + self.humantime(x2)
- except:
- pass
- completed = self._checkpt_param(chkpt, 'completed', xtimish=False)
- if completed:
- completed = tuple(int(x) for x in completed.split('.'))
- s, _, _ = select([chan], [], [], (not completed) and 5 or None)
- # either request made and we re-check to not
- # give back stale data, or we still hunting for completion
- if (self.native_xtime(checkpt_tgt) and (
- self.native_xtime(checkpt_tgt) < self.volmark)):
- # indexing has been reset since setting the checkpoint
- status = "is invalid"
- else:
- xtr = self.xtime('.', self.slave)
- if isinstance(xtr, int):
- raise GsyncdError("slave root directory is "
- "unaccessible (%s)",
- os.strerror(xtr))
- ncompleted = self.xtime_geq(xtr, checkpt_tgt)
- if completed and not ncompleted: # stale data
- logging.warn("completion time %s for checkpoint %s "
- "became stale" %
- (self.humantime(*completed), chkpt))
- completed = None
- gconf.configinterface.delete('checkpoint_completed')
- if ncompleted and not completed: # just reaching completion
- completed = "%.6f" % time.time()
- self._set_checkpt_param(
- chkpt, 'completed', completed, xtimish=False)
- completed = tuple(int(x) for x in completed.split('.'))
- logging.info("checkpoint %s completed" % chkpt)
- status = completed and \
- "completed at " + self.humantime(completed[0]) or \
- "not reached yet"
- if s:
- conn = None
- try:
- conn, _ = chan.accept()
- try:
- conn.send("checkpoint %s is %s\0" %
- (chkpt_lbl, status))
- except:
- exc = sys.exc_info()[1]
- if ((isinstance(exc, OSError) or isinstance(
- exc, IOError)) and exc.errno == EPIPE):
- logging.debug('checkpoint client disconnected')
- else:
- raise
- finally:
- if conn:
- conn.close()
-
- def start_checkpoint_thread(self):
- """prepare and start checkpoint service"""
- if self.checkpoint_thread or not (
- getattr(gconf, 'state_socket_unencoded', None) and getattr(
- gconf, 'socketdir', None)
- ):
- return
- chan = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- state_socket = os.path.join(
- gconf.socketdir, md5hex(gconf.state_socket_unencoded) + ".socket")
- try:
- os.unlink(state_socket)
- except:
- if sys.exc_info()[0] == OSError:
- pass
- chan.bind(state_socket)
- chan.listen(1)
- chkpt = gconf.configinterface.get_realtime("checkpoint")
- t = Thread(target=self.checkpt_service, args=(chan, chkpt))
- t.start()
- self.checkpoint_thread = t
-
def add_job(self, path, label, job, *a, **kw):
"""insert @job function to job table at @path with @label"""
if self.jobtab.get(path) is None:
@@ -717,7 +650,7 @@ class GMasterCommon(object):
"""perform jobs registered for @path
Reset jobtab entry for @path,
- determine success as the conjuction of
+ determine success as the conjunction of
success of all the jobs. In case of
success, call .sendmark on @path
"""
@@ -742,10 +675,12 @@ class GMasterCommon(object):
class XCrawlMetadata(object):
- def __init__(self, st_uid, st_gid, st_mode):
+ def __init__(self, st_uid, st_gid, st_mode, st_atime, st_mtime):
self.st_uid = int(st_uid)
self.st_gid = int(st_gid)
self.st_mode = int(st_mode)
+ self.st_atime = float(st_atime)
+ self.st_mtime = float(st_mtime)
class GMasterChangelogMixin(GMasterCommon):
@@ -755,6 +690,7 @@ class GMasterChangelogMixin(GMasterCommon):
# index for change type and entry
IDX_START = 0
IDX_END = 2
+ UNLINK_ENTRY = 2
POS_GFID = 0
POS_TYPE = 1
@@ -764,30 +700,291 @@ class GMasterChangelogMixin(GMasterCommon):
TYPE_GFID = "D "
TYPE_ENTRY = "E "
- # flat directory heirarchy for gfid based access
- FLAT_DIR_HIERARCHY = '.'
+ MAX_EF_RETRIES = 10
+ MAX_OE_RETRIES = 10
- # maximum retries per changelog before giving up
- MAX_RETRIES = 10
+ # flat directory hierarchy for gfid based access
+ FLAT_DIR_HIERARCHY = '.'
- CHANGELOG_LOG_LEVEL = 9
CHANGELOG_CONN_RETRIES = 5
- def fallback_xsync(self):
- logging.info('falling back to xsync mode')
- gconf.configinterface.set('change-detector', 'xsync')
- selfkill()
+ def init_fop_batch_stats(self):
+ self.batch_stats = {
+ "CREATE": 0,
+ "MKNOD": 0,
+ "UNLINK": 0,
+ "MKDIR": 0,
+ "RMDIR": 0,
+ "LINK": 0,
+ "SYMLINK": 0,
+ "RENAME": 0,
+ "SETATTR": 0,
+ "SETXATTR": 0,
+ "XATTROP": 0,
+ "DATA": 0,
+ "ENTRY_SYNC_TIME": 0,
+ "META_SYNC_TIME": 0,
+ "DATA_START_TIME": 0
+ }
+
+ def update_fop_batch_stats(self, ty):
+ if ty in ['FSETXATTR']:
+ ty = 'SETXATTR'
+ self.batch_stats[ty] = self.batch_stats.get(ty, 0) + 1
+
+ def archive_and_purge_changelogs(self, changelogs):
+        # Creates a tar file instead of tar.gz, since changelogs will
+        # be appended to the existing tar. The archive name is
+        # archive_<YEAR><MONTH>.tar
+ archive_name = "archive_%s.tar" % datetime.today().strftime(
+ gconf.get("changelog-archive-format"))
+
+ try:
+ tar = tarfile.open(os.path.join(self.processed_changelogs_dir,
+ archive_name),
+ "a")
+ except tarfile.ReadError:
+ tar = tarfile.open(os.path.join(self.processed_changelogs_dir,
+ archive_name),
+ "w")
+
+ for f in changelogs:
+ try:
+ f = os.path.basename(f)
+ tar.add(os.path.join(self.processed_changelogs_dir, f),
+ arcname=os.path.basename(f))
+ except:
+ exc = sys.exc_info()[1]
+ if ((isinstance(exc, OSError) or
+ isinstance(exc, IOError)) and exc.errno == ENOENT):
+ continue
+ else:
+ tar.close()
+ raise
+ tar.close()
+
+ for f in changelogs:
+ try:
+ f = os.path.basename(f)
+ os.remove(os.path.join(self.processed_changelogs_dir, f))
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ continue
+ else:
+ raise
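
The archive name above comes from strftime over the changelog-archive-format option, so every changelog handled in the same period is appended to one plain tar; append mode raises ReadError when the file is not yet a valid tar, and a fresh archive is then created. A minimal standalone sketch of that naming and append-or-create pattern (archive_changelog and the "%Y%m" format value are illustrative assumptions, not the daemon's API):

    import os
    import tarfile
    from datetime import datetime

    def archive_changelog(processed_dir, changelog, archive_format="%Y%m"):
        # e.g. archive_202402.tar when the format is "%Y%m" (the daemon reads
        # the real format from the session config)
        archive_name = "archive_%s.tar" % datetime.today().strftime(archive_format)
        archive_path = os.path.join(processed_dir, archive_name)
        try:
            # append to the period's archive if it already exists and is valid
            tar = tarfile.open(archive_path, "a")
        except tarfile.ReadError:
            # otherwise start a new archive for this period
            tar = tarfile.open(archive_path, "w")
        try:
            tar.add(os.path.join(processed_dir, changelog), arcname=changelog)
        finally:
            tar.close()
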
def setup_working_dir(self):
- workdir = os.path.join(gconf.working_dir, md5hex(gconf.local_path))
+ workdir = os.path.join(gconf.get("working-dir"),
+ escape(rconf.args.local_path))
logging.debug('changelog working dir %s' % workdir)
return workdir
- def get_purge_time(self):
- purge_time = self.xtime('.', self.slave)
- if isinstance(purge_time, int):
- purge_time = None
- return purge_time
+ def log_failures(self, failures, entry_key, gfid_prefix, log_prefix):
+ num_failures = 0
+ for failure in failures:
+ st = lstat(os.path.join(gfid_prefix, failure[0][entry_key]))
+ if not isinstance(st, int):
+ num_failures += 1
+ logging.error(lf('%s FAILED' % log_prefix,
+ data=failure))
+ if failure[0]['op'] == 'MKDIR':
+ raise GsyncdError("The above directory failed to sync."
+ " Please fix it to proceed further.")
+
+ self.status.inc_value("failures", num_failures)
+
+ def fix_possible_entry_failures(self, failures, retry_count, entries):
+ pfx = gauxpfx()
+ fix_entry_ops = []
+ failures1 = []
+ remove_gfids = set()
+ for failure in failures:
+ if failure[2]['name_mismatch']:
+ pbname = failure[2]['slave_entry']
+ elif failure[2]['dst']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ op = failure[0]['op']
+ # name exists but gfid is different
+ if failure[2]['gfid_mismatch'] or failure[2]['name_mismatch']:
+ slave_gfid = failure[2]['slave_gfid']
+ st = lstat(os.path.join(pfx, slave_gfid))
+ # Takes care of scenarios with no hardlinks
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug(lf('Entry not present on master. Fixing gfid '
+ 'mismatch in slave. Deleting the entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ # Add deletion to fix_entry_ops list
+ if failure[2]['slave_isdir']:
+ fix_entry_ops.append(
+ edct('RMDIR',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ else:
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ remove_gfids.add(slave_gfid)
+ if op in ['RENAME']:
+                        # If the renamed gfid doesn't exist on master, remove
+ # rename entry and unlink src on slave
+ st = lstat(os.path.join(pfx, failure[0]['gfid']))
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug("Unlink source %s" % repr(failure))
+ remove_gfids.add(failure[0]['gfid'])
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # Takes care of scenarios of hardlinks/renames on master
+ elif not isinstance(st, int):
+ if matching_disk_gfid(slave_gfid, pbname):
+ # Safe to ignore the failure as master contains same
+ # file with same gfid. Remove entry from entries list
+ logging.debug(lf('Fixing gfid mismatch in slave. '
+ ' Safe to ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ remove_gfids.add(failure[0]['gfid'])
+ if op == 'RENAME':
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # The file exists on master but with different name.
+ # Probably renamed and got missed during xsync crawl.
+ elif failure[2]['slave_isdir']:
+ realpath = os.readlink(os.path.join(
+ rconf.args.local_path,
+ ".glusterfs",
+ slave_gfid[0:2],
+ slave_gfid[2:4],
+ slave_gfid))
+ dst_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ src_entry = pbname
+ logging.debug(lf('Fixing dir name/gfid mismatch in '
+ 'slave', retry_count=retry_count,
+ entry=repr(failure)))
+ if src_entry == dst_entry:
+ # Safe to ignore the failure as master contains
+ # same directory as in slave with same gfid.
+ # Remove the failure entry from entries list
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Safe to ignore, '
+ 'take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ rename_dict = edct('RENAME', gfid=slave_gfid,
+ entry=src_entry,
+ entry1=dst_entry, stat=st,
+ link=None)
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Renaming',
+ retry_count=retry_count,
+ entry=repr(rename_dict)))
+ fix_entry_ops.append(rename_dict)
+ else:
+                        # A hardlink exists with a different name, or the
+                        # file was renamed; the matching_disk_gfid check
+                        # confirms the name no longer has this gfid on the
+                        # master, so it is safe to delete it on the slave
+ logging.debug(lf('Fixing file gfid mismatch in slave. '
+ 'Hardlink/Rename Case. Deleting entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ elif failure[1] == ENOENT:
+ if op in ['RENAME']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ pargfid = pbname.split('/')[1]
+ st = lstat(os.path.join(pfx, pargfid))
+ # Safe to ignore the failure as master doesn't contain
+ # parent directory.
+ if isinstance(st, int):
+ logging.debug(lf('Fixing ENOENT error in slave. Parent '
+ 'does not exist on master. Safe to '
+ 'ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ logging.debug(lf('Fixing ENOENT error in slave. Create '
+ 'parent directory on slave.',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ realpath = os.readlink(os.path.join(rconf.args.local_path,
+ ".glusterfs",
+ pargfid[0:2],
+ pargfid[2:4],
+ pargfid))
+ dir_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ fix_entry_ops.append(
+ edct('MKDIR', gfid=pargfid, entry=dir_entry,
+ mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
+
+ logging.debug("remove_gfids: %s" % repr(remove_gfids))
+ if remove_gfids:
+ for e in entries:
+ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
+ and e['gfid'] in remove_gfids:
+ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
+ e['skip_entry'] = True
+
+ if fix_entry_ops:
+ # Process deletions of entries whose gfids are mismatched
+ failures1 = self.slave.server.entry_ops(fix_entry_ops)
+
+ return (failures1, fix_entry_ops)
+
+ def handle_entry_failures(self, failures, entries):
+ retries = 0
+ pending_failures = False
+ failures1 = []
+ failures2 = []
+ entry_ops1 = []
+ entry_ops2 = []
+
+ if failures:
+ pending_failures = True
+ failures1 = failures
+ entry_ops1 = entries
+
+ while pending_failures and retries < self.MAX_EF_RETRIES:
+ retries += 1
+ (failures2, entry_ops2) = self.fix_possible_entry_failures(
+ failures1, retries, entry_ops1)
+ if not failures2:
+ pending_failures = False
+ logging.info(lf('Successfully fixed entry ops with gfid '
+ 'mismatch', retry_count=retries))
+ else:
+ pending_failures = True
+ failures1 = failures2
+ entry_ops1 = entry_ops2
+
+ if pending_failures:
+ for failure in failures1:
+ logging.error("Failed to fix entry ops %s", repr(failure))
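
The failure handling above operates on plain dicts: entry_ops() consumes a list of edct()-style operation dicts and reports failures back as (op-dict, errno, mismatch-info) tuples, which fix_possible_entry_failures() inspects field by field. A hypothetical illustration of those shapes, with invented gfids and names and the aux-gfid prefix assumed to be ".gfid":

    # An entry-op dict, as built by edct() (see the removed helper below);
    # 'entry' is "<aux-gfid-prefix>/<parent-gfid>/<basename>".
    pfx = ".gfid"  # assumption: the prefix returned by gauxpfx()
    mkdir_op = {'op': 'MKDIR', 'gfid': 'b1e2...', 'mode': 0o40755,
                'uid': 0, 'gid': 0,
                'entry': pfx + '/a0f1.../dirname'}

    # A failure reported by the slave: the original op, an errno, and a dict
    # describing what the slave actually found for that name.
    failure = (mkdir_op, 17,  # EEXIST
               {'gfid_mismatch': True, 'slave_gfid': 'c3d4...',
                'slave_isdir': True, 'name_mismatch': False,
                'slave_entry': None, 'dst': False})

    # handle_entry_failures() then retries fix_possible_entry_failures()
    # up to MAX_EF_RETRIES times until no failures remain.
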
def process_change(self, change, done, retry):
pfx = gauxpfx()
@@ -796,8 +993,28 @@ class GMasterChangelogMixin(GMasterCommon):
meta_gfid = set()
datas = set()
- # basic crawl stats: files and bytes
- files_pending = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
+ change_ts = change.split(".")[-1]
+
+ # Ignore entry ops which are already processed in Changelog modes
+ ignore_entry_ops = False
+ entry_stime = None
+ data_stime = None
+ if self.name in ["live_changelog", "history_changelog"]:
+ entry_stime = self.get_entry_stime()
+ data_stime = self.get_data_stime()
+
+ if entry_stime is not None and data_stime is not None:
+                # If data_stime > entry_stime, the entry_stime is stale:
+                # this happens when the stime was updated by a Passive
+                # worker. Use data_stime in that case.
+ if data_stime[0] > entry_stime[0]:
+ entry_stime = data_stime
+
+ # Compare the entry_stime with changelog file suffix
+ # if changelog time is less than entry_stime then ignore
+ if int(change_ts) <= entry_stime[0]:
+ ignore_entry_ops = True
+
try:
f = open(change, "r")
clist = f.readlines()
@@ -805,34 +1022,27 @@ class GMasterChangelogMixin(GMasterCommon):
except IOError:
raise
- def edct(op, **ed):
- dct = {}
- dct['op'] = op
- for k in ed:
- if k == 'stat':
- st = ed[k]
- dst = dct['stat'] = {}
- if st:
- dst['uid'] = st.st_uid
- dst['gid'] = st.st_gid
- dst['mode'] = st.st_mode
- else:
- dct[k] = ed[k]
- return dct
-
- # entry counts (not purges)
- def entry_update():
- files_pending['count'] += 1
-
- # purge count
- def purge_update():
- files_pending['purge'] += 1
-
for e in clist:
e = e.strip()
et = e[self.IDX_START:self.IDX_END] # entry type
ec = e[self.IDX_END:].split(' ') # rest of the bits
+ # skip ENTRY operation if hot tier brick
+ if self.name == 'live_changelog' or \
+ self.name == 'history_changelog':
+ if rconf.args.is_hottier and et == self.TYPE_ENTRY:
+ logging.debug(lf('skip ENTRY op if hot tier brick',
+ op=ec[self.POS_TYPE]))
+ continue
+
+            # Data and Meta operations are decided while parsing
+            # UNLINK/RMDIR/MKNOD, so when ignore_entry_ops is True skip
+            # every other entry op here; UNLINK/RMDIR/MKNOD are still
+            # parsed and their entry ops dropped at the end.
+ if ignore_entry_ops and et == self.TYPE_ENTRY and \
+ ec[self.POS_TYPE] not in ["UNLINK", "RMDIR", "MKNOD"]:
+ continue
+
if et == self.TYPE_ENTRY:
# extract information according to the type of
# the entry operation. create(), mkdir() and mknod()
@@ -840,56 +1050,128 @@ class GMasterChangelogMixin(GMasterCommon):
# itself, so no need to stat()...
ty = ec[self.POS_TYPE]
+ self.update_fop_batch_stats(ec[self.POS_TYPE])
+
# PARGFID/BNAME
- en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
+ en = unescape_space_newline(
+ os.path.join(pfx, ec[self.POS_ENTRY1]))
# GFID of the entry
gfid = ec[self.POS_GFID]
if ty in ['UNLINK', 'RMDIR']:
+ # The index of PARGFID/BNAME for UNLINK, RMDIR
+                    # is no longer the last index. It varies based on
+                    # whether changelog.capture-del-path is enabled.
+ en = unescape_space_newline(
+ os.path.join(pfx, ec[self.UNLINK_ENTRY]))
+
# Remove from DATA list, so that rsync will
# not fail
pt = os.path.join(pfx, ec[0])
- if pt in datas:
+ st = lstat(pt)
+ if pt in datas and isinstance(st, int):
+                        # file got unlinked; may be a historical changelog
datas.remove(pt)
- purge_update()
- entries.append(edct(ty, gfid=gfid, entry=en))
+ if ty in ['RMDIR'] and not isinstance(st, int):
+ logging.info(lf('Ignoring rmdir. Directory present in '
+ 'master', gfid=gfid, pgfid_bname=en))
+ continue
+
+ if not gconf.get("ignore-deletes"):
+ if not ignore_entry_ops:
+ entries.append(edct(ty, gfid=gfid, entry=en))
elif ty in ['CREATE', 'MKDIR', 'MKNOD']:
- entry_update()
- # stat information present in the changelog itself
+ # Special case: record mknod as link
+ if ty in ['MKNOD']:
+ mode = int(ec[2])
+ if mode & 0o1000:
+ # Avoid stat'ing the file as it
+ # may be deleted in the interim
+ st = FreeObject(st_mode=int(ec[2]),
+ st_uid=int(ec[3]),
+ st_gid=int(ec[4]),
+ st_atime=0,
+ st_mtime=0)
+
+                            # So the source may be deleted, but we still
+                            # append a LINK, because the file will be
+                            # CREATED if the source does not exist.
+ entries.append(edct('LINK', stat=st, entry=en,
+ gfid=gfid))
+
+ # Here, we have the assumption that only
+ # tier-gfid.linkto causes this mknod. Add data
+ datas.add(os.path.join(pfx, ec[0]))
+ continue
+
+ # stat info. present in the changelog itself
entries.append(edct(ty, gfid=gfid, entry=en,
- mode=int(ec[2]),
- uid=int(ec[3]), gid=int(ec[4])))
+ mode=int(ec[2]),
+ uid=int(ec[3]), gid=int(ec[4])))
elif ty == "RENAME":
go = os.path.join(pfx, gfid)
st = lstat(go)
if isinstance(st, int):
st = {}
- entry_update()
- e1 = unescape(os.path.join(pfx, ec[self.POS_ENTRY1 - 1]))
+ rl = None
+ if st and stat.S_ISLNK(st.st_mode):
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ rl = None
+
+ e1 = unescape_space_newline(
+ os.path.join(pfx, ec[self.POS_ENTRY1 - 1]))
entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en,
- stat=st))
+ stat=st, link=rl))
+                    # If src doesn't exist while doing the rename, the
+                    # destination is created. If the rename is not followed
+                    # by a data op, this remains a zero-byte file on the
+                    # slave. Hence add a data entry for renames.
+ datas.add(os.path.join(pfx, gfid))
else:
# stat() to get mode and other information
+ if not matching_disk_gfid(gfid, en):
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
+ continue
+
go = os.path.join(pfx, gfid)
st = lstat(go)
if isinstance(st, int):
- logging.debug('file %s got purged in the interim' % go)
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
continue
if ty == 'LINK':
- entry_update()
- entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
+ rl = None
+ if st and stat.S_ISLNK(st.st_mode):
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ rl = None
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
+ link=rl))
+                        # If src doesn't exist while doing the link, the
+                        # destination is created based on file type. If the
+                        # link is not followed by a data op, this remains a
+                        # zero-byte file on the slave. Hence add a data entry
+                        # for links.
+ if rl is None:
+ datas.add(os.path.join(pfx, gfid))
elif ty == 'SYMLINK':
- rl = errno_wrap(os.readlink, [en], [ENOENT])
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
if isinstance(rl, int):
continue
- entry_update()
+
entries.append(
edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
else:
- logging.warn('ignoring %s [op %s]' % (gfid, ty))
+ logging.warn(lf('ignoring op',
+ gfid=gfid,
+ type=ty))
elif et == self.TYPE_GFID:
# If self.unlinked_gfids is available, then that means it is
# retrying the changelog second time. Do not add the GFID's
@@ -900,6 +1182,7 @@ class GMasterChangelogMixin(GMasterCommon):
else:
datas.add(os.path.join(pfx, ec[0]))
elif et == self.TYPE_META:
+ self.update_fop_batch_stats(ec[self.POS_TYPE])
if ec[1] == 'SETATTR': # only setattr's for now...
if len(ec) == 5:
# In xsync crawl, we already have stat data
@@ -907,17 +1190,74 @@ class GMasterChangelogMixin(GMasterCommon):
meta_gfid.add((os.path.join(pfx, ec[0]),
XCrawlMetadata(st_uid=ec[2],
st_gid=ec[3],
- st_mode=ec[4])))
+ st_mode=ec[4],
+ st_atime=ec[5],
+ st_mtime=ec[6])))
else:
meta_gfid.add((os.path.join(pfx, ec[0]), ))
+ elif ec[1] in ['SETXATTR', 'XATTROP', 'FXATTROP']:
+ # To sync xattr/acls use rsync/tar, --xattrs and --acls
+ # switch to rsync and tar
+ if not gconf.get("sync-method") == "tarssh" and \
+ (gconf.get("sync-xattrs") or gconf.get("sync-acls")):
+ datas.add(os.path.join(pfx, ec[0]))
else:
- logging.warn('got invalid changelog type: %s' % (et))
+ logging.warn(lf('got invalid fop type',
+ type=et))
logging.debug('entries: %s' % repr(entries))
- if not retry:
- self.update_worker_cumilitive_status(files_pending)
+
+ # Increment counters for Status
+ self.files_in_batch += len(datas)
+ self.status.inc_value("data", len(datas))
+
+ self.batch_stats["DATA"] += self.files_in_batch - \
+ self.batch_stats["SETXATTR"] - \
+ self.batch_stats["XATTROP"]
+
+ entry_start_time = time.time()
# sync namespace
- if entries:
- self.slave.server.entry_ops(entries)
+ if entries and not ignore_entry_ops:
+ # Increment counters for Status
+ self.status.inc_value("entry", len(entries))
+
+ failures = self.slave.server.entry_ops(entries)
+
+ if gconf.get("gfid-conflict-resolution"):
+ count = 0
+ if failures:
+ logging.info(lf('Entry ops failed with gfid mismatch',
+ count=len(failures)))
+ while failures and count < self.MAX_OE_RETRIES:
+ count += 1
+ self.handle_entry_failures(failures, entries)
+ logging.info(lf('Retry original entries', count=count))
+ failures = self.slave.server.entry_ops(entries)
+ if not failures:
+ logging.info("Successfully fixed all entry ops with "
+ "gfid mismatch")
+ break
+
+ self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
+ self.status.dec_value("entry", len(entries))
+
+ # Update Entry stime in Brick Root only in case of Changelog mode
+ if self.name in ["live_changelog", "history_changelog"]:
+ entry_stime_to_update = (int(change_ts) - 1, 0)
+ self.upd_entry_stime(entry_stime_to_update)
+ self.status.set_field("last_synced_entry",
+ entry_stime_to_update[0])
+
+ self.batch_stats["ENTRY_SYNC_TIME"] += time.time() - entry_start_time
+
+ if ignore_entry_ops:
+            # Bookkeeping, to show in logs the range of changelogs skipped
+ self.num_skipped_entry_changelogs += 1
+ if self.skipped_entry_changelogs_first is None:
+ self.skipped_entry_changelogs_first = change_ts
+
+ self.skipped_entry_changelogs_last = change_ts
+
+ meta_start_time = time.time()
# sync metadata
if meta_gfid:
meta_entries = []
@@ -927,23 +1267,41 @@ class GMasterChangelogMixin(GMasterCommon):
else:
st = lstat(go[0])
if isinstance(st, int):
- logging.debug('file %s got purged in the interim' % go[0])
+ logging.debug(lf('file got purged in the interim',
+ file=go[0]))
continue
meta_entries.append(edct('META', go=go[0], stat=st))
if meta_entries:
- self.slave.server.meta_ops(meta_entries)
+ self.status.inc_value("meta", len(meta_entries))
+ failures = self.slave.server.meta_ops(meta_entries)
+ self.log_failures(failures, 'go', '', 'META')
+ self.status.dec_value("meta", len(meta_entries))
+
+ self.batch_stats["META_SYNC_TIME"] += time.time() - meta_start_time
+
+ if self.batch_stats["DATA_START_TIME"] == 0:
+ self.batch_stats["DATA_START_TIME"] = time.time()
+
# sync data
if datas:
self.a_syncdata(datas)
+ self.datas_in_batch.update(datas)
def process(self, changes, done=1):
tries = 0
retry = False
+ self.unlinked_gfids = set()
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+ # Error log disabled till the last round
+ self.syncer.disable_errorlog()
+ self.skipped_entry_changelogs_first = None
+ self.skipped_entry_changelogs_last = None
+ self.num_skipped_entry_changelogs = 0
+ self.batch_start_time = time.time()
+ self.init_fop_batch_stats()
while True:
- self.skipped_gfid_list = []
- self.current_files_skipped_count = 0
-
# first, fire all changelog transfers in parallel. entry and
# metadata are performed synchronously, therefore in serial.
# However at the end of each changelog, data is synchronized
@@ -951,12 +1309,27 @@ class GMasterChangelogMixin(GMasterCommon):
# entries/metadata of that changelog but happens in parallel
# with data of other changelogs.
- for change in changes:
- logging.debug('processing change %s' % change)
- self.process_change(change, done, retry)
- if not retry:
- # number of changelogs processed in the batch
- self.turns += 1
+ if retry:
+ if tries == (gconf.get("max-rsync-retries") - 1):
+                    # Enable error logging if this is the last retry
+ self.syncer.enable_errorlog()
+
+ # Remove Unlinked GFIDs from Queue
+ for unlinked_gfid in self.unlinked_gfids:
+ if unlinked_gfid in self.datas_in_batch:
+ self.datas_in_batch.remove(unlinked_gfid)
+
+ # Retry only Sync. Do not retry entry ops
+ if self.datas_in_batch:
+ self.a_syncdata(self.datas_in_batch)
+ else:
+ for change in changes:
+ logging.debug(lf('processing change',
+ changelog=change))
+ self.process_change(change, done, retry)
+ if not retry:
+ # number of changelogs processed in the batch
+ self.turns += 1
# Now we wait for all the data transfers fired off in the above
# step to complete. Note that this is not ideal either. Ideally
@@ -973,36 +1346,43 @@ class GMasterChangelogMixin(GMasterCommon):
# update the slave's time with the timestamp of the _last_
# changelog file time suffix. Since, the changelog prefix time
# is the time when the changelog was rolled over, introduce a
- # tolerence of 1 second to counter the small delta b/w the
+ # tolerance of 1 second to counter the small delta b/w the
# marker update and gettimeofday().
# NOTE: this is only for changelog mode, not xsync.
# @change is the last changelog (therefore max time for this batch)
if self.syncdata_wait():
- self.unlinked_gfids = []
+ self.unlinked_gfids = set()
if done:
xtl = (int(change.split('.')[-1]) - 1, 0)
self.upd_stime(xtl)
- map(self.changelog_done_func, changes)
- self.update_worker_files_syncd()
+ list(map(self.changelog_done_func, changes))
+ self.archive_and_purge_changelogs(changes)
+
+ # Reset Data counter after sync
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
break
# We do not know which changelog transfer failed, retry everything.
retry = True
tries += 1
- if tries == self.MAX_RETRIES:
- logging.warn('changelogs %s could not be processed - '
- 'moving on...' %
- ' '.join(map(os.path.basename, changes)))
- self.update_worker_total_files_skipped(
- self.current_files_skipped_count)
- logging.warn('SKIPPED GFID = %s' %
- ','.join(self.skipped_gfid_list))
- self.update_worker_files_syncd()
+ if tries == gconf.get("max-rsync-retries"):
+ logging.error(lf('changelogs could not be processed '
+ 'completely - moving on...',
+ files=list(map(os.path.basename, changes))))
+
+ # Reset data counter on failure
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+
if done:
xtl = (int(change.split('.')[-1]) - 1, 0)
self.upd_stime(xtl)
- map(self.changelog_done_func, changes)
+ list(map(self.changelog_done_func, changes))
+ self.archive_and_purge_changelogs(changes)
break
# it's either entry_ops() or Rsync that failed to do it's
# job. Mostly it's entry_ops() [which currently has a problem
@@ -1011,234 +1391,187 @@ class GMasterChangelogMixin(GMasterCommon):
# entry_ops() that failed... so we retry the _whole_ changelog
# again.
# TODO: remove entry retries when it's gets fixed.
- logging.warn('incomplete sync, retrying changelogs: %s' %
- ' '.join(map(os.path.basename, changes)))
+ logging.warn(lf('incomplete sync, retrying changelogs',
+ files=list(map(os.path.basename, changes))))
+
+ # Reset the Data counter before Retry
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.init_fop_batch_stats()
time.sleep(0.5)
+ # Log the Skipped Entry ops range if any
+ if self.skipped_entry_changelogs_first is not None and \
+ self.skipped_entry_changelogs_last is not None:
+ logging.info(lf("Skipping already processed entry ops",
+ from_changelog=self.skipped_entry_changelogs_first,
+ to_changelog=self.skipped_entry_changelogs_last,
+ num_changelogs=self.num_skipped_entry_changelogs))
+
+ # Log Current batch details
+ if changes:
+ logging.info(
+ lf("Entry Time Taken",
+ UNL=self.batch_stats["UNLINK"],
+ RMD=self.batch_stats["RMDIR"],
+ CRE=self.batch_stats["CREATE"],
+ MKN=self.batch_stats["MKNOD"],
+ MKD=self.batch_stats["MKDIR"],
+ REN=self.batch_stats["RENAME"],
+ LIN=self.batch_stats["LINK"],
+ SYM=self.batch_stats["SYMLINK"],
+ duration="%.4f" % self.batch_stats["ENTRY_SYNC_TIME"]))
+
+ logging.info(
+ lf("Data/Metadata Time Taken",
+ SETA=self.batch_stats["SETATTR"],
+ meta_duration="%.4f" % self.batch_stats["META_SYNC_TIME"],
+ SETX=self.batch_stats["SETXATTR"],
+ XATT=self.batch_stats["XATTROP"],
+ DATA=self.batch_stats["DATA"],
+ data_duration="%.4f" % (
+ time.time() - self.batch_stats["DATA_START_TIME"])))
+
+ logging.info(
+ lf("Batch Completed",
+ mode=self.name,
+ duration="%.4f" % (time.time() - self.batch_start_time),
+ changelog_start=changes[0].split(".")[-1],
+ changelog_end=changes[-1].split(".")[-1],
+ num_changelogs=len(changes),
+ stime=self.get_data_stime(),
+ entry_stime=self.get_entry_stime()))
+
+ def upd_entry_stime(self, stime):
+ self.slave.server.set_entry_stime(self.FLAT_DIR_HIERARCHY,
+ self.uuid,
+ stime)
+
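
Both the data stime and the entry stime are (seconds, nanoseconds) pairs, and the value recorded after a changelog is its timestamp minus the one-second tolerance noted earlier; that is what lets process_change() skip entry ops for changelogs that are already covered. A small sketch with made-up timestamps:

    # Made-up example: the last replayed changelog rolled over at t=1517961210.
    change = "CHANGELOG.1517961210"
    change_ts = int(change.split(".")[-1])

    entry_stime = (change_ts - 1, 0)   # what upd_entry_stime() records
    data_stime = (change_ts - 1, 0)    # recorded once the batch's data sync is done

    # On a restart, entry ops of a changelog whose suffix is <= entry_stime[0]
    # are skipped (ignore_entry_ops in process_change); newer ones are replayed.
    print(1517961195 <= entry_stime[0])   # True -> an older changelog is skipped
    print(change_ts <= entry_stime[0])    # False -> the newest one is replayed again
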
def upd_stime(self, stime, path=None):
if not path:
path = self.FLAT_DIR_HIERARCHY
if not stime == URXTIME:
self.sendmark(path, stime)
- def get_worker_status_file(self):
- file_name = gconf.local_path + '.status'
- file_name = file_name.replace("/", "_")
- worker_status_file = gconf.georep_session_working_dir + file_name
- return worker_status_file
-
- def update_worker_status(self, key, value):
- default_data = {"remote_node": "N/A",
- "worker status": "Not Started",
- "crawl status": "N/A",
- "files_syncd": 0,
- "files_remaining": 0,
- "bytes_remaining": 0,
- "purges_remaining": 0,
- "total_files_skipped": 0}
- worker_status_file = self.get_worker_status_file()
- try:
- with open(worker_status_file, 'r+') as f:
- loaded_data = json.load(f)
- loaded_data[key] = value
- os.ftruncate(f.fileno(), 0)
- os.lseek(f.fileno(), 0, os.SEEK_SET)
- json.dump(loaded_data, f)
- f.flush()
- os.fsync(f.fileno())
- except (IOError, OSError, ValueError):
- logging.info('Creating new %s' % worker_status_file)
- try:
- with open(worker_status_file, 'wb') as f:
- default_data[key] = value
- json.dump(default_data, f)
- f.flush()
- os.fsync(f.fileno())
- except:
- raise
+        # Update last_synced_time in the status file based on stime,
+        # but only when the stime xattr was set on the brick root
+ if path == self.FLAT_DIR_HIERARCHY:
+ chkpt_time = gconf.getr("checkpoint")
+ checkpoint_time = 0
+ if chkpt_time is not None:
+ checkpoint_time = int(chkpt_time)
- def update_worker_cumilitive_status(self, files_pending):
- default_data = {"remote_node": "N/A",
- "worker status": "Not Started",
- "crawl status": "N/A",
- "files_syncd": 0,
- "files_remaining": 0,
- "bytes_remaining": 0,
- "purges_remaining": 0,
- "total_files_skipped": 0}
- worker_status_file = self.get_worker_status_file()
- try:
- with open(worker_status_file, 'r+') as f:
- loaded_data = json.load(f)
- loaded_data['files_remaining'] = files_pending['count']
- loaded_data['bytes_remaining'] = files_pending['bytes']
- loaded_data['purges_remaining'] = files_pending['purge']
- os.ftruncate(f.fileno(), 0)
- os.lseek(f.fileno(), 0, os.SEEK_SET)
- json.dump(loaded_data, f)
- f.flush()
- os.fsync(f.fileno())
- except (IOError, OSError, ValueError):
- logging.info('Creating new %s' % worker_status_file)
- try:
- with open(worker_status_file, 'wb') as f:
- default_data['files_remaining'] = files_pending['count']
- default_data['bytes_remaining'] = files_pending['bytes']
- default_data['purges_remaining'] = files_pending['purge']
- json.dump(default_data, f)
- f.flush()
- os.fsync(f.fileno())
- except:
- raise
+ self.status.set_last_synced(stime, checkpoint_time)
def update_worker_remote_node(self):
- node = sys.argv[-1]
- node = node.split("@")[-1]
- remote_node_ip = node.split(":")[0]
- remote_node_vol = node.split(":")[3]
- remote_node = remote_node_ip + '::' + remote_node_vol
- self.update_worker_status('remote_node', remote_node)
-
- def update_worker_health(self, state):
- self.update_worker_status('worker status', state)
-
- def update_worker_crawl_status(self, state):
- self.update_worker_status('crawl status', state)
-
- def update_worker_files_syncd(self):
- default_data = {"remote_node": "N/A",
- "worker status": "Not Started",
- "crawl status": "N/A",
- "files_syncd": 0,
- "files_remaining": 0,
- "bytes_remaining": 0,
- "purges_remaining": 0,
- "total_files_skipped": 0}
- worker_status_file = self.get_worker_status_file()
- try:
- with open(worker_status_file, 'r+') as f:
- loaded_data = json.load(f)
- loaded_data['files_syncd'] += loaded_data['files_remaining']
- loaded_data['files_remaining'] = 0
- loaded_data['bytes_remaining'] = 0
- loaded_data['purges_remaining'] = 0
- os.ftruncate(f.fileno(), 0)
- os.lseek(f.fileno(), 0, os.SEEK_SET)
- json.dump(loaded_data, f)
- f.flush()
- os.fsync(f.fileno())
- except (IOError, OSError, ValueError):
- logging.info('Creating new %s' % worker_status_file)
- try:
- with open(worker_status_file, 'wb') as f:
- json.dump(default_data, f)
- f.flush()
- os.fsync(f.fileno())
- except:
- raise
+ node = rconf.args.resource_remote
+ node_data = node.split("@")
+ node = node_data[-1]
+ remote_node_ip, _ = host_brick_split(node)
+ self.status.set_slave_node(remote_node_ip)
+
+ def changelogs_batch_process(self, changes):
+ changelogs_batches = []
+ current_size = 0
+ for c in changes:
+ si = os.lstat(c).st_size
+ if (si + current_size) > gconf.get("changelog-batch-size"):
+                # Create a new batch if a single changelog file is greater
+                # than the max size, or the current batch size exceeds it
+ changelogs_batches.append([c])
+ current_size = si
+ else:
+                # Append to the last batch; if no batch exists yet, create one
+ current_size += si
+ if not changelogs_batches:
+ changelogs_batches.append([c])
+ else:
+ changelogs_batches[-1].append(c)
- def update_worker_files_remaining(self, state):
- self.update_worker_status('files_remaining', state)
-
- def update_worker_bytes_remaining(self, state):
- self.update_worker_status('bytes_remaining', state)
-
- def update_worker_purges_remaining(self, state):
- self.update_worker_status('purges_remaining', state)
-
- def update_worker_total_files_skipped(self, value):
- default_data = {"remote_node": "N/A",
- "worker status": "Not Started",
- "crawl status": "N/A",
- "files_syncd": 0,
- "files_remaining": 0,
- "bytes_remaining": 0,
- "purges_remaining": 0,
- "total_files_skipped": 0}
- worker_status_file = self.get_worker_status_file()
- try:
- with open(worker_status_file, 'r+') as f:
- loaded_data = json.load(f)
- loaded_data['total_files_skipped'] = value
- loaded_data['files_remaining'] -= value
- os.ftruncate(f.fileno(), 0)
- os.lseek(f.fileno(), 0, os.SEEK_SET)
- json.dump(loaded_data, f)
- f.flush()
- os.fsync(f.fileno())
- except (IOError, OSError, ValueError):
- logging.info('Creating new %s' % worker_status_file)
- try:
- with open(worker_status_file, 'wb') as f:
- default_data['total_files_skipped'] = value
- json.dump(default_data, f)
- f.flush()
- os.fsync(f.fileno())
- except:
- raise
+ for batch in changelogs_batches:
+ logging.debug(lf('processing changes',
+ batch=batch))
+ self.process(batch)
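
changelogs_batch_process() above groups pending changelogs by on-disk size so that one process() call never carries more than changelog-batch-size bytes, while a single oversized changelog still gets a batch of its own. The same grouping as a standalone sketch (batch_by_size and the size limit are illustrative):

    def batch_by_size(changelog_sizes, max_batch_size):
        # changelog_sizes: list of (path, size-in-bytes) tuples, in order
        batches = []
        current_size = 0
        for path, size in changelog_sizes:
            if size + current_size > max_batch_size or not batches:
                batches.append([path])      # start a new batch
                current_size = size
            else:
                batches[-1].append(path)    # fits into the current batch
                current_size += size
        return batches

    sizes = [("CHANGELOG.101", 30 * 1024),
             ("CHANGELOG.116", 30 * 1024),
             ("CHANGELOG.131", 60 * 1024)]
    print(batch_by_size(sizes, 100 * 1024))
    # [['CHANGELOG.101', 'CHANGELOG.116'], ['CHANGELOG.131']]
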
- def crawl(self, no_stime_update=False):
- self.update_worker_crawl_status("Changelog Crawl")
+ def crawl(self):
+ self.status.set_worker_crawl_status("Changelog Crawl")
changes = []
# get stime (from the brick) and purge changelogs
# that are _historical_ to that time.
- purge_time = self.get_purge_time()
+ data_stime = self.get_data_stime()
- self.changelog_agent.scan()
+ libgfchangelog.scan()
self.crawls += 1
- changes = self.changelog_agent.getchanges()
+ changes = libgfchangelog.getchanges()
if changes:
- if purge_time:
- logging.info("slave's time: %s" % repr(purge_time))
+ if data_stime:
+ logging.info(lf("slave's time",
+ stime=data_stime))
processed = [x for x in changes
- if int(x.split('.')[-1]) < purge_time[0]]
+ if int(x.split('.')[-1]) < data_stime[0]]
for pr in processed:
- logging.info(
- 'skipping already processed change: %s...' %
- os.path.basename(pr))
+ logging.debug(
+ lf('skipping already processed change',
+ changelog=os.path.basename(pr)))
self.changelog_done_func(pr)
changes.remove(pr)
+ self.archive_and_purge_changelogs(processed)
- if changes:
- logging.debug('processing changes %s' % repr(changes))
- self.process(changes)
+ self.changelogs_batch_process(changes)
- def register(self, register_time, changelog_agent):
- self.changelog_agent = changelog_agent
- self.sleep_interval = int(gconf.change_interval)
- self.changelog_done_func = self.changelog_agent.done
+ def register(self, register_time, status):
+ self.sleep_interval = gconf.get("change-interval")
+ self.changelog_done_func = libgfchangelog.done
+ self.tempdir = self.setup_working_dir()
+ self.processed_changelogs_dir = os.path.join(self.tempdir,
+ ".processed")
+ self.name = "live_changelog"
+ self.status = status
class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
- def register(self, register_time, changelog_agent):
- self.changelog_agent = changelog_agent
+ def register(self, register_time, status):
self.changelog_register_time = register_time
self.history_crawl_start_time = register_time
- self.changelog_done_func = self.changelog_agent.history_done
+ self.changelog_done_func = libgfchangelog.history_done
self.history_turns = 0
+ self.tempdir = self.setup_working_dir()
+ self.processed_changelogs_dir = os.path.join(self.tempdir,
+ ".history/.processed")
+ self.name = "history_changelog"
+ self.status = status
- def crawl(self, no_stime_update=False):
+ def crawl(self):
self.history_turns += 1
- self.update_worker_crawl_status("History Crawl")
- purge_time = self.get_purge_time()
+ self.status.set_worker_crawl_status("History Crawl")
+ data_stime = self.get_data_stime()
+
+ end_time = int(time.time())
+
+        # The start of a history crawl marks a Geo-rep worker restart,
+        # so log the ignore-deletes setting here if it is enabled
+ if gconf.get("ignore-deletes"):
+ logging.info(lf('ignore-deletes config option is set',
+ stime=data_stime))
- logging.info('starting history crawl... turns: %s, stime: %s'
- % (self.history_turns, repr(purge_time)))
+ logging.info(lf('starting history crawl',
+ turns=self.history_turns,
+ stime=data_stime,
+ etime=end_time,
+ entry_stime=self.get_entry_stime()))
- if not purge_time or purge_time == URXTIME:
- logging.info("stime not available, abandoning history crawl")
- raise NoPurgeTimeAvailable()
+ if not data_stime or data_stime == URXTIME:
+ raise NoStimeAvailable()
# Changelogs backend path is hardcoded as
# <BRICK_PATH>/.glusterfs/changelogs, if user configured to different
# location then consuming history will not work(Known issue as of now)
- changelog_path = os.path.join(gconf.local_path,
+ changelog_path = os.path.join(rconf.args.local_path,
".glusterfs/changelogs")
- ret, actual_end = self.changelog_agent.history(
+ ret, actual_end = libgfchangelog.history_changelog(
changelog_path,
- purge_time[0],
- self.changelog_register_time,
- int(gconf.sync_jobs))
+ data_stime[0],
+ end_time,
+ gconf.get("sync-jobs"))
# scan followed by getchanges till scan returns zero.
# history_scan() is blocking call, till it gets the number
@@ -1246,29 +1579,30 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
# to be processed. returns positive value as number of changelogs
# to be processed, which will be fetched using
# history_getchanges()
- while self.changelog_agent.history_scan() > 0:
+ while libgfchangelog.history_scan() > 0:
self.crawls += 1
- changes = self.changelog_agent.history_getchanges()
+ changes = libgfchangelog.history_getchanges()
if changes:
- if purge_time:
- logging.info("slave's time: %s" % repr(purge_time))
+ if data_stime:
+ logging.info(lf("slave's time",
+ stime=data_stime))
processed = [x for x in changes
- if int(x.split('.')[-1]) < purge_time[0]]
+ if int(x.split('.')[-1]) < data_stime[0]]
for pr in processed:
- logging.info('skipping already processed change: '
- '%s...' % os.path.basename(pr))
+ logging.debug(lf('skipping already processed change',
+ changelog=os.path.basename(pr)))
self.changelog_done_func(pr)
changes.remove(pr)
- if changes:
- logging.debug('processing changes %s' % repr(changes))
- self.process(changes)
+ self.changelogs_batch_process(changes)
history_turn_time = int(time.time()) - self.history_crawl_start_time
- logging.info('finished history crawl syncing, endtime: %s, stime: %s'
- % (actual_end, repr(self.get_purge_time())))
+ logging.info(lf('finished history crawl',
+ endtime=actual_end,
+ stime=self.get_data_stime(),
+ entry_stime=self.get_entry_stime()))
# If TS returned from history_changelog is < register_time
# then FS crawl may be required, since history is only available
@@ -1282,7 +1616,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
self.history_crawl_start_time = int(time.time())
self.crawl()
else:
- # This exeption will be catched in resource.py and
+ # This exception will be caught in resource.py and
# fallback to xsync for the small gap.
raise PartialHistoryAvailable(str(actual_end))
@@ -1291,7 +1625,7 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
"""
This crawl needs to be xtime based (as of now
- it's not. this is beacuse we generate CHANGELOG
+ it's not. this is because we generate CHANGELOG
file during each crawl which is then processed
by process_change()).
For now it's used as a one-shot initial sync
@@ -1301,14 +1635,18 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
XSYNC_MAX_ENTRIES = 1 << 13
- def register(self, register_time=None, changelog_agent=None):
+ def register(self, register_time=None, status=None):
+ self.status = status
self.counter = 0
self.comlist = []
self.stimes = []
self.sleep_interval = 60
self.tempdir = self.setup_working_dir()
+ logging.info(lf('Working dir',
+ path=self.tempdir))
self.tempdir = os.path.join(self.tempdir, 'xsync')
- logging.info('xsync temp directory: %s' % self.tempdir)
+ self.processed_changelogs_dir = self.tempdir
+ self.name = "xsync"
try:
os.makedirs(self.tempdir)
except OSError:
@@ -1317,50 +1655,52 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
pass
else:
raise
+ # Purge stale unprocessed xsync changelogs
+ for f in os.listdir(self.tempdir):
+ if f.startswith("XSYNC-CHANGELOG"):
+ os.remove(os.path.join(self.tempdir, f))
+
- def crawl(self, no_stime_update=False):
+ def crawl(self):
"""
event dispatcher thread
this thread dispatches either changelog or synchronizes stime.
- additionally terminates itself on recieving a 'finale' event
+ additionally terminates itself on receiving a 'finale' event
"""
def Xsyncer():
self.Xcrawl()
t = Thread(target=Xsyncer)
t.start()
- logging.info('starting hybrid crawl..., stime: %s'
- % repr(self.get_purge_time()))
- self.update_worker_crawl_status("Hybrid Crawl")
+ logging.info(lf('starting hybrid crawl',
+ stime=self.get_data_stime()))
+ self.status.set_worker_crawl_status("Hybrid Crawl")
while True:
try:
item = self.comlist.pop(0)
if item[0] == 'finale':
- logging.info('finished hybrid crawl syncing, stime: %s'
- % repr(self.get_purge_time()))
+ logging.info(lf('finished hybrid crawl',
+ stime=self.get_data_stime()))
break
elif item[0] == 'xsync':
- logging.info('processing xsync changelog %s' % (item[1]))
+ logging.info(lf('processing xsync changelog',
+ path=item[1]))
self.process([item[1]], 0)
+ self.archive_and_purge_changelogs([item[1]])
elif item[0] == 'stime':
- if not no_stime_update:
- # xsync is started after running history but if
- # history actual end time is less than register time
- # then if we update stime, live changelog processing
- # will skip the changelogs for which TS is less than
- # stime. During this deletes and renames are not
- # propogated. By not setting stime live changelog will
- # start processing from the register time. Since we
- # have xsync_upper_limit their will not be much
- # overlap/redo of changelogs.
- logging.debug('setting slave time: %s' % repr(item[1]))
- self.upd_stime(item[1][1], item[1][0])
+ logging.debug(lf('setting slave time',
+ time=item[1]))
+ self.upd_stime(item[1][1], item[1][0])
else:
- logging.warn('unknown tuple in comlist (%s)' % repr(item))
+ logging.warn(lf('unknown tuple in comlist',
+ entry=item))
except IndexError:
time.sleep(1)
def write_entry_change(self, prefix, data=[]):
+ if not getattr(self, "fh", None):
+ self.open()
+
self.fh.write("%s %s\n" % (prefix, ' '.join(data)))
def open(self):
@@ -1372,7 +1712,11 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
raise
def close(self):
- self.fh.close()
+ if getattr(self, "fh", None):
+ self.fh.flush()
+ os.fsync(self.fh.fileno())
+ self.fh.close()
+ self.fh = None
def fname(self):
return self.xsync_change
@@ -1383,11 +1727,11 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
def sync_xsync(self, last):
"""schedule a processing of changelog"""
self.close()
- self.put('xsync', self.fname())
+ if self.counter > 0:
+ self.put('xsync', self.fname())
self.counter = 0
if not last:
time.sleep(1) # make sure changelogs are 1 second apart
- self.open()
def sync_stime(self, stime=None, last=False):
"""schedule a stime synchronization"""
@@ -1409,7 +1753,7 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
def is_sticky(self, path, mo):
"""check for DHTs linkto sticky bit file"""
sticky = False
- if mo & 01000:
+ if mo & 0o1000:
sticky = self.master.server.linkto_check(path)
return sticky
@@ -1421,15 +1765,15 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
the filesystem tree, but set after directory synchronization.
"""
if path == '.':
- self.open()
self.crawls += 1
if not xtr_root:
# get the root stime and use it for all comparisons
xtr_root = self.xtime('.', self.slave)
if isinstance(xtr_root, int):
if xtr_root != ENOENT:
- logging.warn("slave cluster not returning the "
- "correct xtime for root (%d)" % xtr_root)
+ logging.warn(lf("slave cluster not returning the "
+ "xtime for root",
+ error=xtr_root))
xtr_root = self.minus_infinity
xtl = self.xtime(path)
if isinstance(xtl, int):
@@ -1437,10 +1781,15 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
xtr = self.xtime(path, self.slave)
if isinstance(xtr, int):
if xtr != ENOENT:
- logging.warn("slave cluster not returning the "
- "correct xtime for %s (%d)" % (path, xtr))
+ logging.warn(lf("slave cluster not returning the "
+ "xtime for dir",
+ path=path,
+ error=xtr))
xtr = self.minus_infinity
xtr = max(xtr, xtr_root)
+ zero_zero = (0, 0)
+ if xtr_root == zero_zero:
+ xtr = self.minus_infinity
if not self.need_sync(path, xtl, xtr):
if path == '.':
self.sync_done([(path, xtl)], True)
@@ -1450,48 +1799,65 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
dem = self.master.server.entries(path)
pargfid = self.master.server.gfid(path)
if isinstance(pargfid, int):
- logging.warn('skipping directory %s' % (path))
+ logging.warn(lf('skipping directory',
+ path=path))
for e in dem:
bname = e
e = os.path.join(path, e)
xte = self.xtime(e)
if isinstance(xte, int):
- logging.warn("irregular xtime for %s: %s" %
- (e, errno.errorcode[xte]))
+ logging.warn(lf("irregular xtime",
+ path=e,
+ error=errno.errorcode[xte]))
continue
if not self.need_sync(e, xte, xtr):
continue
st = self.master.server.lstat(e)
if isinstance(st, int):
- logging.warn('%s got purged in the interim ...' % e)
+ logging.warn(lf('got purged in the interim',
+ path=e))
continue
if self.is_sticky(e, st.st_mode):
- logging.debug('ignoring sticky bit file %s' % e)
+ logging.debug(lf('ignoring sticky bit file',
+ path=e))
continue
gfid = self.master.server.gfid(e)
if isinstance(gfid, int):
- logging.warn('skipping entry %s..' % e)
+ logging.warn(lf('skipping entry',
+ path=e))
continue
mo = st.st_mode
- self.counter += 1
+ self.counter += 1 if ((stat.S_ISDIR(mo) or
+ stat.S_ISLNK(mo) or
+ stat.S_ISREG(mo))) else 0
if self.counter == self.XSYNC_MAX_ENTRIES:
self.sync_done(self.stimes, False)
self.stimes = []
if stat.S_ISDIR(mo):
- self.write_entry_change("E", [gfid, 'MKDIR', str(mo), str(
- st.st_uid), str(st.st_gid), escape(os.path.join(pargfid,
- bname))])
+ self.write_entry_change("E",
+ [gfid, 'MKDIR', str(mo),
+ str(0), str(0), escape_space_newline(
+ os.path.join(pargfid, bname))])
self.write_entry_change("M", [gfid, "SETATTR", str(st.st_uid),
- str(st.st_gid), str(st.st_mode)])
+ str(st.st_gid), str(st.st_mode),
+ str(st.st_atime),
+ str(st.st_mtime)])
self.Xcrawl(e, xtr_root)
stime_to_update = xte
- if self.xsync_upper_limit:
- stime_to_update = min(self.xsync_upper_limit, xte)
+                # Live changelog start time indicates that live changelogs
+                # are available from that point onwards. If we set an stime
+                # greater than the live changelog start time, Geo-rep will
+                # skip those changelogs as already processed, even though
+                # Xsync cannot sync deletes and renames. So update stime as
+                # min(live_changelog_start_time, actual stime); once it
+                # switches to Changelog mode, deletes and renames are synced.
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xte)
self.stimes.append((e, stime_to_update))
elif stat.S_ISLNK(mo):
self.write_entry_change(
- "E", [gfid, 'SYMLINK', escape(os.path.join(pargfid,
- bname))])
+ "E", [gfid, 'SYMLINK', escape_space_newline(
+ os.path.join(pargfid, bname))])
elif stat.S_ISREG(mo):
nlink = st.st_nlink
nlink -= 1 # fixup backend stat link count
@@ -1501,19 +1867,19 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
if nlink == 1:
self.write_entry_change("E",
[gfid, 'MKNOD', str(mo),
- str(st.st_uid),
- str(st.st_gid),
- escape(os.path.join(
- pargfid, bname))])
+ str(0), str(0),
+ escape_space_newline(
+ os.path.join(
+ pargfid, bname))])
else:
self.write_entry_change(
- "E", [gfid, 'LINK', escape(os.path.join(pargfid,
- bname))])
+ "E", [gfid, 'LINK', escape_space_newline(
+ os.path.join(pargfid, bname))])
self.write_entry_change("D", [gfid])
if path == '.':
stime_to_update = xtl
- if self.xsync_upper_limit:
- stime_to_update = min(self.xsync_upper_limit, xtl)
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xtl)
self.stimes.append((path, stime_to_update))
self.sync_done(self.stimes, True)
@@ -1599,16 +1965,17 @@ class Syncer(object):
def __init__(self, slave, sync_engine, resilient_errnos=[]):
"""spawn worker threads"""
+ self.log_err = False
self.slave = slave
self.lock = Lock()
self.pb = PostBox()
self.sync_engine = sync_engine
self.errnos_ok = resilient_errnos
- for i in range(int(gconf.sync_jobs)):
- t = Thread(target=self.syncjob)
+ for i in range(gconf.get("sync-jobs")):
+ t = Thread(target=self.syncjob, args=(i + 1, ))
t.start()
- def syncjob(self):
+ def syncjob(self, job_id):
"""the life of a worker"""
while True:
pb = None
@@ -1621,7 +1988,14 @@ class Syncer(object):
break
time.sleep(0.5)
pb.close()
- po = self.sync_engine(pb)
+ start = time.time()
+ po = self.sync_engine(pb, self.log_err)
+ logging.info(lf("Sync Time Taken",
+ job=job_id,
+ num_files=len(pb),
+ return_code=po.returncode,
+ duration="%.4f" % (time.time() - start)))
+
if po.returncode == 0:
ret = (True, 0)
elif po.returncode in self.errnos_ok:
@@ -1638,3 +2012,9 @@ class Syncer(object):
return pb
except BoxClosedErr:
pass
+
+ def enable_errorlog(self):
+ self.log_err = True
+
+ def disable_errorlog(self):
+ self.log_err = False
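
With these changes Syncer is a fixed pool of sync-jobs threads, each pulling a closed PostBox of paths, timing the rsync/tar invocation, and logging per-file errors only on the final retry. A much reduced, self-contained sketch of that pool shape, using a plain queue and a stubbed sync engine (names here are illustrative, not the module's API):

    import time
    from queue import Queue
    from threading import Thread

    def run_sync_pool(batches, sync_engine, num_jobs=3):
        # batches: list of lists of paths; sync_engine(paths) -> return code
        q = Queue()
        for b in batches:
            q.put(b)
        for _ in range(num_jobs):
            q.put(None)                 # one stop marker per worker

        def syncjob(job_id):
            while True:
                paths = q.get()
                if paths is None:       # no more work for this worker
                    q.task_done()
                    return
                start = time.time()
                rc = sync_engine(paths)
                print("Sync Time Taken job=%d num_files=%d return_code=%d "
                      "duration=%.4f" % (job_id, len(paths), rc,
                                         time.time() - start))
                q.task_done()

        workers = [Thread(target=syncjob, args=(i + 1,)) for i in range(num_jobs)]
        for w in workers:
            w.start()
        q.join()                        # wait until every batch is processed

    run_sync_pool([["f1", "f2"], ["f3"]], lambda paths: 0)
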
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index 3e0360332bd..6aa7b9dfc99 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -13,91 +13,49 @@ import sys
import time
import signal
import logging
-import uuid
import xml.etree.ElementTree as XET
-from subprocess import PIPE
-from resource import Popen, FILE, GLUSTER, SSH
from threading import Lock
-import re
+from errno import ECHILD, ESRCH
import random
-from gconf import gconf
-from syncdutils import update_file, select, waitpid
-from syncdutils import set_term_handler, is_host_local, GsyncdError
-from syncdutils import escape, Thread, finalize, memoize
+from resource import SSH
+import gsyncdconfig as gconf
+import libgfchangelog
+from rconf import rconf
+from syncdutils import (select, waitpid, errno_wrap, lf, grabpidfile,
+ set_term_handler, GsyncdError,
+ Thread, finalize, Volinfo, VolinfoFromGconf,
+ gf_event, EVENT_GEOREP_FAULTY, get_up_nodes,
+ unshare_propagation_supported)
+from gsyncdstatus import GeorepStatus, set_monitor_status
+import py2py3
+from py2py3 import pipe
ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
-def get_slave_bricks_status(host, vol):
- po = Popen(['gluster', '--xml', '--remote-host=' + host,
- 'volume', 'status', vol, "detail"],
- stdout=PIPE, stderr=PIPE)
- vix = po.stdout.read()
- po.wait()
- po.terminate_geterr()
- vi = XET.fromstring(vix)
- if vi.find('opRet').text != '0':
- logging.info("Unable to get list of up nodes of %s, "
- "returning empty list: %s" %
- (vol, vi.find('opErrstr').text))
- return []
-
- up_hosts = set()
-
- try:
- for el in vi.findall('volStatus/volumes/volume/node'):
- if el.find('status').text == '1':
- up_hosts.add(el.find('hostname').text)
- except (ParseError, AttributeError, ValueError) as e:
- logging.info("Parsing failed to get list of up nodes of %s, "
- "returning empty list: %s" % (vol, e))
-
- return list(up_hosts)
-
-
-class Volinfo(object):
-
- def __init__(self, vol, host='localhost', prelude=[]):
- po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host,
- 'volume', 'info', vol],
- stdout=PIPE, stderr=PIPE)
- vix = po.stdout.read()
- po.wait()
- po.terminate_geterr()
- vi = XET.fromstring(vix)
- if vi.find('opRet').text != '0':
- if prelude:
- via = '(via %s) ' % prelude.join(' ')
- else:
- via = ' '
- raise GsyncdError('getting volume info of %s%s '
- 'failed with errorcode %s',
- (vol, via, vi.find('opErrno').text))
- self.tree = vi
- self.volume = vol
- self.host = host
-
- def get(self, elem):
- return self.tree.findall('.//' + elem)
-
- @property
- @memoize
- def bricks(self):
- def bparse(b):
- host, dirp = b.text.split(':', 2)
- return {'host': host, 'dir': dirp}
- return [bparse(b) for b in self.get('brick')]
-
- @property
- @memoize
- def uuid(self):
- ids = self.get('id')
- if len(ids) != 1:
- raise GsyncdError("volume info of %s obtained from %s: "
- "ambiguous uuid",
- self.volume, self.host)
- return ids[0].text
+def get_subvol_num(brick_idx, vol, hot):
+ tier = vol.is_tier()
+ disperse_count = vol.disperse_count(tier, hot)
+ replica_count = vol.replica_count(tier, hot)
+ distribute_count = vol.distribution_count(tier, hot)
+ gconf.setconfig("master-distribution-count", distribute_count)
+
+ if (tier and not hot):
+ brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
+
+ subvol_size = disperse_count if disperse_count > 0 else replica_count
+ cnt = int((brick_idx + 1) / subvol_size)
+ rem = (brick_idx + 1) % subvol_size
+ if rem > 0:
+ cnt = cnt + 1
+
+ if (tier and hot):
+ return "hot_" + str(cnt)
+ elif (tier and not hot):
+ return "cold_" + str(cnt)
+ else:
+ return str(cnt)
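
get_subvol_num() maps a brick's index in the volume to its subvolume: bricks are grouped by the disperse count for dispersed volumes, otherwise by the replica count, with hot_/cold_ prefixes for tiered volumes. A simplified version of the same arithmetic for a plain non-tiered volume (subvol_of is an illustrative helper):

    def subvol_of(brick_idx, replica_count=3, disperse_count=0):
        # bricks are indexed from 0; each subvolume spans one replica or
        # disperse set of bricks
        subvol_size = disperse_count if disperse_count > 0 else replica_count
        cnt, rem = divmod(brick_idx + 1, subvol_size)
        return cnt + 1 if rem else cnt

    # six bricks in a replica-3 volume form two subvolumes
    print([subvol_of(i) for i in range(6)])   # [1, 1, 1, 2, 2, 2]
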
class Monitor(object):
@@ -105,41 +63,16 @@ class Monitor(object):
"""class which spawns and manages gsyncd workers"""
ST_INIT = 'Initializing...'
- ST_STABLE = 'Stable'
- ST_FAULTY = 'faulty'
+ ST_STARTED = 'Started'
+ ST_STABLE = 'Active'
+ ST_FAULTY = 'Faulty'
ST_INCON = 'inconsistent'
_ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON]
def __init__(self):
self.lock = Lock()
self.state = {}
-
- def set_state(self, state, w=None):
- """set the state that can be used by external agents
- like glusterd for status reporting"""
- computestate = lambda: self.state and self._ST_ORD[
- max(self._ST_ORD.index(s) for s in self.state.values())]
- if w:
- self.lock.acquire()
- old_state = computestate()
- self.state[w] = state
- state = computestate()
- self.lock.release()
- if state != old_state:
- self.set_state(state)
- else:
- if getattr(gconf, 'state_file', None):
- # If previous state is paused, suffix the
- # new state with '(Paused)'
- try:
- with open(gconf.state_file, "r") as f:
- content = f.read()
- if "paused" in content.lower():
- state = state + '(Paused)'
- except IOError:
- pass
- logging.info('new state: %s' % state)
- update_file(gconf.state_file, lambda f: f.write(state + '\n'))
+ self.status = {}
@staticmethod
def terminate():
@@ -147,9 +80,10 @@ class Monitor(object):
# standard handler
set_term_handler(lambda *a: set_term_handler())
# give a chance to graceful exit
- os.kill(-os.getpid(), signal.SIGTERM)
+ errno_wrap(os.kill, [-os.getpid(), signal.SIGTERM], [ESRCH])
- def monitor(self, w, argv, cpids, agents, slave_vol, slave_host):
+ def monitor(self, w, argv, cpids, slave_vol, slave_host, master,
+ suuid, slavenodes):
"""the monitor loop
        Basic logic is a blatantly simple, blunt heuristic:
@@ -168,19 +102,32 @@ class Monitor(object):
blown worker blows up on EPIPE if the net goes down,
due to the keep-alive thread)
"""
-
- self.set_state(self.ST_INIT, w)
-
+ if not self.status.get(w[0]['dir'], None):
+ self.status[w[0]['dir']] = GeorepStatus(gconf.get("state-file"),
+ w[0]['host'],
+ w[0]['dir'],
+ w[0]['uuid'],
+ master,
+ "%s::%s" % (slave_host,
+ slave_vol))
ret = 0
def nwait(p, o=0):
- p2, r = waitpid(p, o)
- if not p2:
- return
- return r
+ try:
+ p2, r = waitpid(p, o)
+ if not p2:
+ return
+ return r
+ except OSError as e:
+ # no child process, this happens if the child process
+ # already died and has been cleaned up
+ if e.errno == ECHILD:
+ return -1
+ else:
+ raise
def exit_signalled(s):
- """ child teminated due to receipt of SIGUSR1 """
+ """ child terminated due to receipt of SIGUSR1 """
return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))
def exit_status(s):
@@ -188,72 +135,76 @@ class Monitor(object):
return os.WEXITSTATUS(s)
return 1
- conn_timeout = int(gconf.connection_timeout)
+ conn_timeout = gconf.get("connection-timeout")
while ret in (0, 1):
- remote_host = w[1]
+ remote_user, remote_host = w[1][0].split("@")
+ remote_id = w[1][1]
# Check the status of the connected slave node
# If the connected slave node is down then try to connect to
# different up node.
- m = re.match("(ssh|gluster|file):\/\/(.+)@([^:]+):(.+)",
- remote_host)
- if m:
- current_slave_host = m.group(3)
- slave_up_hosts = get_slave_bricks_status(
- slave_host, slave_vol)
-
- if current_slave_host not in slave_up_hosts:
- if len(slave_up_hosts) > 0:
- remote_host = "%s://%s@%s:%s" % (m.group(1),
- m.group(2),
- random.choice(
- slave_up_hosts),
- m.group(4))
-
- # Spawn the worker and agent in lock to avoid fd leak
+ current_slave_host = remote_host
+ slave_up_hosts = get_up_nodes(slavenodes, gconf.get("ssh-port"))
+
+ if (current_slave_host, remote_id) not in slave_up_hosts:
+ if len(slave_up_hosts) > 0:
+ remote_new = random.choice(slave_up_hosts)
+ remote_host = "%s@%s" % (remote_user, remote_new[0])
+ remote_id = remote_new[1]
+
+ # Spawn the worker in lock to avoid fd leak
self.lock.acquire()
- logging.info('-' * conn_timeout)
- logging.info('starting gsyncd worker')
-
- # Couple of pipe pairs for RPC communication b/w
- # worker and changelog agent.
-
- # read/write end for agent
- (ra, ww) = os.pipe()
- # read/write end for worker
- (rw, wa) = os.pipe()
-
- # spawn the agent process
- apid = os.fork()
- if apid == 0:
- os.execv(sys.executable, argv + ['--local-path', w[0],
- '--agent',
- '--rpc-fd',
- ','.join([str(ra), str(wa),
- str(rw), str(ww)])])
- pr, pw = os.pipe()
+ self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
+ logging.info(lf('starting gsyncd worker',
+ brick=w[0]['dir'],
+ slave_node=remote_host))
+
+ pr, pw = pipe()
cpid = os.fork()
if cpid == 0:
os.close(pr)
- os.execv(sys.executable, argv + ['--feedback-fd', str(pw),
- '--local-path', w[0],
- '--local-id',
- '.' + escape(w[0]),
- '--rpc-fd',
- ','.join([str(rw), str(ww),
- str(ra), str(wa)]),
- '--resource-remote',
- remote_host])
+
+ args_to_worker = argv + [
+ 'worker',
+ rconf.args.master,
+ rconf.args.slave,
+ '--feedback-fd', str(pw),
+ '--local-path', w[0]['dir'],
+ '--local-node', w[0]['host'],
+ '--local-node-id', w[0]['uuid'],
+ '--slave-id', suuid,
+ '--subvol-num', str(w[2]),
+ '--resource-remote', remote_host,
+ '--resource-remote-id', remote_id
+ ]
+
+ if rconf.args.config_file is not None:
+ args_to_worker += ['-c', rconf.args.config_file]
+
+ if w[3]:
+ args_to_worker.append("--is-hottier")
+
+ if rconf.args.debug:
+ args_to_worker.append("--debug")
+
+ access_mount = gconf.get("access-mount")
+ if access_mount:
+ os.execv(sys.executable, args_to_worker)
+ else:
+ if unshare_propagation_supported():
+ logging.debug("Worker would mount volume privately")
+ unshare_cmd = ['unshare', '-m', '--propagation',
+ 'private']
+ cmd = unshare_cmd + args_to_worker
+ os.execvp("unshare", cmd)
+ else:
+ logging.debug("Mount is not private. It would be lazy"
+ " umounted")
+ os.execv(sys.executable, args_to_worker)
cpids.add(cpid)
- agents.add(apid)
os.close(pw)
- # close all RPC pipes in monitor
- os.close(ra)
- os.close(wa)
- os.close(rw)
- os.close(ww)
self.lock.release()
t0 = time.time()
@@ -262,126 +213,183 @@ class Monitor(object):
if so:
ret = nwait(cpid, os.WNOHANG)
+
if ret is not None:
- logging.info("worker(%s) died before establishing "
- "connection" % w[0])
- nwait(apid) #wait for agent
+ logging.info(lf("worker died before establishing "
+ "connection",
+ brick=w[0]['dir']))
else:
- logging.debug("worker(%s) connected" % w[0])
+ logging.debug("worker(%s) connected" % w[0]['dir'])
while time.time() < t0 + conn_timeout:
ret = nwait(cpid, os.WNOHANG)
+
if ret is not None:
- logging.info("worker(%s) died in startup "
- "phase" % w[0])
- nwait(apid) #wait for agent
+ logging.info(lf("worker died in startup phase",
+ brick=w[0]['dir']))
break
+
time.sleep(1)
else:
- logging.info("worker(%s) not confirmed in %d sec, "
- "aborting it" % (w[0], conn_timeout))
- os.kill(cpid, signal.SIGKILL)
- nwait(apid) #wait for agent
+ logging.info(
+ lf("Worker not confirmed after wait, aborting it. "
+ "Gsyncd invocation on remote slave via SSH or "
+ "gluster master mount might have hung. Please "
+                       "check the above logs for the exact issue and check "
+                       "the master or slave volume for errors. Restarting "
+                       "the master/slave volume accordingly might help.",
+ brick=w[0]['dir'],
+ timeout=conn_timeout))
+ errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
ret = nwait(cpid)
if ret is None:
- self.set_state(self.ST_STABLE, w)
- #If worker dies, agent terminates on EOF.
- #So lets wait for agent first.
- nwait(apid)
ret = nwait(cpid)
if exit_signalled(ret):
ret = 0
else:
ret = exit_status(ret)
if ret in (0, 1):
- self.set_state(self.ST_FAULTY, w)
+ self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY)
+ gf_event(EVENT_GEOREP_FAULTY,
+ master_volume=master.volume,
+ master_node=w[0]['host'],
+ master_node_id=w[0]['uuid'],
+ slave_host=slave_host,
+ slave_volume=slave_vol,
+ current_slave_host=current_slave_host,
+ brick_path=w[0]['dir'])
time.sleep(10)
- self.set_state(self.ST_INCON, w)
+ self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
return ret
- def multiplex(self, wspx, suuid, slave_vol, slave_host):
- argv = sys.argv[:]
- for o in ('-N', '--no-daemon', '--monitor'):
- while o in argv:
- argv.remove(o)
- argv.extend(('-N', '-p', '', '--slave-id', suuid))
- argv.insert(0, os.path.basename(sys.executable))
+ def multiplex(self, wspx, suuid, slave_vol, slave_host, master, slavenodes):
+ argv = [os.path.basename(sys.executable), sys.argv[0]]
cpids = set()
- agents = set()
ta = []
for wx in wspx:
def wmon(w):
- cpid, _ = self.monitor(w, argv, cpids, agents, slave_vol,
- slave_host)
+ cpid, _ = self.monitor(w, argv, cpids, slave_vol,
+ slave_host, master, suuid, slavenodes)
time.sleep(1)
self.lock.acquire()
for cpid in cpids:
- os.kill(cpid, signal.SIGKILL)
- for apid in agents:
- os.kill(apid, signal.SIGKILL)
+ errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
self.lock.release()
finalize(exval=1)
t = Thread(target=wmon, args=[wx])
t.start()
ta.append(t)
+
+        # Monitor status was being updated in each monitor thread. It
+        # should not be done there as it can cause a deadlock at worker
+        # start. set_monitor_status uses flock to synchronize multiple
+        # instances updating the file. Since each monitor thread forks a
+        # worker, those processes can hold a reference to the status
+        # file's fd, deadlocking workers which start later because the
+        # flock is not released until all references to the same fd are
+        # closed. It would also cause fd leaks.
+
+ self.lock.acquire()
+ set_monitor_status(gconf.get("state-file"), self.ST_STARTED)
+ self.lock.release()
for t in ta:
t.join()
-def distribute(*resources):
- master, slave = resources
- mvol = Volinfo(master.volume, master.host)
+def distribute(master, slave):
+ if rconf.args.use_gconf_volinfo:
+ mvol = VolinfoFromGconf(master.volume, master=True)
+ else:
+ mvol = Volinfo(master.volume, master.host, master=True)
logging.debug('master bricks: ' + repr(mvol.bricks))
prelude = []
- si = slave
slave_host = None
slave_vol = None
- if isinstance(slave, SSH):
- prelude = gconf.ssh_command.split() + [slave.remote_addr]
- si = slave.inner_rsc
- logging.debug('slave SSH gateway: ' + slave.remote_addr)
- if isinstance(si, FILE):
- sbricks = {'host': 'localhost', 'dir': si.path}
- suuid = uuid.uuid5(uuid.NAMESPACE_URL, slave.get_url(canonical=True))
- elif isinstance(si, GLUSTER):
- svol = Volinfo(si.volume, slave.remote_addr.split('@')[-1])
- sbricks = svol.bricks
- suuid = svol.uuid
- slave_host = slave.remote_addr.split('@')[-1]
- slave_vol = si.volume
- else:
- raise GsyncdError("unkown slave type " + slave.url)
- logging.info('slave bricks: ' + repr(sbricks))
- if isinstance(si, FILE):
- slaves = [slave.url]
- else:
- slavenodes = set(b['host'] for b in sbricks)
- if isinstance(slave, SSH) and not gconf.isolated_slave:
- rap = SSH.parse_ssh_address(slave)
- slaves = ['ssh://' + rap['user'] + '@' + h + ':' + si.url
- for h in slavenodes]
- else:
- slavevols = [h + ':' + si.volume for h in slavenodes]
- if isinstance(slave, SSH):
- slaves = ['ssh://' + rap.remote_addr + ':' + v
- for v in slavevols]
- else:
- slaves = slavevols
-
- workerspex = [(brick['dir'], slaves[idx % len(slaves)])
- for idx, brick in enumerate(mvol.bricks)
- if is_host_local(brick['host'])]
- logging.info('worker specs: ' + repr(workerspex))
- return workerspex, suuid, slave_vol, slave_host
+ prelude = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ [slave.remote_addr]
+ logging.debug('slave SSH gateway: ' + slave.remote_addr)
-def monitor(*resources):
+ if rconf.args.use_gconf_volinfo:
+ svol = VolinfoFromGconf(slave.volume, master=False)
+ else:
+ svol = Volinfo(slave.volume, "localhost", prelude, master=False)
+
+ sbricks = svol.bricks
+ suuid = svol.uuid
+ slave_host = slave.remote_addr.split('@')[-1]
+ slave_vol = slave.volume
+
+ # save this xattr for the session delete command
+ old_stime_xattr_prefix = gconf.get("stime-xattr-prefix", None)
+ new_stime_xattr_prefix = "trusted.glusterfs." + mvol.uuid + "." + \
+ svol.uuid
+ if not old_stime_xattr_prefix or \
+ old_stime_xattr_prefix != new_stime_xattr_prefix:
+ gconf.setconfig("stime-xattr-prefix", new_stime_xattr_prefix)
+
+ logging.debug('slave bricks: ' + repr(sbricks))
+
+ slavenodes = set((b['host'], b["uuid"]) for b in sbricks)
+ rap = SSH.parse_ssh_address(slave)
+ slaves = [(rap['user'] + '@' + h[0], h[1]) for h in slavenodes]
+
+ workerspex = []
+ for idx, brick in enumerate(mvol.bricks):
+ if rconf.args.local_node_id == brick['uuid']:
+ is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']]))
+ workerspex.append((brick,
+ slaves[idx % len(slaves)],
+ get_subvol_num(idx, mvol, is_hot),
+ is_hot))
+ logging.debug('worker specs: ' + repr(workerspex))
+ return workerspex, suuid, slave_vol, slave_host, master, slavenodes
+
+
+def monitor(local, remote):
# Check if gsyncd restarted in pause state. If
# yes, send SIGSTOP to negative of monitor pid
# to go back to pause state.
- if gconf.pause_on_start:
- os.kill(-os.getpid(), signal.SIGSTOP)
+ if rconf.args.pause_on_start:
+ errno_wrap(os.kill, [-os.getpid(), signal.SIGSTOP], [ESRCH])
"""oh yeah, actually Monitor is used as singleton, too"""
- return Monitor().multiplex(*distribute(*resources))
+ return Monitor().multiplex(*distribute(local, remote))
+
+
+def startup(go_daemon=True):
+ """set up logging, pidfile grabbing, daemonization"""
+ pid_file = gconf.get("pid-file")
+ if not grabpidfile():
+ sys.stderr.write("pidfile is taken, exiting.\n")
+ sys.exit(2)
+ rconf.pid_file_owned = True
+
+ if not go_daemon:
+ return
+
+ x, y = pipe()
+ cpid = os.fork()
+ if cpid:
+ os.close(x)
+ sys.exit()
+ os.close(y)
+ os.setsid()
+ dn = os.open(os.devnull, os.O_RDWR)
+ for f in (sys.stdin, sys.stdout, sys.stderr):
+ os.dup2(dn, f.fileno())
+
+ if not grabpidfile(pid_file + '.tmp'):
+ raise GsyncdError("cannot grab temporary pidfile")
+
+ os.rename(pid_file + '.tmp', pid_file)
+
+ # wait for parent to terminate
+ # so we can start up with
+ # no messing from the dirty
+ # ol' bustard
+ select((x,), (), ())
+ os.close(x)
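Several call sites in this file now route raw syscalls through errno_wrap so that an expected errno (for example ESRCH when the target process has already exited) is swallowed instead of taking the monitor down. The real helper lives in syncdutils.py; a minimal sketch of the pattern, with retries reduced to a fixed number of attempts and the signature assumed rather than quoted, might look like this:

    import time

    def errno_wrap(call, args=[], ignore_errno=[], retry_errno=[], retries=5):
        # Run call(*args); return the errno for expected failures,
        # retry briefly for transient ones, re-raise anything else.
        last = None
        for _ in range(retries):
            try:
                return call(*args)
            except OSError as e:
                last = e.errno
                if e.errno in ignore_errno:
                    return e.errno
                if e.errno not in retry_errno:
                    raise
                time.sleep(0.5)
        return last

    # e.g. signalling a process group that may already be gone:
    # errno_wrap(os.kill, [-os.getpid(), signal.SIGTERM], [ESRCH])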
diff --git a/geo-replication/syncdaemon/py2py3.py b/geo-replication/syncdaemon/py2py3.py
new file mode 100644
index 00000000000..f9c76e1b50a
--- /dev/null
+++ b/geo-replication/syncdaemon/py2py3.py
@@ -0,0 +1,184 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import sys
+import os
+import stat
+import struct
+from syncdutils import umask
+from ctypes import create_string_buffer
+
+if sys.version_info >= (3,):
+ def pipe():
+ (r, w) = os.pipe()
+ os.set_inheritable(r, True)
+ os.set_inheritable(w, True)
+ return (r, w)
+
+    # Raw conversion of bytearray to string. Used where a buffer is
+    # created by create_string_buffer, which is an 8-bit char array,
+    # and passed to syscalls to fetch results. encode/decode doesn't
+    # work here as it can alter the size while converting to a string.
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+    # Raw conversion of string to bytes. This is required to convert
+    # the string back into a bytearray (C char array) for use in struct
+    # pack/unpacking. Again, encode/decode can't be used as it can
+    # alter the size.
+ def str_to_bytearray(string):
+ return bytes([ord(c) for c in string])
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path.encode(), size, syscall,
+ attr.encode())
+ else:
+ return cls._query_xattr(path.encode(), size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
+ len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path.encode(), attr.encode())
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick.encode(),
+ path.encode(),
+ log_file.encode(),
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile.encode())
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path.encode(),
+ start, end, num_parallel,
+ actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile.encode())
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+else:
+ def pipe():
+ (r, w) = os.pipe()
+ return (r, w)
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ # Raw conversion of string to bytearray
+ def str_to_bytearray(string):
+ return string
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path, size, syscall, attr)
+ else:
+ return cls._query_xattr(path, size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path, attr)
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile)
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile)
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
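The byte-wise conversions above exist because encode()/decode() round-trips can change the length of non-UTF-8 data, which breaks the fixed-size struct packing used for xattr values. A short standalone illustration of the property the helpers rely on (plain Python 3, not the module's own tests):

    # what bytearray_to_str does: one character per byte, length preserved
    raw = b'\xde\xad\xbe\xef'
    s = ''.join(chr(b) for b in raw)
    assert len(s) == len(raw)

    # what str_to_bytearray does: the exact inverse mapping
    assert bytes(ord(c) for c in s) == raw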
diff --git a/geo-replication/syncdaemon/gconf.py b/geo-replication/syncdaemon/rconf.py
index 1fc7c381bc4..ff716ee4d6d 100644
--- a/geo-replication/syncdaemon/gconf.py
+++ b/geo-replication/syncdaemon/rconf.py
@@ -9,9 +9,9 @@
#
-class GConf(object):
+class RConf(object):
- """singleton class to store globals
+ """singleton class to store runtime globals
shared between gsyncd modules"""
ssh_ctl_dir = None
@@ -21,5 +21,11 @@ class GConf(object):
log_exit = False
permanent_handles = []
log_metadata = {}
+ mgmt_lock_fd = None
+ args = None
+ turns = 0
+ mountbroker = False
+ mount_point = None
+ mbr_umount_cmd = []
-gconf = GConf()
+rconf = RConf()
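Modules share runtime state by importing this single instance rather than passing it around: the parsed CLI namespace is stored on it once at startup and read back elsewhere as rconf.args (as the monitor and resource code above does). A hedged usage sketch, with made-up attribute values rather than real gsyncd arguments:

    from argparse import Namespace
    from rconf import rconf

    # set once at startup, e.g. from the CLI parser
    rconf.args = Namespace(local_path='/bricks/b1', debug=True)

    # read anywhere else without re-parsing
    assert rconf.args.local_path == '/bricks/b1'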
diff --git a/geo-replication/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
index d7b17dda796..c622afa6373 100644
--- a/geo-replication/syncdaemon/repce.py
+++ b/geo-replication/syncdaemon/repce.py
@@ -14,30 +14,27 @@ import time
import logging
from threading import Condition
try:
- import thread
-except ImportError:
- # py 3
import _thread as thread
-try:
- from Queue import Queue
except ImportError:
- # py 3
+ import thread
+try:
from queue import Queue
+except ImportError:
+ from Queue import Queue
try:
import cPickle as pickle
except ImportError:
- # py 3
import pickle
-from syncdutils import Thread, select
+from syncdutils import Thread, select, lf
-pickle_proto = -1
+pickle_proto = 2
repce_version = 1.0
def ioparse(i, o):
if isinstance(i, int):
- i = os.fdopen(i)
+ i = os.fdopen(i, 'rb')
# rely on duck typing for recognizing
# streams as that works uniformly
# in py2 and py3
@@ -57,8 +54,15 @@ def send(out, *args):
def recv(inf):
- """load an object from input stream"""
- return pickle.load(inf)
+ """load an object from input stream
+    For python2/python3 compatibility: inf is sys.stdin, which is
+    opened as a text stream by default, hence the buffer attribute
+    is used on python3.
+ """
+ if hasattr(inf, "buffer"):
+ return pickle.load(inf.buffer)
+ else:
+ return pickle.load(inf)
class RepceServer(object):
@@ -196,15 +200,17 @@ class RepceClient(object):
"""RePCe client is callabe, calling it implements a synchronous
remote call.
- We do a .push with a cbk which does a wakeup upon receiving anwser,
+ We do a .push with a cbk which does a wakeup upon receiving answer,
then wait on the RepceJob.
"""
rjob = self.push(
meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
exc, res = rjob.wait()
if exc:
- logging.error('call %s (%s) failed on peer with %s' %
- (repr(rjob), meth, str(type(res).__name__)))
+ logging.error(lf('call failed',
+ call=repr(rjob),
+ method=meth,
+ error=str(type(res).__name__)))
raise res
logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res)))
return res
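Pinning pickle_proto to 2 and reading from the stream's binary buffer keeps the RePCe wire format readable by both Python 2 and Python 3 ends of the connection. A small self-contained sketch of that framing over an in-memory binary stream (the payload is made up; the real transport is the worker/slave stdio pair):

    import io
    import pickle

    pickle_proto = 2  # newest protocol a Python 2 peer can still read

    def send(out, *args):
        pickle.dump(args, out, pickle_proto)
        out.flush()

    def recv(inf):
        # sys.stdin is a text stream on Python 3; raw bytes live on .buffer
        if hasattr(inf, "buffer"):
            inf = inf.buffer
        return pickle.load(inf)

    stream = io.BytesIO()
    send(stream, "keep_alive", {"volume_mark": 42})
    stream.seek(0)
    assert recv(stream) == ("keep_alive", {"volume_mark": 42})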
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index 1e11b980ad3..f12c7ceaa36 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -13,259 +13,43 @@ import os
import sys
import stat
import time
-import signal
import fcntl
-import errno
import types
import struct
-import socket
import logging
import tempfile
-import threading
import subprocess
-from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP
-from errno import EISDIR, ENOTEMPTY, ESTALE, EINVAL
-from select import error as SelectError
-import shutil
+from errno import (EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EACCES,
+ EISDIR, ENOTEMPTY, ESTALE, EINVAL, EBUSY, EPERM)
+import errno
+
+from rconf import rconf
+import gsyncdconfig as gconf
+import libgfchangelog
-from gconf import gconf
import repce
from repce import RepceServer, RepceClient
from master import gmaster_builder
import syncdutils
-from syncdutils import GsyncdError, select, privileged, boolify, funcode
-from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat
-from syncdutils import NoPurgeTimeAvailable, PartialHistoryAvailable
-from syncdutils import ChangelogException
-from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
+from syncdutils import (GsyncdError, select, privileged, funcode,
+ entry2pb, gauxpfx, errno_wrap, lstat,
+ NoStimeAvailable, PartialHistoryAvailable,
+ ChangelogException, ChangelogHistoryNotAvailable,
+ get_changelog_log_level, get_rsync_version,
+ GX_GFID_CANONICAL_LEN,
+ gf_mount_ready, lf, Popen, sup,
+ Xattr, matching_disk_gfid, get_gfid_from_mnt,
+ unshare_propagation_supported, get_slv_dir_path)
+from gsyncdstatus import GeorepStatus
+from py2py3 import (pipe, str_to_bytearray, entry_pack_reg,
+ entry_pack_reg_stat, entry_pack_mkdir,
+ entry_pack_symlink)
-UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z')
-HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I)
-UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+")
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
-
-def sup(x, *a, **kw):
- """a rubyesque "super" for python ;)
-
- invoke caller method in parent class with given args.
- """
- return getattr(super(type(x), x),
- sys._getframe(1).f_code.co_name)(*a, **kw)
-
-
-def desugar(ustr):
- """transform sugared url strings to standard <scheme>://<urlbody> form
-
- parsing logic enforces the constraint that sugared forms should contatin
- a ':' or a '/', which ensures that sugared urls do not conflict with
- gluster volume names.
- """
- m = re.match('([^:]*):(.*)', ustr)
- if m:
- if not m.groups()[0]:
- return "gluster://localhost" + ustr
- elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]):
- return "ssh://" + ustr
- else:
- return "gluster://" + ustr
- else:
- if ustr[0] != '/':
- raise GsyncdError("cannot resolve sugared url '%s'" % ustr)
- ap = os.path.normpath(ustr)
- if ap.startswith('//'):
- ap = ap[1:]
- return "file://" + ap
-
-
-def gethostbyname(hnam):
- """gethostbyname wrapper"""
- try:
- return socket.gethostbyname(hnam)
- except socket.gaierror:
- ex = sys.exc_info()[1]
- raise GsyncdError("failed to resolve %s: %s" %
- (hnam, ex.strerror))
-
-
-def parse_url(ustr):
- """instantiate an url object by scheme-to-class dispatch
-
- The url classes taken into consideration are the ones in
- this module whose names are full-caps.
- """
- m = UrlRX.match(ustr)
- if not m:
- ustr = desugar(ustr)
- m = UrlRX.match(ustr)
- if not m:
- raise GsyncdError("malformed url")
- sch, path = m.groups()
- this = sys.modules[__name__]
- if not hasattr(this, sch.upper()):
- raise GsyncdError("unknown url scheme " + sch)
- return getattr(this, sch.upper())(path)
-
-
-class _MetaXattr(object):
-
- """singleton class, a lazy wrapper around the
- libcxattr module
-
- libcxattr (a heavy import due to ctypes) is
- loaded only when when the single
- instance is tried to be used.
-
- This reduces runtime for those invocations
- which do not need filesystem manipulation
- (eg. for config, url parsing)
- """
-
- def __getattr__(self, meth):
- from libcxattr import Xattr as LXattr
- xmeth = [m for m in dir(LXattr) if m[0] != '_']
- if not meth in xmeth:
- return
- for m in xmeth:
- setattr(self, m, getattr(LXattr, m))
- return getattr(self, meth)
-
-
-Xattr = _MetaXattr()
-
-
-class Popen(subprocess.Popen):
-
- """customized subclass of subprocess.Popen with a ring
- buffer for children error output"""
-
- @classmethod
- def init_errhandler(cls):
- """start the thread which handles children's error output"""
- cls.errstore = {}
-
- def tailer():
- while True:
- errstore = cls.errstore.copy()
- try:
- poe, _, _ = select(
- [po.stderr for po in errstore], [], [], 1)
- except (ValueError, SelectError):
- continue
- for po in errstore:
- if po.stderr not in poe:
- continue
- po.lock.acquire()
- try:
- if po.on_death_row:
- continue
- la = errstore[po]
- try:
- fd = po.stderr.fileno()
- except ValueError: # file is already closed
- continue
- l = os.read(fd, 1024)
- if not l:
- continue
- tots = len(l)
- for lx in la:
- tots += len(lx)
- while tots > 1 << 20 and la:
- tots -= len(la.pop(0))
- la.append(l)
- finally:
- po.lock.release()
- t = syncdutils.Thread(target=tailer)
- t.start()
- cls.errhandler = t
-
- @classmethod
- def fork(cls):
- """fork wrapper that restarts errhandler thread in child"""
- pid = os.fork()
- if not pid:
- cls.init_errhandler()
- return pid
-
- def __init__(self, args, *a, **kw):
- """customizations for subprocess.Popen instantiation
-
- - 'close_fds' is taken to be the default
- - if child's stderr is chosen to be managed,
- register it with the error handler thread
- """
- self.args = args
- if 'close_fds' not in kw:
- kw['close_fds'] = True
- self.lock = threading.Lock()
- self.on_death_row = False
- try:
- sup(self, args, *a, **kw)
- except:
- ex = sys.exc_info()[1]
- if not isinstance(ex, OSError):
- raise
- raise GsyncdError("""execution of "%s" failed with %s (%s)""" %
- (args[0], errno.errorcode[ex.errno],
- os.strerror(ex.errno)))
- if kw.get('stderr') == subprocess.PIPE:
- assert(getattr(self, 'errhandler', None))
- self.errstore[self] = []
-
- def errlog(self):
- """make a log about child's failure event"""
- filling = ""
- if self.elines:
- filling = ", saying:"
- logging.error("""command "%s" returned with %s%s""" %
- (" ".join(self.args), repr(self.returncode), filling))
- lp = ''
-
- def logerr(l):
- logging.error(self.args[0] + "> " + l)
- for l in self.elines:
- ls = l.split('\n')
- ls[0] = lp + ls[0]
- lp = ls.pop()
- for ll in ls:
- logerr(ll)
- if lp:
- logerr(lp)
-
- def errfail(self):
- """fail nicely if child did not terminate with success"""
- self.errlog()
- syncdutils.finalize(exval=1)
-
- def terminate_geterr(self, fail_on_err=True):
- """kill child, finalize stderr harvesting (unregister
- from errhandler, set up .elines), fail on error if
- asked for
- """
- self.lock.acquire()
- try:
- self.on_death_row = True
- finally:
- self.lock.release()
- elines = self.errstore.pop(self)
- if self.poll() is None:
- self.terminate()
- if self.poll() is None:
- time.sleep(0.1)
- self.kill()
- self.wait()
- while True:
- if not select([self.stderr], [], [], 0.1)[0]:
- break
- b = os.read(self.stderr.fileno(), 1024)
- if b:
- elines.append(b)
- else:
- break
- self.stderr.close()
- self.elines = elines
- if fail_on_err and self.returncode != 0:
- self.errfail()
+slv_volume = None
+slv_host = None
class Server(object):
@@ -282,7 +66,6 @@ class Server(object):
NTV_FMTSTR = "!" + "B" * 19 + "II"
FRGN_XTRA_FMT = "I"
FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
- GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
# for backend gfid fetch, do not use GX_NSPACE_PFX
GFID_XATTR = 'trusted.gfid'
@@ -292,15 +75,15 @@ class Server(object):
@classmethod
def _fmt_mknod(cls, l):
- return "!II%dsI%dsIII" % (cls.GX_GFID_CANONICAL_LEN, l + 1)
+ return "!II%dsI%dsIII" % (GX_GFID_CANONICAL_LEN, l + 1)
@classmethod
def _fmt_mkdir(cls, l):
- return "!II%dsI%dsII" % (cls.GX_GFID_CANONICAL_LEN, l + 1)
+ return "!II%dsI%dsII" % (GX_GFID_CANONICAL_LEN, l + 1)
@classmethod
def _fmt_symlink(cls, l1, l2):
- return "!II%dsI%ds%ds" % (cls.GX_GFID_CANONICAL_LEN, l1 + 1, l2 + 1)
+ return "!II%dsI%ds%ds" % (GX_GFID_CANONICAL_LEN, l1 + 1, l2 + 1)
def _pathguard(f):
"""decorator method that checks
@@ -312,14 +95,14 @@ class Server(object):
fc = funcode(f)
pi = list(fc.co_varnames).index('path')
- def ff(*a):
- path = a[pi]
+ def ff(*args):
+ path = args[pi]
ps = path.split('/')
if path[0] == '/' or '..' in ps:
raise ValueError('unsafe path')
- a = list(a)
- a[pi] = os.path.join(a[0].local_path, path)
- return f(*a)
+ args = list(args)
+ args[pi] = os.path.join(args[0].local_path, path)
+ return f(*args)
return ff
@classmethod
@@ -360,23 +143,15 @@ class Server(object):
@classmethod
@_pathguard
def gfid(cls, path):
- try:
- buf = Xattr.lgetxattr(path, cls.GFID_XATTR, 16)
+ buf = errno_wrap(Xattr.lgetxattr, [path, cls.GFID_XATTR, 16],
+ [ENOENT], [ESTALE, ENODATA])
+ if buf == ENOENT:
+ return buf
+ else:
+ buf = str_to_bytearray(buf)
m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(
['%02x' % x for x in struct.unpack(cls.GFID_FMTSTR, buf)]))
return '-'.join(m.groups())
- except (IOError, OSError):
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- return ex.errno
- else:
- raise
-
- @classmethod
- def gfid_mnt(cls, gfidpath):
- return errno_wrap(Xattr.lgetxattr,
- [gfidpath, 'glusterfs.gfid.string',
- cls.GX_GFID_CANONICAL_LEN], [ENOENT])
@classmethod
@_pathguard
@@ -464,6 +239,7 @@ class Server(object):
val = Xattr.lgetxattr(path,
'.'.join([cls.GX_NSPACE, uuid, 'xtime']),
8)
+ val = str_to_bytearray(val)
return struct.unpack('!II', val)
except OSError:
ex = sys.exc_info()[1]
@@ -486,6 +262,7 @@ class Server(object):
val = Xattr.lgetxattr(path,
'.'.join([cls.GX_NSPACE, uuid, 'stime']),
8)
+ val = str_to_bytearray(val)
return struct.unpack('!II', val)
except OSError:
ex = sys.exc_info()[1]
@@ -508,6 +285,31 @@ class Server(object):
val = Xattr.lgetxattr(path,
'.'.join([cls.GX_NSPACE, uuid, 'stime']),
8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def entry_stime(cls, path, uuid):
+ """
+        entry_stime xattr to reduce the number of retries of Entry changes
+        when the Geo-rep worker crashes and restarts. entry_stime is updated
+        after processing every changelog file. On failure and restart, the
+        worker only has to reprocess the last changelog for Entry ops.
+ Xattr Key: <PFX>.<MASTERVOL_UUID>.<SLAVEVOL_UUID>.entry_stime
+ """
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid,
+ 'entry_stime']),
+ 8)
+ val = str_to_bytearray(val)
return struct.unpack('!II', val)
except OSError:
ex = sys.exc_info()[1]
@@ -529,17 +331,34 @@ class Server(object):
@_pathguard
def set_stime(cls, path, uuid, mark):
"""set @mark as stime for @uuid on @path"""
- Xattr.lsetxattr(
- path, '.'.join([cls.GX_NSPACE, uuid, 'stime']),
- struct.pack('!II', *mark))
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'stime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
+
+ @classmethod
+ @_pathguard
+ def set_entry_stime(cls, path, uuid, mark):
+ """set @mark as stime for @uuid on @path"""
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'entry_stime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
@classmethod
@_pathguard
def set_xtime(cls, path, uuid, mark):
"""set @mark as xtime for @uuid on @path"""
- Xattr.lsetxattr(
- path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
- struct.pack('!II', *mark))
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
@classmethod
@_pathguard
@@ -559,119 +378,390 @@ class Server(object):
def entry_ops(cls, entries):
pfx = gauxpfx()
logging.debug('entries: %s' % repr(entries))
- # regular file
-
- def entry_pack_reg(gf, bn, mo, uid, gid):
- blen = len(bn)
- return struct.pack(cls._fmt_mknod(blen),
- uid, gid, gf, mo, bn,
- stat.S_IMODE(mo), 0, umask())
-
- def entry_pack_reg_stat(gf, bn, st):
- blen = len(bn)
- mo = st['mode']
- return struct.pack(cls._fmt_mknod(blen),
- st['uid'], st['gid'],
- gf, mo, bn,
- stat.S_IMODE(mo), 0, umask())
- # mkdir
-
- def entry_pack_mkdir(gf, bn, mo, uid, gid):
- blen = len(bn)
- return struct.pack(cls._fmt_mkdir(blen),
- uid, gid, gf, mo, bn,
- stat.S_IMODE(mo), umask())
- # symlink
-
- def entry_pack_symlink(gf, bn, lnk, st):
- blen = len(bn)
- llen = len(lnk)
- return struct.pack(cls._fmt_symlink(blen, llen),
- st['uid'], st['gid'],
- gf, st['mode'], bn, lnk)
-
- def entry_purge(entry, gfid):
+ dist_count = rconf.args.master_dist_count
+
+ def entry_purge(op, entry, gfid, e, uid, gid):
# This is an extremely racy code and needs to be fixed ASAP.
# The GFID check here is to be sure that the pargfid/bname
# to be purged is the GFID gotten from the changelog.
# (a stat(changelog_gfid) would also be valid here)
# The race here is between the GFID check and the purge.
- disk_gfid = cls.gfid_mnt(entry)
- if isinstance(disk_gfid, int):
+
+ # If the entry or the gfid of the file to be deleted is not present
+ # on slave, we can ignore the unlink/rmdir
+ if isinstance(lstat(entry), int) or \
+ isinstance(lstat(os.path.join(pfx, gfid)), int):
return
- if not gfid == disk_gfid:
+
+ if not matching_disk_gfid(gfid, entry):
+ collect_failure(e, EEXIST, uid, gid)
return
- er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
- if isinstance(er, int):
+
+ if op == 'UNLINK':
+ er = errno_wrap(os.unlink, [entry], [ENOENT, ESTALE], [EBUSY])
+                # EISDIR is a safe error, ignore it. This can only happen
+                # when unlink is sent from the master while fixing gfid
+                # conflicts.
+ if er != EISDIR:
+ return er
+
+ elif op == 'RMDIR':
+ er = errno_wrap(os.rmdir, [entry], [ENOENT, ESTALE,
+ ENOTEMPTY], [EBUSY])
+ if er == ENOTEMPTY:
+ return er
+
+ def collect_failure(e, cmd_ret, uid, gid, dst=False):
+ slv_entry_info = {}
+ slv_entry_info['gfid_mismatch'] = False
+ slv_entry_info['name_mismatch'] = False
+ slv_entry_info['dst'] = dst
+ slv_entry_info['slave_isdir'] = False
+ slv_entry_info['slave_name'] = None
+ slv_entry_info['slave_gfid'] = None
+ # We do this for failing fops on Slave
+ # Master should be logging this
+ if cmd_ret is None:
+ return False
+
+ if e.get("stat", {}):
+ # Copy actual UID/GID value back to entry stat
+ e['stat']['uid'] = uid
+ e['stat']['gid'] = gid
+
+ if cmd_ret in [EEXIST, ESTALE]:
+ if dst:
+ en = e['entry1']
+ else:
+ en = e['entry']
+ disk_gfid = get_gfid_from_mnt(en)
+ if isinstance(disk_gfid, str) and \
+ e['gfid'] != disk_gfid:
+ slv_entry_info['gfid_mismatch'] = True
+ st = lstat(en)
+ if not isinstance(st, int):
+ if st and stat.S_ISDIR(st.st_mode):
+ slv_entry_info['slave_isdir'] = True
+ dir_name = get_slv_dir_path(slv_host, slv_volume,
+ disk_gfid)
+ slv_entry_info['slave_name'] = dir_name
+ else:
+ slv_entry_info['slave_isdir'] = False
+ slv_entry_info['slave_gfid'] = disk_gfid
+ failures.append((e, cmd_ret, slv_entry_info))
+ else:
+ return False
+ else:
+ failures.append((e, cmd_ret, slv_entry_info))
+
+ return True
+
+ failures = []
+
+ def recursive_rmdir(gfid, entry, path):
+            """A disk_gfid check is added for the original path for which
+            recursive_delete is called. This disk gfid check is executed
+            before every Unlink/Rmdir. If the disk gfid does not match
+            the GFID from the Changelog, it means another worker has
+            deleted the directory. Even if a subdir/file is present, it
+            belongs to a different parent. Exit without performing
+            further deletes.
+ """
+ if not matching_disk_gfid(gfid, entry):
+ return
+
+ names = []
+ names = errno_wrap(os.listdir, [path], [ENOENT], [ESTALE, ENOTSUP])
+ if isinstance(names, int):
+ return
+
+ for name in names:
+ fullname = os.path.join(path, name)
+ if not matching_disk_gfid(gfid, entry):
+ return
+ er = errno_wrap(os.remove, [fullname], [ENOENT, ESTALE,
+ EISDIR], [EBUSY])
+
if er == EISDIR:
- er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
- if er == ENOTEMPTY:
- return er
+ recursive_rmdir(gfid, entry, fullname)
+
+ if not matching_disk_gfid(gfid, entry):
+ return
+
+ errno_wrap(os.rmdir, [path], [ENOENT, ESTALE], [EBUSY])
+
+ def rename_with_disk_gfid_confirmation(gfid, entry, en, uid, gid):
+ if not matching_disk_gfid(gfid, entry):
+ logging.error(lf("RENAME ignored: source entry does not match "
+ "with on-disk gfid",
+ source=entry,
+ gfid=gfid,
+ disk_gfid=get_gfid_from_mnt(entry),
+ target=en))
+ collect_failure(e, EEXIST, uid, gid)
+ return
+
+ cmd_ret = errno_wrap(os.rename,
+ [entry, en],
+ [ENOENT, EEXIST], [ESTALE, EBUSY])
+ collect_failure(e, cmd_ret, uid, gid)
+
for e in entries:
blob = None
op = e['op']
gfid = e['gfid']
entry = e['entry']
+ uid = 0
+ gid = 0
+
+ # Skip entry processing if it's marked true during gfid
+ # conflict resolution
+ if e['skip_entry']:
+ continue
+
+ if e.get("stat", {}):
+ # Copy UID/GID value and then reset to zero. Copied UID/GID
+ # will be used to run chown once entry is created.
+ uid = e['stat']['uid']
+ gid = e['stat']['gid']
+ e['stat']['uid'] = 0
+ e['stat']['gid'] = 0
+
(pg, bname) = entry2pb(entry)
if op in ['RMDIR', 'UNLINK']:
- while True:
- er = entry_purge(entry, gfid)
- if isinstance(er, int):
- if er == ENOTEMPTY and op == 'RMDIR':
- er1 = errno_wrap(shutil.rmtree,
- [os.path.join(pg, bname)],
- [ENOENT])
- if not isinstance(er1, int):
- logging.info("Removed %s/%s recursively" %
- (pg, bname))
- break
-
- logging.warn("Failed to remove %s => %s/%s. %s" %
- (gfid, pg, bname, os.strerror(er)))
- time.sleep(1)
+                # Try once; if rmdir fails with ENOTEMPTY,
+                # then delete recursively.
+ er = entry_purge(op, entry, gfid, e, uid, gid)
+ if isinstance(er, int):
+ if er == ENOTEMPTY and op == 'RMDIR':
+ # Retry if ENOTEMPTY, ESTALE
+ er1 = errno_wrap(recursive_rmdir,
+ [gfid, entry,
+ os.path.join(pg, bname)],
+ [], [ENOTEMPTY, ESTALE, ENODATA])
+ if not isinstance(er1, int):
+ logging.debug("Removed %s => %s/%s recursively" %
+ (gfid, pg, bname))
+ else:
+ logging.warn(lf("Recursive remove failed",
+ gfid=gfid,
+ pgfid=pg,
+ bname=bname,
+ error=os.strerror(er1)))
else:
- break
+ logging.warn(lf("Failed to remove",
+ gfid=gfid,
+ pgfid=pg,
+ bname=bname,
+ error=os.strerror(er)))
elif op in ['CREATE', 'MKNOD']:
- blob = entry_pack_reg(
- gfid, bname, e['mode'], e['uid'], e['gid'])
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ blob = entry_pack_reg(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
+            # Self-healed hardlinks are recorded as MKNOD.
+            # So if the gfid already exists, it should be
+            # processed as a hard link, not mknod.
+ elif op in ['MKNOD']:
+ cmd_ret = errno_wrap(os.link,
+ [slink, entry],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
elif op == 'MKDIR':
- blob = entry_pack_mkdir(
- gfid, bname, e['mode'], e['uid'], e['gid'])
+ en = e['entry']
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ blob = entry_pack_mkdir(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
+ elif (isinstance(lstat(en), int) or
+ not matching_disk_gfid(gfid, en)):
+                    # If the gfid of a directory exists on the slave but the
+                    # path-based create gets EEXIST, the directory was
+                    # renamed on the master but recorded as MKDIR during
+                    # hybrid crawl. Get the directory path by reading the
+                    # backend symlink and try renaming it to the new name
+                    # given by the master.
+ logging.info(lf("Special case: rename on mkdir",
+ gfid=gfid, entry=repr(entry)))
+ src_entry = get_slv_dir_path(slv_host, slv_volume, gfid)
+ if src_entry is None:
+ collect_failure(e, ENOENT, uid, gid)
+ if src_entry is not None and src_entry != entry:
+ slv_entry_info = {}
+ slv_entry_info['gfid_mismatch'] = False
+ slv_entry_info['name_mismatch'] = True
+ slv_entry_info['dst'] = False
+ slv_entry_info['slave_isdir'] = True
+ slv_entry_info['slave_gfid'] = gfid
+ slv_entry_info['slave_entry'] = src_entry
+
+ failures.append((e, EEXIST, slv_entry_info))
elif op == 'LINK':
slink = os.path.join(pfx, gfid)
st = lstat(slink)
if isinstance(st, int):
(pg, bname) = entry2pb(entry)
- blob = entry_pack_reg_stat(gfid, bname, e['stat'])
+ if stat.S_ISREG(e['stat']['mode']):
+ blob = entry_pack_reg_stat(cls, gfid, bname, e['stat'])
+ elif stat.S_ISLNK(e['stat']['mode']):
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
+ e['stat'])
else:
- errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
+ cmd_ret = errno_wrap(os.link,
+ [slink, entry],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
elif op == 'SYMLINK':
- blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
- elif op == 'RENAME':
- en = e['entry1']
+ en = e['entry']
st = lstat(entry)
if isinstance(st, int):
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, uid, gid)
+ elif op == 'RENAME':
+ en = e['entry1']
+                # The matching disk gfid check validates two things:
+                # 1. the name is present, returns false otherwise
+                # 2. the gfid is the same, returns false otherwise
+                # Both validations are necessary to decide the src doesn't
+                # exist. We can't rely on the gfid stat alone as a hardlink
+                # could be present, and we can't rely on the name alone as
+                # the name could exist with a different gfid.
+ if not matching_disk_gfid(gfid, entry):
if e['stat'] and not stat.S_ISDIR(e['stat']['mode']):
- (pg, bname) = entry2pb(en)
- blob = entry_pack_reg_stat(gfid, bname, e['stat'])
+ if stat.S_ISLNK(e['stat']['mode']):
+                        # src is not present, so don't sync the symlink as
+                        # we don't know the target. It's ok to ignore. If
+                        # it's unlinked, it's fine. If it's renamed to
+                        # something else, it will be synced then.
+ if e['link'] is not None:
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_symlink(cls, gfid, bname,
+ e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, uid, gid, True)
+ else:
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_reg_stat(cls, gfid, bname,
+ e['stat'])
+ else:
+ cmd_ret = errno_wrap(os.link, [slink, en],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
else:
- errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
+ st = lstat(entry)
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
+ else:
+ if st.st_ino == st1.st_ino:
+ # we have a hard link, we can now unlink source
+ try:
+ errno_wrap(os.unlink, [entry],
+ [ENOENT, ESTALE], [EBUSY])
+ except OSError as e:
+ if e.errno == EISDIR:
+ try:
+ errno_wrap(os.rmdir, [entry],
+ [ENOENT, ESTALE], [EBUSY])
+ except OSError as e:
+ if e.errno == ENOTEMPTY:
+ logging.error(
+ lf("Directory Rename failed. "
+ "Both Old and New"
+                                               " directories exist",
+ old=entry,
+ new=en))
+ else:
+ raise
+ else:
+ raise
+ elif not matching_disk_gfid(gfid, en) and dist_count > 1:
+ collect_failure(e, EEXIST, uid, gid, True)
+ else:
+                        # We are here, which means matching_disk_gfid has
+                        # returned false for both source and destination,
+                        # and the distribution count for the master vol is
+                        # greater than one. That basically means both the
+                        # source and destination exist and are not hardlinks,
+                        # so we are safe to go ahead with the rename here.
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
if blob:
- errno_wrap(Xattr.lsetxattr,
- [pg, 'glusterfs.gfid.newfile', blob],
- [EEXIST],
- [ENOENT, ESTALE, EINVAL])
+ cmd_ret = errno_wrap(Xattr.lsetxattr,
+ [pg, 'glusterfs.gfid.newfile', blob],
+ [EEXIST, ENOENT, ESTALE],
+ [ESTALE, EINVAL, EBUSY])
+ collect_failure(e, cmd_ret, uid, gid)
+
+            # If the UID/GID is non-zero, we are trying to create the
+            # entry with a different UID/GID. Create the entry with
+            # UID:0 and GID:0 first, then call chown to set the UID/GID.
+ if uid != 0 or gid != 0:
+ path = os.path.join(pfx, gfid)
+ cmd_ret = errno_wrap(os.lchown, [path, uid, gid], [ENOENT],
+ [ESTALE, EINVAL])
+ collect_failure(e, cmd_ret, uid, gid)
+
+ return failures
@classmethod
def meta_ops(cls, meta_entries):
logging.debug('Meta-entries: %s' % repr(meta_entries))
+ failures = []
for e in meta_entries:
mode = e['stat']['mode']
uid = e['stat']['uid']
gid = e['stat']['gid']
+ atime = e['stat']['atime']
+ mtime = e['stat']['mtime']
go = e['go']
- errno_wrap(os.chmod, [go, mode], [ENOENT], [ESTALE, EINVAL])
- errno_wrap(os.chown, [go, uid, gid], [ENOENT], [ESTALE, EINVAL])
+            # Linux doesn't support chmod on the symlink itself;
+            # it is always applied to the target file, so the
+            # changelog would record the target file's gfid and
+            # we are good. But 'chown' is supported on the symlink
+            # file, so the changelog would record the symlink's
+            # gfid in such cases. Since we do 'chown', 'chmod' and
+            # 'utime' for each gfid recorded for metadata, and the
+            # changelog only tells us the metadata is on either the
+            # symlink's gfid or the target file's gfid, we should
+            # blindly be doing 'lchown', 'lchmod' and 'utime with
+            # no-dereference'. But since 'lchmod' and 'utime with
+            # no-dereference' are not supported in python3, we have
+            # to rely on 'chmod' and 'utime with dereference'.
+            # Hence we avoid 'chmod' and 'utime' if it's a symlink.
+
+ is_symlink = False
+ cmd_ret = errno_wrap(os.lchown, [go, uid, gid], [ENOENT],
+ [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ continue
+
+ is_symlink = os.path.islink(go)
+
+ if not is_symlink:
+ cmd_ret = errno_wrap(os.chmod, [go, mode],
+ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ failures.append((e, cmd_ret, "chmod"))
+
+ cmd_ret = errno_wrap(os.utime, [go, (atime, mtime)],
+ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ failures.append((e, cmd_ret, "utime"))
+ return failures
@classmethod
@_pathguard
@@ -727,211 +817,273 @@ class Server(object):
return 1.0
-class SlaveLocal(object):
-
- """mix-in class to implement some factes of a slave server
-
- ("mix-in" is sort of like "abstract class", ie. it's not
- instantiated just included in the ancesty DAG. I use "mix-in"
- to indicate that it's not used as an abstract base class,
- rather just taken in to implement additional functionality
- on the basis of the assumed availability of certain interfaces.)
- """
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return not remote
-
- def service_loop(self):
- """start a RePCe server serving self's server
+class Mounter(object):
- stop servicing if a timeout is configured and got no
- keep-alime in that inteval
- """
-
- if boolify(gconf.use_rsync_xattrs) and not privileged():
- raise GsyncdError(
- "using rsync for extended attributes is not supported")
+ """Abstract base class for mounter backends"""
- repce = RepceServer(
- self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs))
- t = syncdutils.Thread(target=lambda: (repce.service_loop(),
- syncdutils.finalize()))
- t.start()
- logging.info("slave listening")
- if gconf.timeout and int(gconf.timeout) > 0:
- while True:
- lp = self.server.last_keep_alive
- time.sleep(int(gconf.timeout))
- if lp == self.server.last_keep_alive:
- logging.info(
- "connection inactive for %d seconds, stopping" %
- int(gconf.timeout))
- break
- else:
- select((), (), ())
+ def __init__(self, params):
+ self.params = params
+ self.mntpt = None
+ self.umount_cmd = []
+ @classmethod
+ def get_glusterprog(cls):
+ gluster_cmd_dir = gconf.get("gluster-command-dir")
+ if rconf.args.subcmd == "slave":
+ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
+ return os.path.join(gluster_cmd_dir, cls.glusterprog)
+
+ def umount_l(self, d):
+ """perform lazy umount"""
+ po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE,
+ universal_newlines=True)
+ po.wait()
+ return po
-class SlaveRemote(object):
+ @classmethod
+ def make_umount_argv(cls, d):
+ raise NotImplementedError
- """mix-in class to implement an interface to a remote slave"""
+ def make_mount_argv(self, label=None):
+ raise NotImplementedError
- def connect_remote(self, rargs=[], **opts):
- """connects to a remote slave
+ def cleanup_mntpt(self, *a):
+ pass
- Invoke an auxiliary utility (slave gsyncd, possibly wrapped)
- which sets up the connection and set up a RePCe client to
- communicate throuh its stdio.
- """
- slave = opts.get('slave', self.url)
- extra_opts = []
- so = getattr(gconf, 'session_owner', None)
- if so:
- extra_opts += ['--session-owner', so]
- if boolify(gconf.use_rsync_xattrs):
- extra_opts.append('--use-rsync-xattrs')
- po = Popen(rargs + gconf.remote_gsyncd.split() + extra_opts +
- ['-N', '--listen', '--timeout', str(gconf.timeout),
- slave],
- stdin=subprocess.PIPE, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- gconf.transport = po
- return self.start_fd_client(po.stdout, po.stdin, **opts)
+ def handle_mounter(self, po):
+ po.wait()
- def start_fd_client(self, i, o, **opts):
- """set up RePCe client, handshake with server
+ def inhibit(self, label):
+ """inhibit a gluster filesystem
- It's cut out as a separate method to let
- subclasses hook into client startup
+ Mount glusterfs over a temporary mountpoint,
+ change into the mount, and lazy unmount the
+ filesystem.
"""
- self.server = RepceClient(i, o)
- rv = self.server.__version__()
- exrv = {'proto': repce.repce_version, 'object': Server.version()}
- da0 = (rv, exrv)
- da1 = ({}, {})
- for i in range(2):
- for k, v in da0[i].iteritems():
- da1[i][k] = int(v)
- if da1[0] != da1[1]:
- raise GsyncdError(
- "RePCe major version mismatch: local %s, remote %s" %
- (exrv, rv))
-
- def rsync(self, files, *args):
- """invoke rsync"""
- if not files:
- raise GsyncdError("no files to sync")
- logging.debug("files: " + ", ".join(files))
- argv = gconf.rsync_command.split() + \
- ['-avR0', '--inplace', '--files-from=-', '--super',
- '--stats', '--numeric-ids', '--no-implied-dirs'] + \
- gconf.rsync_options.split() + \
- (boolify(gconf.use_rsync_xattrs) and ['--xattrs'] or []) + \
- ['.'] + list(args)
- po = Popen(argv, stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- for f in files:
- po.stdin.write(f)
- po.stdin.write('\0')
-
- po.stdin.close()
- po.wait()
- po.terminate_geterr(fail_on_err=False)
+ mpi, mpo = pipe()
+ mh = Popen.fork()
+ if mh:
+ # Parent
+ os.close(mpi)
+ fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ d = None
+ margv = self.make_mount_argv(label)
+ if self.mntpt:
+ # mntpt is determined pre-mount
+ d = self.mntpt
+ mnt_msg = d + '\0'
+ encoded_msg = mnt_msg.encode()
+ os.write(mpo, encoded_msg)
+ po = Popen(margv, **self.mountkw)
+ self.handle_mounter(po)
+ po.terminate_geterr()
+ logging.debug('auxiliary glusterfs mount in place')
+ if not d:
+ # mntpt is determined during mount
+ d = self.mntpt
+ mnt_msg = d + '\0'
+ encoded_msg = mnt_msg.encode()
+ os.write(mpo, encoded_msg)
+ encoded_msg = 'M'.encode()
+ os.write(mpo, encoded_msg)
+ t = syncdutils.Thread(target=lambda: os.chdir(d))
+ t.start()
+ tlim = rconf.starttime + gconf.get("connection-timeout")
+ while True:
+ if not t.isAlive():
+ break
- return po
+ if time.time() >= tlim:
+ syncdutils.finalize(exval=1)
+ time.sleep(1)
+ os.close(mpo)
+ _, rv = syncdutils.waitpid(mh, 0)
+ if rv:
+ rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \
+ (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0)
+ logging.warn(lf('stale mount possibly left behind',
+ path=d))
+ raise GsyncdError("cleaning up temp mountpoint %s "
+ "failed with status %d" %
+ (d, rv))
+ else:
+ rv = 0
+ try:
+ os.setsid()
+ os.close(mpo)
+ mntdata = ''
+ while True:
+ c = os.read(mpi, 1)
+ c = c.decode()
+ if not c:
+ break
+ mntdata += c
+ if mntdata:
+ mounted = False
+ if mntdata[-1] == 'M':
+ mntdata = mntdata[:-1]
+ assert(mntdata)
+ mounted = True
+ assert(mntdata[-1] == '\0')
+ mntpt = mntdata[:-1]
+ assert(mntpt)
+
+ umount_master = False
+ umount_slave = False
+ if rconf.args.subcmd == "worker" \
+ and not unshare_propagation_supported() \
+ and not gconf.get("access-mount"):
+ umount_master = True
+ if rconf.args.subcmd == "slave" \
+ and not gconf.get("slave-access-mount"):
+ umount_slave = True
+
+ if mounted and (umount_master or umount_slave):
+ po = self.umount_l(mntpt)
+ po.terminate_geterr(fail_on_err=False)
+ if po.returncode != 0:
+ po.errlog()
+ rv = po.returncode
+ logging.debug("Lazy umount done: %s" % mntpt)
+ if umount_master or umount_slave:
+ self.cleanup_mntpt(mntpt)
+ except:
+ logging.exception('mount cleanup failure:')
+ rv = 200
+ os._exit(rv)
+
+        # Polling the dht.subvol.status value.
+ RETRIES = 10
+ while not gf_mount_ready():
+ if RETRIES < 0:
+ logging.error('Subvols are not up')
+ break
+ RETRIES -= 1
+ time.sleep(0.2)
- def tarssh(self, files, slaveurl):
- """invoke tar+ssh
- -z (compress) can be use if needed, but ommitting it now
- as it results in wierd error (tar+ssh errors out (errcode: 2)
- """
- if not files:
- raise GsyncdError("no files to sync")
- logging.debug("files: " + ", ".join(files))
- (host, rdir) = slaveurl.split(':')
- tar_cmd = ["tar", "-cf", "-", "--files-from", "-"]
- ssh_cmd = gconf.ssh_command_tar.split() + \
- [host, "tar", "--overwrite", "-xf", "-", "-C", rdir]
- p0 = Popen(tar_cmd, stdout=subprocess.PIPE,
- stdin=subprocess.PIPE, stderr=subprocess.PIPE)
- p1 = Popen(ssh_cmd, stdin=p0.stdout, stderr=subprocess.PIPE)
- for f in files:
- p0.stdin.write(f)
- p0.stdin.write('\n')
- p0.stdin.close()
+ logging.debug('auxiliary glusterfs mount prepared')
- # wait() for tar to terminate, collecting any errors, further
- # waiting for transfer to complete
- p0.wait()
- p0.terminate_geterr(fail_on_err=False)
- p1.wait()
- p1.terminate_geterr(fail_on_err=False)
+class DirectMounter(Mounter):
- return p1
+ """mounter backend which calls mount(8), umount(8) directly"""
+ mountkw = {'stderr': subprocess.PIPE, 'universal_newlines': True}
+ glusterprog = 'glusterfs'
-class AbstractUrl(object):
+ @staticmethod
+ def make_umount_argv(d):
+ return ['umount', '-l', d]
- """abstract base class for url scheme classes"""
+ def make_mount_argv(self, label=None):
+ self.mntpt = tempfile.mkdtemp(prefix='gsyncd-aux-mount-')
+ rconf.mount_point = self.mntpt
+ return [self.get_glusterprog()] + \
+ ['--' + p for p in self.params] + [self.mntpt]
- def __init__(self, path, pattern):
- m = re.search(pattern, path)
- if not m:
- raise GsyncdError("malformed path")
- self.path = path
- return m.groups()
+ def cleanup_mntpt(self, mntpt=None):
+ if not mntpt:
+ mntpt = self.mntpt
+ errno_wrap(os.rmdir, [mntpt], [ENOENT, EBUSY])
- @property
- def scheme(self):
- return type(self).__name__.lower()
- def canonical_path(self):
- return self.path
+class MountbrokerMounter(Mounter):
- def get_url(self, canonical=False, escaped=False):
- """format self's url in various styles"""
- if canonical:
- pa = self.canonical_path()
- else:
- pa = self.path
- u = "://".join((self.scheme, pa))
- if escaped:
- u = syncdutils.escape(u)
- return u
+ """mounter backend using the mountbroker gluster service"""
- @property
- def url(self):
- return self.get_url()
+ mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE,
+ 'universal_newlines': True}
+ glusterprog = 'gluster'
+ @classmethod
+ def make_cli_argv(cls):
+ return [cls.get_glusterprog()] + ['--remote-host=localhost'] + \
+ gconf.get("gluster-cli-options").split() + ['system::']
-class FILE(AbstractUrl, SlaveLocal, SlaveRemote):
+ @classmethod
+ def make_umount_argv(cls, d):
+ return cls.make_cli_argv() + ['umount', d, 'lazy']
- """scheme class for file:// urls
+ def make_mount_argv(self, label):
+ return self.make_cli_argv() + \
+ ['mount', label, 'user-map-root=' +
+ syncdutils.getusername()] + self.params
- can be used to represent a file slave server
- on slave side, or interface to a remote file
- file server on master side
- """
+ def handle_mounter(self, po):
+ self.mntpt = po.stdout.readline()[:-1]
+ rconf.mount_point = self.mntpt
+ rconf.mountbroker = True
+ self.umount_cmd = self.make_cli_argv() + ['umount']
+ rconf.mbr_umount_cmd = self.umount_cmd
+ po.stdout.close()
+ sup(self, po)
+ if po.returncode != 0:
+ # if cli terminated with error due to being
+ # refused by glusterd, what it put
+ # out on stdout is a diagnostic message
+ logging.error(lf('glusterd answered', mnt=self.mntpt))
- class FILEServer(Server):
- """included server flavor"""
- pass
+class GLUSTERServer(Server):
- server = FILEServer
+ "server enhancements for a glusterfs backend"""
- def __init__(self, path):
- sup(self, path, '^/')
+ @classmethod
+ def _attr_unpack_dict(cls, xattr, extra_fields=''):
+ """generic volume mark fetching/parsing backed"""
+ fmt_string = cls.NTV_FMTSTR + extra_fields
+ buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
+ buf = str_to_bytearray(buf)
+ vm = struct.unpack(fmt_string, buf)
+ m = re.match(
+ '(.{8})(.{4})(.{4})(.{4})(.{12})',
+ "".join(['%02x' % x for x in vm[2:18]]))
+ uuid = '-'.join(m.groups())
+ volinfo = {'version': vm[0:2],
+ 'uuid': uuid,
+ 'retval': vm[18],
+ 'volume_mark': vm[19:21],
+ }
+ if extra_fields:
+ return volinfo, vm[-len(extra_fields):]
+ else:
+ return volinfo
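
    # Illustrative aside (not part of the patch): _attr_unpack_dict() above
    # turns the 16 raw uuid bytes in vm[2:18] into hex and regroups them
    # 8-4-4-4-12. A minimal standalone sketch of just that step, with a
    # hypothetical helper name:
    #
    #     import re
    #
    #     def canonical_uuid(raw16):
    #         hexstr = "".join('%02x' % b for b in bytearray(raw16))
    #         m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', hexstr)
    #         return '-'.join(m.groups())
    #
    #     # canonical_uuid(bytearray(range(16)))
    #     #   -> '00010203-0405-0607-0809-0a0b0c0d0e0f'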
- def connect(self):
- """inhibit the resource beyond"""
- os.chdir(self.path)
+ @classmethod
+ def foreign_volume_infos(cls):
+ """return list of valid (not expired) foreign volume marks"""
+ dict_list = []
+ xattr_list = Xattr.llistxattr_buf('.')
+ for ele in xattr_list:
+ if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
+ d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
+ now = int(time.time())
+ if x[0] > now:
+ logging.debug("volinfo[%s] expires: %d "
+ "(%d sec later)" %
+ (d['uuid'], x[0], x[0] - now))
+ d['timeout'] = x[0]
+ dict_list.append(d)
+ else:
+ try:
+ Xattr.lremovexattr('.', ele)
+ except OSError:
+ pass
+ return dict_list
- def rsync(self, files):
- return sup(self, files, self.path)
+ @classmethod
+ def native_volume_info(cls):
+ """get the native volume mark of the underlying gluster volume"""
+ try:
+ return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE,
+ 'volume-mark']))
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno != ENODATA:
+ raise
-class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
+class GLUSTER(object):
"""scheme class for gluster:// urls
@@ -941,238 +1093,17 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
(slave-ish features come from the mixins, master
functionality is outsourced to GMaster from master)
"""
-
- class GLUSTERServer(Server):
-
- "server enhancements for a glusterfs backend"""
-
- @classmethod
- def _attr_unpack_dict(cls, xattr, extra_fields=''):
- """generic volume mark fetching/parsing backed"""
- fmt_string = cls.NTV_FMTSTR + extra_fields
- buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
- vm = struct.unpack(fmt_string, buf)
- m = re.match(
- '(.{8})(.{4})(.{4})(.{4})(.{12})',
- "".join(['%02x' % x for x in vm[2:18]]))
- uuid = '-'.join(m.groups())
- volinfo = {'version': vm[0:2],
- 'uuid': uuid,
- 'retval': vm[18],
- 'volume_mark': vm[19:21],
- }
- if extra_fields:
- return volinfo, vm[-len(extra_fields):]
- else:
- return volinfo
-
- @classmethod
- def foreign_volume_infos(cls):
- """return list of valid (not expired) foreign volume marks"""
- dict_list = []
- xattr_list = Xattr.llistxattr_buf('.')
- for ele in xattr_list:
- if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
- d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
- now = int(time.time())
- if x[0] > now:
- logging.debug("volinfo[%s] expires: %d "
- "(%d sec later)" %
- (d['uuid'], x[0], x[0] - now))
- d['timeout'] = x[0]
- dict_list.append(d)
- else:
- try:
- Xattr.lremovexattr('.', ele)
- except OSError:
- pass
- return dict_list
-
- @classmethod
- def native_volume_info(cls):
- """get the native volume mark of the underlying gluster volume"""
- try:
- return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE,
- 'volume-mark']))
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno != ENODATA:
- raise
-
server = GLUSTERServer
- def __init__(self, path):
- self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern)
-
- def canonical_path(self):
- return ':'.join([gethostbyname(self.host), self.volume])
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return not remote or \
- (isinstance(remote, SSH) and isinstance(remote.inner_rsc, GLUSTER))
-
- class Mounter(object):
-
- """Abstract base class for mounter backends"""
-
- def __init__(self, params):
- self.params = params
- self.mntpt = None
-
- @classmethod
- def get_glusterprog(cls):
- return os.path.join(gconf.gluster_command_dir, cls.glusterprog)
-
- def umount_l(self, d):
- """perform lazy umount"""
- po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE)
- po.wait()
- return po
-
- @classmethod
- def make_umount_argv(cls, d):
- raise NotImplementedError
-
- def make_mount_argv(self, *a):
- raise NotImplementedError
-
- def cleanup_mntpt(self, *a):
- pass
-
- def handle_mounter(self, po):
- po.wait()
-
- def inhibit(self, *a):
- """inhibit a gluster filesystem
-
- Mount glusterfs over a temporary mountpoint,
- change into the mount, and lazy unmount the
- filesystem.
- """
-
- mpi, mpo = os.pipe()
- mh = Popen.fork()
- if mh:
- os.close(mpi)
- fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
- d = None
- margv = self.make_mount_argv(*a)
- if self.mntpt:
- # mntpt is determined pre-mount
- d = self.mntpt
- os.write(mpo, d + '\0')
- po = Popen(margv, **self.mountkw)
- self.handle_mounter(po)
- po.terminate_geterr()
- logging.debug('auxiliary glusterfs mount in place')
- if not d:
- # mntpt is determined during mount
- d = self.mntpt
- os.write(mpo, d + '\0')
- os.write(mpo, 'M')
- t = syncdutils.Thread(target=lambda: os.chdir(d))
- t.start()
- tlim = gconf.starttime + int(gconf.connection_timeout)
- while True:
- if not t.isAlive():
- break
- if time.time() >= tlim:
- syncdutils.finalize(exval=1)
- time.sleep(1)
- os.close(mpo)
- _, rv = syncdutils.waitpid(mh, 0)
- if rv:
- rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \
- (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0)
- logging.warn('stale mount possibly left behind on ' + d)
- raise GsyncdError("cleaning up temp mountpoint %s "
- "failed with status %d" %
- (d, rv))
- else:
- rv = 0
- try:
- os.setsid()
- os.close(mpo)
- mntdata = ''
- while True:
- c = os.read(mpi, 1)
- if not c:
- break
- mntdata += c
- if mntdata:
- mounted = False
- if mntdata[-1] == 'M':
- mntdata = mntdata[:-1]
- assert(mntdata)
- mounted = True
- assert(mntdata[-1] == '\0')
- mntpt = mntdata[:-1]
- assert(mntpt)
- if mounted:
- po = self.umount_l(mntpt)
- po.terminate_geterr(fail_on_err=False)
- if po.returncode != 0:
- po.errlog()
- rv = po.returncode
- self.cleanup_mntpt(mntpt)
- except:
- logging.exception('mount cleanup failure:')
- rv = 200
- os._exit(rv)
- logging.debug('auxiliary glusterfs mount prepared')
-
- class DirectMounter(Mounter):
-
- """mounter backend which calls mount(8), umount(8) directly"""
-
- mountkw = {'stderr': subprocess.PIPE}
- glusterprog = 'glusterfs'
-
- @staticmethod
- def make_umount_argv(d):
- return ['umount', '-l', d]
-
- def make_mount_argv(self):
- self.mntpt = tempfile.mkdtemp(prefix='gsyncd-aux-mount-')
- return [self.get_glusterprog()] + \
- ['--' + p for p in self.params] + [self.mntpt]
-
- def cleanup_mntpt(self, mntpt=None):
- if not mntpt:
- mntpt = self.mntpt
- os.rmdir(mntpt)
-
- class MountbrokerMounter(Mounter):
-
- """mounter backend using the mountbroker gluster service"""
-
- mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE}
- glusterprog = 'gluster'
-
- @classmethod
- def make_cli_argv(cls):
- return [cls.get_glusterprog()] + ['--remote-host=localhost'] + \
- gconf.gluster_cli_options.split() + ['system::']
-
- @classmethod
- def make_umount_argv(cls, d):
- return cls.make_cli_argv() + ['umount', d, 'lazy']
-
- def make_mount_argv(self, label):
- return self.make_cli_argv() + \
- ['mount', label, 'user-map-root=' +
- syncdutils.getusername()] + self.params
+ def __init__(self, host, volume):
+ self.path = "%s:%s" % (host, volume)
+ self.host = host
+ self.volume = volume
- def handle_mounter(self, po):
- self.mntpt = po.stdout.readline()[:-1]
- po.stdout.close()
- sup(self, po)
- if po.returncode != 0:
- # if cli terminated with error due to being
- # refused by glusterd, what it put
- # out on stdout is a diagnostic message
- logging.error('glusterd answered: %s' % self.mntpt)
+ global slv_volume
+ global slv_host
+ slv_volume = self.volume
+ slv_host = self.host
def connect(self):
"""inhibit the resource beyond
@@ -1182,20 +1113,30 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
with given backend
"""
- label = getattr(gconf, 'mountbroker', None)
+ logging.info("Mounting gluster volume locally...")
+ t0 = time.time()
+ label = gconf.get('mountbroker', None)
if not label and not privileged():
label = syncdutils.getusername()
- mounter = label and self.MountbrokerMounter or self.DirectMounter
- params = gconf.gluster_params.split() + \
- (gconf.gluster_log_level and ['log-level=' +
- gconf.gluster_log_level] or []) + \
- ['log-file=' + gconf.gluster_log_file, 'volfile-server=' +
- self.host, 'volfile-id=' + self.volume, 'client-pid=-1']
- mounter(params).inhibit(*[l for l in [label] if l])
-
- def connect_remote(self, *a, **kw):
- sup(self, *a, **kw)
- self.slavedir = "/proc/%d/cwd" % self.server.pid()
+ mounter = label and MountbrokerMounter or DirectMounter
+
+ log_file = gconf.get("gluster-log-file")
+ if rconf.args.subcmd == "slave":
+ log_file = gconf.get("slave-gluster-log-file")
+
+ log_level = gconf.get("gluster-log-level")
+ if rconf.args.subcmd == "slave":
+ log_level = gconf.get("slave-gluster-log-level")
+
+ params = gconf.get("gluster-params").split() + \
+ ['log-level=' + log_level] + \
+ ['log-file=' + log_file, 'volfile-server=' + self.host] + \
+ ['volfile-id=' + self.volume, 'client-pid=-1']
+
+ self.mounter = mounter(params)
+ self.mounter.inhibit(label)
+ logging.info(lf("Mounted gluster volume",
+ duration="%.4f" % (time.time() - t0)))
def gmaster_instantiate_tuple(self, slave):
"""return a tuple of the 'one shot' and the 'main crawl'
@@ -1204,7 +1145,7 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
gmaster_builder()(self, slave),
gmaster_builder('changeloghistory')(self, slave))
- def service_loop(self, *args):
+ def service_loop(self, slave=None):
"""enter service loop
- if slave given, instantiate GMaster and
@@ -1212,152 +1153,173 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
master behavior
- else do that's what's inherited
"""
- if args:
- slave = args[0]
- if gconf.local_path:
- class brickserver(FILE.FILEServer):
- local_path = gconf.local_path
- aggregated = self.server
-
- @classmethod
- def entries(cls, path):
- e = super(brickserver, cls).entries(path)
- # on the brick don't mess with /.glusterfs
- if path == '.':
- try:
- e.remove('.glusterfs')
- except ValueError:
- pass
- return e
-
- @classmethod
- def lstat(cls, e):
- """ path based backend stat """
- return super(brickserver, cls).lstat(e)
-
- @classmethod
- def gfid(cls, e):
- """ path based backend gfid fetch """
- return super(brickserver, cls).gfid(e)
-
- @classmethod
- def linkto_check(cls, e):
- return super(brickserver, cls).linkto_check(e)
- if gconf.slave_id:
- # define {,set_}xtime in slave, thus preempting
- # the call to remote, so that it takes data from
- # the local brick
- slave.server.xtime = types.MethodType(
- lambda _self, path, uuid: (
- brickserver.xtime(path,
- uuid + '.' + gconf.slave_id)
- ),
- slave.server)
- slave.server.stime = types.MethodType(
- lambda _self, path, uuid: (
- brickserver.stime(path,
- uuid + '.' + gconf.slave_id)
- ),
- slave.server)
- slave.server.set_stime = types.MethodType(
- lambda _self, path, uuid, mark: (
- brickserver.set_stime(path,
- uuid + '.' + gconf.slave_id,
- mark)
- ),
- slave.server)
- (g1, g2, g3) = self.gmaster_instantiate_tuple(slave)
- g1.master.server = brickserver
- g2.master.server = brickserver
- g3.master.server = brickserver
- else:
- (g1, g2, g3) = self.gmaster_instantiate_tuple(slave)
- g1.master.server.aggregated = gmaster.master.server
- g2.master.server.aggregated = gmaster.master.server
- g3.master.server.aggregated = gmaster.master.server
- # bad bad bad: bad way to do things like this
- # need to make this elegant
- # register the crawlers and start crawling
- # g1 ==> Xsync, g2 ==> config.change_detector(changelog by default)
- # g3 ==> changelog History
- changelog_register_failed = False
- (inf, ouf, ra, wa) = gconf.rpc_fd.split(',')
- os.close(int(ra))
- os.close(int(wa))
- changelog_agent = RepceClient(int(inf), int(ouf))
- rv = changelog_agent.version()
- if int(rv) != CHANGELOG_AGENT_CLIENT_VERSION:
+ if rconf.args.subcmd == "slave":
+ if gconf.get("use-rsync-xattrs") and not privileged():
raise GsyncdError(
- "RePCe major version mismatch(changelog agent): "
- "local %s, remote %s" %
- (CHANGELOG_AGENT_CLIENT_VERSION, rv))
+ "using rsync for extended attributes is not supported")
+
+ repce = RepceServer(
+ self.server, sys.stdin, sys.stdout, gconf.get("sync-jobs"))
+ t = syncdutils.Thread(target=lambda: (repce.service_loop(),
+ syncdutils.finalize()))
+ t.start()
+ logging.info("slave listening")
+ if gconf.get("slave-timeout") and gconf.get("slave-timeout") > 0:
+ while True:
+ lp = self.server.last_keep_alive
+ time.sleep(gconf.get("slave-timeout"))
+ if lp == self.server.last_keep_alive:
+ logging.info(
+ lf("connection inactive, stopping",
+ timeout=gconf.get("slave-timeout")))
+ break
+ else:
+ select((), (), ())
- try:
- workdir = g2.setup_working_dir()
- # Register only when change_detector is not set to
- # xsync, else agent will generate changelog files
- # in .processing directory of working dir
- if gconf.change_detector != 'xsync':
- # register with the changelog library
- # 9 == log level (DEBUG)
- # 5 == connection retries
- changelog_agent.register(gconf.local_path,
- workdir, gconf.changelog_log_file,
- g2.CHANGELOG_LOG_LEVEL,
- g2.CHANGELOG_CONN_RETRIES)
-
- register_time = int(time.time())
- g2.register(register_time, changelog_agent)
- g3.register(register_time, changelog_agent)
- except ChangelogException:
- changelog_register_failed = True
- register_time = None
- logging.info("Changelog register failed, fallback to xsync")
-
- g1.register()
- logging.info("Register time: %s" % register_time)
- # oneshot: Try to use changelog history api, if not
- # available switch to FS crawl
- # Note: if config.change_detector is xsync then
- # it will not use changelog history api
- try:
- if not changelog_register_failed:
- g3.crawlwrap(oneshot=True)
- else:
- g1.crawlwrap(oneshot=True)
- except (ChangelogException, NoPurgeTimeAvailable,
- PartialHistoryAvailable) as e:
- if isinstance(e, ChangelogException):
- logging.info('Changelog history crawl failed, fallback '
- 'to xsync: %s - %s' % (e.errno, e.strerror))
- elif isinstance(e, PartialHistoryAvailable):
- logging.info('Partial history available, using xsync crawl'
- ' after consuming history '
- 'till %s' % str(e))
- g1.crawlwrap(oneshot=True, no_stime_update=True,
- register_time=register_time)
-
- # crawl loop: Try changelog crawl, if failed
- # switch to FS crawl
- try:
- if not changelog_register_failed:
- g2.crawlwrap()
- else:
- g1.crawlwrap()
- except ChangelogException as e:
- logging.info('Changelog crawl failed, fallback to xsync')
- g1.crawlwrap()
- else:
- sup(self, *args)
+ return
+
+ class brickserver(Server):
+ local_path = rconf.args.local_path
+ aggregated = self.server
- def rsync(self, files):
- return sup(self, files, self.slavedir)
+ @classmethod
+ def entries(cls, path):
+ e = super(brickserver, cls).entries(path)
+ # on the brick don't mess with /.glusterfs
+ if path == '.':
+ try:
+ e.remove('.glusterfs')
+ e.remove('.trashcan')
+ except ValueError:
+ pass
+ return e
+
+ @classmethod
+ def lstat(cls, e):
+ """ path based backend stat """
+ return super(brickserver, cls).lstat(e)
+
+ @classmethod
+ def gfid(cls, e):
+ """ path based backend gfid fetch """
+ return super(brickserver, cls).gfid(e)
+
+ @classmethod
+ def linkto_check(cls, e):
+ return super(brickserver, cls).linkto_check(e)
+
+ # define {,set_}xtime in slave, thus preempting
+ # the call to remote, so that it takes data from
+ # the local brick
+ slave.server.xtime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.xtime(path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.stime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.stime(path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.entry_stime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.entry_stime(
+ path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.set_stime = types.MethodType(
+ lambda _self, path, uuid, mark: (
+ brickserver.set_stime(path,
+ uuid + '.' + rconf.args.slave_id,
+ mark)
+ ),
+ slave.server)
+ slave.server.set_entry_stime = types.MethodType(
+ lambda _self, path, uuid, mark: (
+ brickserver.set_entry_stime(
+ path,
+ uuid + '.' + rconf.args.slave_id,
+ mark)
+ ),
+ slave.server)
+
+ (g1, g2, g3) = self.gmaster_instantiate_tuple(slave)
+ g1.master.server = brickserver
+ g2.master.server = brickserver
+ g3.master.server = brickserver
+
+ # bad bad bad: bad way to do things like this
+ # need to make this elegant
+ # register the crawlers and start crawling
+ # g1 ==> Xsync, g2 ==> config.change_detector(changelog by default)
+ # g3 ==> changelog History
+ status = GeorepStatus(gconf.get("state-file"),
+ rconf.args.local_node,
+ rconf.args.local_path,
+ rconf.args.local_node_id,
+ rconf.args.master,
+ rconf.args.slave)
+ status.reset_on_worker_start()
+
+ try:
+ workdir = g2.setup_working_dir()
+ # Register only when change_detector is not set to
+ # xsync, else agent will generate changelog files
+ # in .processing directory of working dir
+ if gconf.get("change-detector") != 'xsync':
+ # register with the changelog library
+ # 9 == log level (DEBUG)
+ # 5 == connection retries
+ libgfchangelog.register(rconf.args.local_path,
+ workdir,
+ gconf.get("changelog-log-file"),
+ get_changelog_log_level(
+ gconf.get("changelog-log-level")),
+ g2.CHANGELOG_CONN_RETRIES)
+
+ register_time = int(time.time())
+ g2.register(register_time, status)
+ g3.register(register_time, status)
+ except ChangelogException as e:
+ logging.error(lf("Changelog register failed", error=e))
+ sys.exit(1)
+
+ g1.register(status=status)
+ logging.info(lf("Register time",
+ time=register_time))
+ # oneshot: Try to use changelog history api, if not
+ # available switch to FS crawl
+ # Note: if config.change_detector is xsync then
+ # it will not use changelog history api
+ try:
+ g3.crawlwrap(oneshot=True)
+ except PartialHistoryAvailable as e:
+ logging.info(lf('Partial history available, using xsync crawl'
+ ' after consuming history',
+ till=e))
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except ChangelogHistoryNotAvailable:
+ logging.info('Changelog history not available, using xsync')
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except NoStimeAvailable:
+ logging.info('No stime available, using xsync crawl')
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except ChangelogException as e:
+ logging.error(lf("Changelog History Crawl failed",
+ error=e))
+ sys.exit(1)
- def tarssh(self, files):
- return sup(self, files, self.slavedir)
+ try:
+ g2.crawlwrap()
+ except ChangelogException as e:
+ logging.error(lf("Changelog crawl failed", error=e))
+ sys.exit(1)
-class SSH(AbstractUrl, SlaveRemote):
+class SSH(object):
"""scheme class for ssh:// urls
@@ -1365,13 +1327,9 @@ class SSH(AbstractUrl, SlaveRemote):
implementing an ssh based proxy
"""
- def __init__(self, path):
- self.remote_addr, inner_url = sup(self, path,
- '^((?:%s@)?%s):(.+)' %
- tuple([r.pattern
- for r in (UserRX, HostRX)]))
- self.inner_rsc = parse_url(inner_url)
- self.volume = inner_url[1:]
+ def __init__(self, host, volume):
+ self.remote_addr = host
+ self.volume = volume
@staticmethod
def parse_ssh_address(self):
@@ -1383,35 +1341,28 @@ class SSH(AbstractUrl, SlaveRemote):
self.remotehost = h
return {'user': u, 'host': h}
- def canonical_path(self):
- rap = self.parse_ssh_address(self)
- remote_addr = '@'.join([rap['user'], gethostbyname(rap['host'])])
- return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)])
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return False
-
- def start_fd_client(self, *a, **opts):
- """customizations for client startup
+ def start_fd_client(self, i, o):
+ """set up RePCe client, handshake with server
- - be a no-op if we are to daemonize (client startup is deferred
- to post-daemon stage)
- - determine target url for rsync after consulting server
+ It's cut out as a separate method to let
+ subclasses hook into client startup
"""
- if opts.get('deferred'):
- return a
- sup(self, *a)
- ityp = type(self.inner_rsc)
- if ityp == FILE:
- slavepath = self.inner_rsc.path
- elif ityp == GLUSTER:
- slavepath = "/proc/%d/cwd" % self.server.pid()
- else:
- raise NotImplementedError
+ self.server = RepceClient(i, o)
+ rv = self.server.__version__()
+ exrv = {'proto': repce.repce_version, 'object': Server.version()}
+ da0 = (rv, exrv)
+ da1 = ({}, {})
+ for i in range(2):
+ for k, v in da0[i].items():
+ da1[i][k] = int(v)
+ if da1[0] != da1[1]:
+ raise GsyncdError(
+ "RePCe major version mismatch: local %s, remote %s" %
+ (exrv, rv))
+ slavepath = "/proc/%d/cwd" % self.server.pid()
self.slaveurl = ':'.join([self.remote_addr, slavepath])
- def connect_remote(self, go_daemon=None):
+ def connect_remote(self):
"""connect to inner slave url through outer ssh url
Wrap the connecting utility in ssh.
@@ -1429,41 +1380,204 @@ class SSH(AbstractUrl, SlaveRemote):
[NB. ATM gluster product does not makes use of interactive
authentication.]
"""
- if go_daemon == 'done':
- return self.start_fd_client(*self.fd_pair)
-
syncdutils.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'),
self.remote_addr,
- self.inner_rsc.url)
-
- deferred = go_daemon == 'postconn'
- ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args +
- [self.remote_addr],
- slave=self.inner_rsc.url, deferred=deferred)
-
- if deferred:
- # send a message to peer so that we can wait for
- # the answer from which we know connection is
- # established and we can proceed with daemonization
- # (doing that too early robs the ssh passwd prompt...)
- # However, we'd better not start the RepceClient
- # before daemonization (that's not preserved properly
- # in daemon), we just do a an ad-hoc linear put/get.
- i, o = ret
- inf = os.fdopen(i)
- repce.send(o, None, '__repce_version__')
- select((inf,), (), ())
- repce.recv(inf)
- # hack hack hack: store a global reference to the file
- # to save it from getting GC'd which implies closing it
- gconf.permanent_handles.append(inf)
- self.fd_pair = (i, o)
- return 'should'
-
- def rsync(self, files):
- return sup(self, files, '-e',
- " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args),
- *(gconf.rsync_ssh_options.split() + [self.slaveurl]))
-
- def tarssh(self, files):
- return sup(self, files, self.slaveurl)
+ self.volume)
+
+ logging.info("Initializing SSH connection between master and slave...")
+ t0 = time.time()
+
+ extra_opts = []
+ remote_gsyncd = gconf.get("remote-gsyncd")
+ if remote_gsyncd == "":
+ remote_gsyncd = "/nonexistent/gsyncd"
+
+ if gconf.get("use-rsync-xattrs"):
+ extra_opts.append('--use-rsync-xattrs')
+
+ args_to_slave = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ rconf.ssh_ctl_args + [self.remote_addr] + \
+ [remote_gsyncd, "slave"] + \
+ extra_opts + \
+ [rconf.args.master, rconf.args.slave] + \
+ [
+ '--master-node', rconf.args.local_node,
+ '--master-node-id', rconf.args.local_node_id,
+ '--master-brick', rconf.args.local_path,
+ '--local-node', rconf.args.resource_remote,
+ '--local-node-id', rconf.args.resource_remote_id] + \
+ [
+ # Add all config arguments here, slave gsyncd will not use
+ # config file in slave side, so all overriding options should
+ # be sent as arguments
+ '--slave-timeout', str(gconf.get("slave-timeout")),
+ '--slave-log-level', gconf.get("slave-log-level"),
+ '--slave-gluster-log-level',
+ gconf.get("slave-gluster-log-level"),
+ '--slave-gluster-command-dir',
+ gconf.get("slave-gluster-command-dir"),
+ '--master-dist-count',
+ str(gconf.get("master-distribution-count"))]
+
+ if gconf.get("slave-access-mount"):
+ args_to_slave.append('--slave-access-mount')
+
+ if rconf.args.debug:
+ args_to_slave.append('--debug')
+
+ po = Popen(args_to_slave,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ rconf.transport = po
+ self.start_fd_client(po.stdout, po.stdin)
+ logging.info(lf("SSH connection between master and slave established.",
+ duration="%.4f" % (time.time() - t0)))
+
+ def rsync(self, files, *args, **kw):
+ """invoke rsync"""
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+
+ extra_rsync_flags = []
+        # Performance flag: include --ignore-missing-args if the rsync
+        # version is 3.1.0 or newer.
+ if gconf.get("rsync-opt-ignore-missing-args") and \
+ get_rsync_version(gconf.get("rsync-command")) >= "3.1.0":
+ extra_rsync_flags = ["--ignore-missing-args"]
+
+ rsync_ssh_opts = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ rconf.ssh_ctl_args + \
+ gconf.get("rsync-ssh-options").split()
+
+ argv = [
+ gconf.get("rsync-command"),
+ '-aR0',
+ '--inplace',
+ '--files-from=-',
+ '--super',
+ '--stats',
+ '--numeric-ids',
+ '--no-implied-dirs'
+ ]
+
+ if gconf.get("rsync-opt-existing"):
+ argv += ["--existing"]
+
+ if gconf.get("sync-xattrs"):
+ argv += ['--xattrs']
+
+ if gconf.get("sync-acls"):
+ argv += ['--acls']
+
+ argv = argv + \
+ gconf.get("rsync-options").split() + \
+ extra_rsync_flags + ['.'] + \
+ ["-e", " ".join(rsync_ssh_opts)] + \
+ [self.slaveurl]
+
+ log_rsync_performance = gconf.getr("log-rsync-performance", False)
+
+ if log_rsync_performance:
+            # Use stdout=PIPE only when log_rsync_performance is enabled.
+            # Otherwise nobody would consume rsync's stdout, and once the
+            # pipe filled up rsync would hang.
+ po = Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, universal_newlines=True)
+ else:
+ po = Popen(argv, stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+
+ for f in files:
+ po.stdin.write(f)
+ po.stdin.write('\0')
+
+ stdout, stderr = po.communicate()
+
+ if kw.get("log_err", False):
+ for errline in stderr.strip().split("\n")[:-1]:
+ logging.error(lf("SYNC Error",
+ sync_engine="Rsync",
+ error=errline))
+
+ if log_rsync_performance:
+ rsync_msg = []
+ for line in stdout.split("\n"):
+ if line.startswith("Number of files:") or \
+ line.startswith("Number of regular files transferred:") or \
+ line.startswith("Total file size:") or \
+ line.startswith("Total transferred file size:") or \
+ line.startswith("Literal data:") or \
+ line.startswith("Matched data:") or \
+ line.startswith("Total bytes sent:") or \
+ line.startswith("Total bytes received:") or \
+ line.startswith("sent "):
+ rsync_msg.append(line)
+ logging.info(lf("rsync performance",
+ data=", ".join(rsync_msg)))
+
+ return po
+
+ def tarssh(self, files, log_err=False):
+ """invoke tar+ssh
+        -z (compress) can be used if needed, but omitting it for now
+        as it results in a weird error (tar+ssh errors out with errcode: 2)
+ """
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+ (host, rdir) = self.slaveurl.split(':')
+
+ tar_cmd = ["tar"] + \
+ ["--sparse", "-cf", "-", "--files-from", "-"]
+ ssh_cmd = gconf.get("ssh-command").split() + \
+ gconf.get("ssh-options-tar").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ [host, "tar"] + \
+ ["--overwrite", "-xf", "-", "-C", rdir]
+ p0 = Popen(tar_cmd, stdout=subprocess.PIPE,
+ stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ p1 = Popen(ssh_cmd, stdin=p0.stdout, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ for f in files:
+ p0.stdin.write(f)
+ p0.stdin.write('\n')
+
+ p0.stdin.close()
+ p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits.
+
+ # stdin and stdout of p0 is already closed, Reset to None and
+ # wait for child process to complete
+ p0.stdin = None
+ p0.stdout = None
+
+ def wait_for_tar(p0):
+ _, stderr = p0.communicate()
+ if log_err:
+ for errline in stderr.strip().split("\n")[:-1]:
+ if "No such file or directory" not in errline:
+ logging.error(lf("SYNC Error",
+ sync_engine="Tarssh",
+ error=errline))
+
+ t = syncdutils.Thread(target=wait_for_tar, args=(p0, ))
+ # wait for tar to terminate, collecting any errors, further
+ # waiting for transfer to complete
+ t.start()
+
+ # wait for ssh process
+ _, stderr1 = p1.communicate()
+ t.join()
+
+ if log_err:
+ for errline in stderr1.strip().split("\n")[:-1]:
+ logging.error(lf("SYNC Error",
+ sync_engine="Tarssh",
+ error=errline))
+
+ return p1
diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
new file mode 100644
index 00000000000..b8508532e30
--- /dev/null
+++ b/geo-replication/syncdaemon/subcmds.py
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+from syncdutils import lf
+import logging
+import gsyncdconfig as gconf
+
+
+ERROR_CONFIG_INVALID = 2
+ERROR_CONFIG_INVALID_VALUE = 3
+ERROR_CONFIG_NOT_CONFIGURABLE = 4
+
+
+def subcmd_monitor_status(args):
+ from gsyncdstatus import set_monitor_status
+ from rconf import rconf
+
+ set_monitor_status(gconf.get("state-file"), args.status)
+ rconf.log_exit = False
+ logging.info(lf("Monitor Status Change", status=args.status))
+
+
+def subcmd_status(args):
+ from gsyncdstatus import GeorepStatus
+
+ master_name = args.master.replace(":", "")
+ slave_data = args.slave.replace("ssh://", "")
+
+ brick_status = GeorepStatus(gconf.get("state-file"),
+ "",
+ args.local_path,
+ "",
+ master_name,
+ slave_data,
+ gconf.get("pid-file"))
+ checkpoint_time = gconf.get("checkpoint", 0)
+ brick_status.print_status(checkpoint_time=checkpoint_time,
+ json_output=args.json)
+
+
+def subcmd_monitor(args):
+ import monitor
+ from resource import GLUSTER, SSH, Popen
+ go_daemon = False if args.debug else True
+
+ monitor.startup(go_daemon)
+ Popen.init_errhandler()
+ local = GLUSTER("localhost", args.master)
+ slavehost, slavevol = args.slave.split("::")
+ remote = SSH(slavehost, slavevol)
+ return monitor.monitor(local, remote)
+
+
+def subcmd_verify_spawning(args):
+ logging.info("Able to spawn gsyncd.py")
+
+
+def subcmd_worker(args):
+ import os
+ import fcntl
+
+ from resource import GLUSTER, SSH, Popen
+
+ Popen.init_errhandler()
+ fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ local = GLUSTER("localhost", args.master)
+ slave_url, slavevol = args.slave.split("::")
+ if "@" not in slave_url:
+ slavehost = args.resource_remote
+ else:
+ slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
+ remote = SSH(slavehost, slavevol)
+ remote.connect_remote()
+ local.connect()
+ logging.info("Worker spawn successful. Acknowledging back to monitor")
+ os.close(args.feedback_fd)
+ local.service_loop(remote)
+
+
+def subcmd_slave(args):
+ from resource import GLUSTER, Popen
+
+ Popen.init_errhandler()
+ slavevol = args.slave.split("::")[-1]
+ local = GLUSTER("localhost", slavevol)
+
+ local.connect()
+ local.service_loop()
+
+
+def subcmd_voluuidget(args):
+ from subprocess import Popen, PIPE
+ import xml.etree.ElementTree as XET
+
+ ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
+
+ cmd = ['gluster', '--xml', '--remote-host=' + args.host,
+ 'volume', 'info', args.volname]
+
+ if args.inet6:
+ cmd.append("--inet6")
+
+ po = Popen(cmd, bufsize=0,
+ stdin=None, stdout=PIPE, stderr=PIPE,
+ universal_newlines=True)
+
+ vix, err = po.communicate()
+ if po.returncode != 0:
+ logging.info(lf("Volume info failed, unable to get "
+ "volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=po.returncode))
+ return ""
+ vi = XET.fromstring(vix)
+ if vi.find('opRet').text != '0':
+ logging.info(lf("Unable to get volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=vi.find('opErrstr').text))
+ return ""
+
+ try:
+ voluuid = vi.find("volInfo/volumes/volume/id").text
+ except (ParseError, AttributeError, ValueError) as e:
+ logging.info(lf("Parsing failed to volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=e))
+ voluuid = ""
+
+ print(voluuid)
+
+
+def _unlink(path):
+ import os
+ from errno import ENOENT
+ from syncdutils import GsyncdError
+ import sys
+
+ try:
+ os.unlink(path)
+ except (OSError, IOError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError('Unlink error: %s' % path)
+
+
+def subcmd_delete(args):
+ import logging
+ import shutil
+ import glob
+ import sys
+ from errno import ENOENT, ENODATA
+ import struct
+
+ from syncdutils import GsyncdError, Xattr, errno_wrap
+ import gsyncdconfig as gconf
+
+ logging.info('geo-replication delete')
+ # remove the stime xattr from all the brick paths so that
+ # a re-create of a session will start sync all over again
+ stime_xattr_prefix = gconf.get('stime-xattr-prefix', None)
+
+ # Delete pid file, status file, socket file
+ cleanup_paths = []
+ cleanup_paths.append(gconf.get("pid-file"))
+
+ # Cleanup Session dir
+ try:
+ shutil.rmtree(gconf.get("georep-session-working-dir"))
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError(
+ 'Error while removing working dir: %s' %
+ gconf.get("georep-session-working-dir"))
+
+ # Cleanup changelog working dirs
+ try:
+ shutil.rmtree(gconf.get("working-dir"))
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError(
+ 'Error while removing working dir: %s' %
+ gconf.get("working-dir"))
+
+ for path in cleanup_paths:
+ # To delete temp files
+ for f in glob.glob(path + "*"):
+ _unlink(f)
+
+ if args.reset_sync_time and stime_xattr_prefix:
+ for p in args.paths:
+ if p != "":
+ # set stime to (0,0) to trigger full volume content resync
+ # to slave on session recreation
+ # look at master.py::Xcrawl hint: zero_zero
+ errno_wrap(Xattr.lsetxattr,
+ (p, stime_xattr_prefix + ".stime",
+ struct.pack("!II", 0, 0)),
+ [ENOENT, ENODATA])
+ errno_wrap(Xattr.lremovexattr,
+ (p, stime_xattr_prefix + ".entry_stime"),
+ [ENOENT, ENODATA])
+
+ return
+
+
+def print_config(name, value, only_value=False, use_underscore=False):
+ val = value
+ if isinstance(value, bool):
+ val = str(value).lower()
+
+ if only_value:
+ print(val)
+ else:
+ if use_underscore:
+ name = name.replace("-", "_")
+
+ print(("%s:%s" % (name, val)))
+
+
+def config_name_format(val):
+ return val.replace("_", "-")
+
+
+def subcmd_config_get(args):
+ import sys
+ import json
+
+ all_config = gconf.getall(show_defaults=args.show_defaults,
+ show_non_configurable=True)
+ if args.name is not None:
+ val = all_config.get(config_name_format(args.name), None)
+ if val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ print_config(args.name, val["value"], only_value=args.only_value,
+ use_underscore=args.use_underscore)
+ return
+
+ if args.json:
+ out = []
+        # Convert all values to strings
+ for k in sorted(all_config):
+ v = all_config[k]
+ out.append({
+ "name": k,
+ "value": str(v["value"]),
+ "default": str(v["default"]),
+ "configurable": v["configurable"],
+ "modified": v["modified"]
+ })
+
+ print((json.dumps(out)))
+ return
+
+ for k in sorted(all_config):
+ print_config(k, all_config[k]["value"],
+ use_underscore=args.use_underscore)
+
+
+def subcmd_config_check(args):
+ import sys
+
+ try:
+ gconf.check(config_name_format(args.name), value=args.value,
+ with_conffile=False)
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
+ except gconf.GconfInvalidValue:
+ sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name,
+ args.value))
+ sys.exit(ERROR_CONFIG_INVALID_VALUE)
+
+
+def subcmd_config_set(args):
+ import sys
+
+ try:
+ gconf.setconfig(config_name_format(args.name), args.value)
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
+ except gconf.GconfInvalidValue:
+ sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name,
+ args.value))
+ sys.exit(ERROR_CONFIG_INVALID_VALUE)
+
+
+def subcmd_config_reset(args):
+ import sys
+
+ try:
+ gconf.resetconfig(config_name_format(args.name))
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 9eda6044472..a3df103e76c 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -15,53 +15,105 @@ import time
import fcntl
import shutil
import logging
+import errno
+import threading
+import subprocess
import socket
+from subprocess import PIPE
from threading import Lock, Thread as baseThread
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED
-from errno import EINTR, ENOENT, EPERM, ESTALE, errorcode
+from errno import (EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED,
+ EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO)
from signal import signal, SIGTERM
import select as oselect
from os import waitpid as owaitpid
-
+import xml.etree.ElementTree as XET
+from select import error as SelectError
try:
from cPickle import PickleError
except ImportError:
- # py 3
from pickle import PickleError
-from gconf import gconf
-
+from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE
+sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
+EVENTS_ENABLED = True
try:
- # py 3
- from urllib import parse as urllib
+ from gfevents.eventtypes import GEOREP_FAULTY as EVENT_GEOREP_FAULTY
+ from gfevents.eventtypes import GEOREP_ACTIVE as EVENT_GEOREP_ACTIVE
+ from gfevents.eventtypes import GEOREP_PASSIVE as EVENT_GEOREP_PASSIVE
+ from gfevents.eventtypes import GEOREP_CHECKPOINT_COMPLETED \
+ as EVENT_GEOREP_CHECKPOINT_COMPLETED
except ImportError:
- import urllib
+ # Events APIs not installed, dummy eventtypes with None
+ EVENTS_ENABLED = False
+ EVENT_GEOREP_FAULTY = None
+ EVENT_GEOREP_ACTIVE = None
+ EVENT_GEOREP_PASSIVE = None
+ EVENT_GEOREP_CHECKPOINT_COMPLETED = None
-try:
- from hashlib import md5 as md5
-except ImportError:
- # py 2.4
- from md5 import new as md5
+import gsyncdconfig as gconf
+from rconf import rconf
-# auxillary gfid based access prefix
+from hashlib import sha256 as sha256
+
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
+
+# auxiliary gfid based access prefix
_CL_AUX_GFID_PFX = ".gfid/"
-GF_OP_RETRIES = 20
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+GF_OP_RETRIES = 10
+
+GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+
+NodeID = None
+rsync_version = None
+unshare_mnt_propagation = None
+slv_bricks = None
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
-CHANGELOG_AGENT_SERVER_VERSION = 1.0
-CHANGELOG_AGENT_CLIENT_VERSION = 1.0
+final_lock = Lock()
+
+def sup(x, *a, **kw):
+ """a rubyesque "super" for python ;)
+
+ invoke caller method in parent class with given args.
+ """
+ return getattr(super(type(x), x),
+ sys._getframe(1).f_code.co_name)(*a, **kw)
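
# Illustrative aside (not part of the patch): sup() resolves the caller's own
# method name on the parent class, so a subclass can delegate without naming
# either class. A rough usage sketch with made-up classes; note that type(x)
# makes this unsuitable for deeper inheritance chains:
#
#     class Base(object):
#         def greet(self, name):
#             return "hello " + name
#
#     class Child(Base):
#         def greet(self, name):
#             # behaves like super(Child, self).greet(name)
#             return sup(self, name).upper()
#
#     # Child().greet("gsyncd") -> "HELLO GSYNCD"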
def escape(s):
"""the chosen flavor of string escaping, used all over
to turn whatever data to creatable representation"""
- return urllib.quote_plus(s)
+ return s.replace("/", "-").strip("-")
-def unescape(s):
- """inverse of .escape"""
- return urllib.unquote_plus(s)
+def escape_space_newline(s):
+ return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+ .replace(" ", SPACE_ESCAPE_CHAR)\
+ .replace("\n", NEWLINE_ESCAPE_CHAR)
+def unescape_space_newline(s):
+ return s.replace(SPACE_ESCAPE_CHAR, " ")\
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
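
# Illustrative aside (not part of the patch): the two helpers above
# percent-escape spaces, newlines and the percent sign itself, and undo it
# again. A quick round trip with made-up input:
#
#     escape_space_newline("a b%c\n")         # -> "a%20b%25c%0A"
#     unescape_space_newline("a%20b%25c%0A")  # -> "a b%c\n"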
+
+# gf_mount_ready() returns 1 if all subvols are up, else 0
+def gf_mount_ready():
+ ret = errno_wrap(Xattr.lgetxattr,
+ ['.', 'dht.subvol.status', 16],
+ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
+
+ if isinstance(ret, int):
+ logging.error("failed to get the xattr value")
+ return 1
+ ret = ret.rstrip('\x00')
+ if ret == "1":
+ return 1
+ return 0
+
def norm(s):
if s:
return s.replace('-', '_')
@@ -119,17 +171,26 @@ def setup_ssh_ctl(ctld, remote_addr, resource_url):
"""
Setup GConf ssh control path parameters
"""
- gconf.ssh_ctl_dir = ctld
+ rconf.ssh_ctl_dir = ctld
content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr,
resource_url)
- content_md5 = md5hex(content)
- fname = os.path.join(gconf.ssh_ctl_dir,
- "%s.mft" % content_md5)
+ encoded_content = content.encode()
+ content_sha256 = sha256hex(encoded_content)
+ """
+    The length of ctl_path for an ssh connection must not exceed 108
+    characters, otherwise ssh fails with "ctl_path too long". When rsync
+    is piped to ssh, paths longer than about 90 characters are not
+    accepted. Hence only the first 32 characters of the hash are used;
+    a hash collision doesn't matter, as only one sock file is created
+    per directory.
+    """
+ content_sha256 = content_sha256[:32]
+ fname = os.path.join(rconf.ssh_ctl_dir,
+ "%s.mft" % content_sha256)
- create_manifest(fname, content)
- ssh_ctl_path = os.path.join(gconf.ssh_ctl_dir,
- "%s.sock" % content_md5)
- gconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path]
+ create_manifest(fname, encoded_content)
+ ssh_ctl_path = os.path.join(rconf.ssh_ctl_dir,
+ "%s.sock" % content_sha256)
+ rconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path]
def grabfile(fname, content=None):
@@ -139,7 +200,7 @@ def grabfile(fname, content=None):
"""
# damn those messy open() mode codes
fd = os.open(fname, os.O_CREAT | os.O_RDWR)
- f = os.fdopen(fd, 'r+b', 0)
+ f = os.fdopen(fd, 'r+')
try:
fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
except:
@@ -153,34 +214,34 @@ def grabfile(fname, content=None):
try:
f.truncate()
f.write(content)
+ f.flush()
except:
f.close()
raise
- gconf.permanent_handles.append(f)
+ rconf.permanent_handles.append(f)
return f
def grabpidfile(fname=None, setpid=True):
""".grabfile customization for pid files"""
if not fname:
- fname = gconf.pid_file
+ fname = gconf.get("pid-file")
content = None
if setpid:
content = str(os.getpid()) + '\n'
return grabfile(fname, content=content)
-final_lock = Lock()
-
-def finalize(*a, **kw):
+def finalize(*args, **kwargs):
"""all those messy final steps we go trough upon termination
Do away with pidfile, ssh control dir and logging.
"""
+
final_lock.acquire()
- if getattr(gconf, 'pid_file', None):
- rm_pidf = gconf.pid_file_owned
- if gconf.cpid:
+ if gconf.get('pid_file'):
+ rm_pidf = rconf.pid_file_owned
+ if rconf.cpid:
# exit path from parent branch of daemonization
rm_pidf = False
while True:
@@ -188,33 +249,50 @@ def finalize(*a, **kw):
if not f:
# child has already taken over pidfile
break
- if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid:
+ if os.waitpid(rconf.cpid, os.WNOHANG)[0] == rconf.cpid:
# child has terminated
rm_pidf = True
break
time.sleep(0.1)
if rm_pidf:
try:
- os.unlink(gconf.pid_file)
+ os.unlink(rconf.pid_file)
except:
ex = sys.exc_info()[1]
if ex.errno == ENOENT:
pass
else:
raise
- if gconf.ssh_ctl_dir and not gconf.cpid:
- shutil.rmtree(gconf.ssh_ctl_dir)
- if getattr(gconf, 'state_socket', None):
- try:
- os.unlink(gconf.state_socket)
- except:
- if sys.exc_info()[0] == OSError:
+ if rconf.ssh_ctl_dir and not rconf.cpid:
+ def handle_rm_error(func, path, exc_info):
+ if exc_info[1].errno == ENOENT:
+ return
+ raise exc_info[1]
+
+ shutil.rmtree(rconf.ssh_ctl_dir, onerror=handle_rm_error)
+
+ """ Unmount if not done """
+ if rconf.mount_point:
+ if rconf.mountbroker:
+ umount_cmd = rconf.mbr_umount_cmd + [rconf.mount_point, 'lazy']
+ else:
+ umount_cmd = ['umount', '-l', rconf.mount_point]
+ p0 = subprocess.Popen(umount_cmd, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ _, errdata = p0.communicate()
+ if p0.returncode == 0:
+ try:
+ os.rmdir(rconf.mount_point)
+ except OSError:
pass
- if gconf.log_exit:
+ else:
+ pass
+
+ if rconf.log_exit:
logging.info("exiting.")
sys.stdout.flush()
sys.stderr.flush()
- os._exit(kw.get('exval', 0))
+ os._exit(kwargs.get('exval', 0))
def log_raise_exception(excont):
@@ -224,6 +302,7 @@ def log_raise_exception(excont):
Translate some weird sounding but well understood exceptions
into human-friendly lingo
"""
+
is_filelog = False
for h in logging.getLogger().handlers:
fno = getattr(getattr(h, 'stream', None), 'fileno', None)
@@ -244,31 +323,45 @@ def log_raise_exception(excont):
((isinstance(exc, OSError) or isinstance(exc, IOError)) and
exc.errno == EPIPE):
logging.error('connection to peer is broken')
- if hasattr(gconf, 'transport'):
- gconf.transport.wait()
- if gconf.transport.returncode == 127:
- logging.warn("!!!!!!!!!!!!!")
- logging.warn('!!! getting "No such file or directory" '
- "errors is most likely due to "
- "MISCONFIGURATION"
- ", please consult https://access.redhat.com"
- "/site/documentation/en-US/Red_Hat_Storage"
- "/2.1/html/Administration_Guide"
- "/chap-User_Guide-Geo_Rep-Preparation-"
- "Settingup_Environment.html")
- logging.warn("!!!!!!!!!!!!!")
- gconf.transport.terminate_geterr()
+ if hasattr(rconf, 'transport'):
+ rconf.transport.wait()
+ if rconf.transport.returncode == 127:
+ logging.error("getting \"No such file or directory\""
+ "errors is most likely due to "
+ "MISCONFIGURATION, please remove all "
+ "the public keys added by geo-replication "
+ "from authorized_keys file in slave nodes "
+ "and run Geo-replication create "
+ "command again.")
+ logging.error("If `gsec_create container` was used, then "
+ "run `gluster volume geo-replication "
+ "<MASTERVOL> [<SLAVEUSER>@]<SLAVEHOST>::"
+ "<SLAVEVOL> config remote-gsyncd "
+ "<GSYNCD_PATH> (Example GSYNCD_PATH: "
+ "`/usr/libexec/glusterfs/gsyncd`)")
+ rconf.transport.terminate_geterr()
elif isinstance(exc, OSError) and exc.errno in (ENOTCONN,
ECONNABORTED):
- logging.error('glusterfs session went down [%s]',
- errorcode[exc.errno])
+ logging.error(lf('Gluster Mount process exited',
+ error=errorcode[exc.errno]))
+ elif isinstance(exc, OSError) and exc.errno == EIO:
+ logging.error("Getting \"Input/Output error\" "
+ "is most likely due to "
+ "a. Brick is down or "
+ "b. Split brain issue.")
+ logging.error("This is expected as per design to "
+ "keep the consistency of the file system. "
+ "Once the above issue is resolved "
+ "geo-replication would automatically "
+ "proceed further.")
+ logtag = "FAIL"
else:
logtag = "FAIL"
if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
logtag = "FULL EXCEPTION TRACE"
if logtag:
logging.exception(logtag + ": ")
- sys.stderr.write("failed with %s.\n" % type(exc).__name__)
+ sys.stderr.write("failed with %s: %s.\n" % (type(exc).__name__, exc))
excont.exval = 1
sys.exit(excont.exval)
@@ -291,20 +384,20 @@ class Thread(baseThread):
function coughs up an exception
"""
- def __init__(self, *a, **kw):
- tf = kw.get('target')
+ def __init__(self, *args, **kwargs):
+ tf = kwargs.get('target')
if tf:
- def twrap(*aa):
+ def twrap(*aargs):
excont = FreeObject(exval=0)
try:
- tf(*aa)
+ tf(*aargs)
except:
try:
log_raise_exception(excont)
finally:
finalize(exval=excont.exval)
- kw['target'] = twrap
- baseThread.__init__(self, *a, **kw)
+ kwargs['target'] = twrap
+ baseThread.__init__(self, *args, **kwargs)
self.setDaemon(True)
@@ -312,6 +405,33 @@ class GsyncdError(Exception):
pass
+class _MetaXattr(object):
+
+ """singleton class, a lazy wrapper around the
+ libcxattr module
+
+ libcxattr (a heavy import due to ctypes) is
+    loaded only when the single
+    instance is actually used.
+
+ This reduces runtime for those invocations
+ which do not need filesystem manipulation
+ (eg. for config, url parsing)
+ """
+
+ def __getattr__(self, meth):
+ from libcxattr import Xattr as LXattr
+ xmeth = [m for m in dir(LXattr) if m[0] != '_']
+ if meth not in xmeth:
+ return
+ for m in xmeth:
+ setattr(self, m, getattr(LXattr, m))
+ return getattr(self, meth)
+
+
+Xattr = _MetaXattr()
+
+
def getusername(uid=None):
if uid is None:
uid = os.geteuid()
@@ -342,81 +462,63 @@ def boolify(s):
lstr = s.lower()
if lstr in true_list:
rv = True
- elif not lstr in false_list:
- logging.warn("Unknown string (%s) in string to boolean conversion "
- "defaulting to False\n" % (s))
+ elif lstr not in false_list:
+ logging.warn(lf("Unknown string in \"string to boolean\" conversion, "
+ "defaulting to False",
+ str=s))
return rv
-def eintr_wrap(func, exc, *a):
+def eintr_wrap(func, exc, *args):
"""
wrapper around syscalls resilient to interrupt caused
by signals
"""
while True:
try:
- return func(*a)
+ return func(*args)
except exc:
ex = sys.exc_info()[1]
if not ex.args[0] == EINTR:
raise
-def select(*a):
- return eintr_wrap(oselect.select, oselect.error, *a)
+def select(*args):
+ return eintr_wrap(oselect.select, oselect.error, *args)
+
+def waitpid(*args):
+ return eintr_wrap(owaitpid, OSError, *args)
-def waitpid(*a):
- return eintr_wrap(owaitpid, OSError, *a)
+def term_handler_default_hook(signum, frame):
+ finalize(signum, frame, exval=1)
-def set_term_handler(hook=lambda *a: finalize(*a, **{'exval': 1})):
+
+def set_term_handler(hook=term_handler_default_hook):
signal(SIGTERM, hook)
-def is_host_local(host):
- locaddr = False
- for ai in socket.getaddrinfo(host, None):
- # cf. http://github.com/gluster/glusterfs/blob/ce111f47/xlators
- # /mgmt/glusterd/src/glusterd-utils.c#L125
- if ai[0] == socket.AF_INET:
- if ai[-1][0].split(".")[0] == "127":
- locaddr = True
- break
- elif ai[0] == socket.AF_INET6:
- if ai[-1][0] == "::1":
- locaddr = True
+def get_node_uuid():
+ global NodeID
+ if NodeID is not None:
+ return NodeID
+
+ NodeID = ""
+ with open(UUID_FILE) as f:
+ for line in f:
+ if line.startswith("UUID="):
+ NodeID = line.strip().split("=")[-1]
break
- else:
- continue
- try:
- # use ICMP socket to avoid net.ipv4.ip_nonlocal_bind issue,
- # cf. https://bugzilla.redhat.com/show_bug.cgi?id=890587
- s = socket.socket(ai[0], socket.SOCK_RAW, socket.IPPROTO_ICMP)
- except socket.error:
- ex = sys.exc_info()[1]
- if ex.errno != EPERM:
- raise
- f = None
- try:
- f = open("/proc/sys/net/ipv4/ip_nonlocal_bind")
- if int(f.read()) != 0:
- raise GsyncdError(
- "non-local bind is set and not allowed to create "
- "raw sockets, cannot determine if %s is local" % host)
- s = socket.socket(ai[0], socket.SOCK_DGRAM)
- finally:
- if f:
- f.close()
- try:
- s.bind(ai[-1])
- locaddr = True
- break
- except:
- pass
- s.close()
- return locaddr
+
+ if NodeID == "":
+ raise GsyncdError("Failed to get Host UUID from %s" % UUID_FILE)
+ return NodeID
+
+
+def is_host_local(host_id):
+ return host_id == get_node_uuid()
def funcode(f):
@@ -452,17 +554,16 @@ def gauxpfx():
return _CL_AUX_GFID_PFX
-def md5hex(s):
- return md5(s).hexdigest()
+def sha256hex(s):
+ return sha256(s).hexdigest()
def selfkill(sig=SIGTERM):
os.kill(os.getpid(), sig)
-def errno_wrap(call, arg=[], errnos=[], retry_errnos=[ESTALE]):
+def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
""" wrapper around calls resilient to errnos.
- retry in case of ESTALE by default.
"""
nr_tries = 0
while True:
@@ -472,28 +573,39 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[ESTALE]):
ex = sys.exc_info()[1]
if ex.errno in errnos:
return ex.errno
- if not ex.errno in retry_errnos:
+ if ex.errno not in retry_errnos:
raise
nr_tries += 1
if nr_tries == GF_OP_RETRIES:
# probably a screwed state, cannot do much...
- logging.warn('reached maximum retries (%s)...' % repr(arg))
- return
+ logging.warn(lf('reached maximum retries',
+ args=repr(arg),
+ error=ex))
+ raise
time.sleep(0.250) # retry the call
def lstat(e):
- try:
- return os.lstat(e)
- except (IOError, OSError):
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- return ex.errno
- else:
- raise
+ return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
+
+def get_gfid_from_mnt(gfidpath):
+ return errno_wrap(Xattr.lgetxattr,
+ [gfidpath, 'glusterfs.gfid.string',
+ GX_GFID_CANONICAL_LEN], [ENOENT], [ESTALE])
+
+def matching_disk_gfid(gfid, entry):
+ disk_gfid = get_gfid_from_mnt(entry)
+ if isinstance(disk_gfid, int):
+ return False
-class NoPurgeTimeAvailable(Exception):
+ if not gfid == disk_gfid:
+ return False
+
+ return True
+
+
+class NoStimeAvailable(Exception):
pass
@@ -501,5 +613,503 @@ class PartialHistoryAvailable(Exception):
pass
+class ChangelogHistoryNotAvailable(Exception):
+ pass
+
+
class ChangelogException(OSError):
pass
+
+
+def gf_event(event_type, **kwargs):
+ if EVENTS_ENABLED:
+ from gfevents.gf_event import gf_event as gfevent
+ gfevent(event_type, **kwargs)
+
+
+class GlusterLogLevel(object):
+ NONE = 0
+ EMERG = 1
+ ALERT = 2
+ CRITICAL = 3
+ ERROR = 4
+ WARNING = 5
+ NOTICE = 6
+ INFO = 7
+ DEBUG = 8
+ TRACE = 9
+
+
+def get_changelog_log_level(lvl):
+ return getattr(GlusterLogLevel, lvl, GlusterLogLevel.INFO)
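
# Illustrative aside (not part of the patch): get_changelog_log_level() maps
# the textual level to the numeric scale above and falls back to INFO for
# unknown names, for example:
#
#     get_changelog_log_level("DEBUG")    # -> 8
#     get_changelog_log_level("unknown")  # -> 7 (INFO)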
+
+
+def get_master_and_slave_data_from_args(args):
+ master_name = None
+ slave_data = None
+ for arg in args:
+ if arg.startswith(":"):
+ master_name = arg.replace(":", "")
+ if "::" in arg:
+ slave_data = arg.replace("ssh://", "")
+
+ return (master_name, slave_data)
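
# Illustrative aside (not part of the patch): the master volume is picked out
# by its leading ':' and the slave by the '::' separator. An example call with
# made-up host and volume names:
#
#     get_master_and_slave_data_from_args(
#         [":gv-master", "ssh://geoaccount@slave.example.com::gv-slave"])
#     # -> ("gv-master", "geoaccount@slave.example.com::gv-slave")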
+
+def unshare_propagation_supported():
+ global unshare_mnt_propagation
+ if unshare_mnt_propagation is not None:
+ return unshare_mnt_propagation
+
+ unshare_mnt_propagation = False
+ p = subprocess.Popen(["unshare", "--help"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ if "propagation" in out:
+ unshare_mnt_propagation = True
+
+ return unshare_mnt_propagation
+
+
+def get_rsync_version(rsync_cmd):
+ global rsync_version
+ if rsync_version is not None:
+ return rsync_version
+
+ rsync_version = "0"
+ p = subprocess.Popen([rsync_cmd, "--version"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ rsync_version = out.split(" ", 4)[3]
+
+ return rsync_version
+
+
+def get_slv_dir_path(slv_host, slv_volume, gfid):
+ global slv_bricks
+
+ dir_path = ENOENT
+ pfx = gauxpfx()
+
+ if not slv_bricks:
+ slv_info = Volinfo(slv_volume, slv_host, master=False)
+ slv_bricks = slv_info.bricks
+ # Result of readlink would be of format as below.
+ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename"
+ for brick in slv_bricks:
+ dir_path = errno_wrap(os.path.join,
+ [brick['dir'],
+ ".glusterfs", gfid[0:2],
+ gfid[2:4],
+ gfid], [ENOENT], [ESTALE])
+ if dir_path != ENOENT:
+ try:
+ realpath = errno_wrap(os.readlink, [dir_path],
+ [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ realpath_parts = realpath.split('/')
+ pargfid = realpath_parts[-2]
+ basename = realpath_parts[-1]
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+ except OSError:
+ # .gfid/GFID
+ gfidpath = unescape_space_newline(os.path.join(pfx, gfid))
+ realpath = errno_wrap(Xattr.lgetxattr_buf,
+ [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ basename = os.path.basename(realpath).rstrip('\x00')
+ dirpath = os.path.dirname(realpath)
+ if dirpath == "/":
+ pargfid = ROOT_GFID
+ else:
+ dirpath = dirpath.strip("/")
+ pargfid = get_gfid_from_mnt(dirpath)
+ if isinstance(pargfid, int):
+ return None
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+
+ return None
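
# Illustrative aside (not part of the patch): when the .glusterfs symlink
# resolves, the parent gfid and basename are just the last two components of
# the link target, re-rooted under the aux-gfid prefix. A small sketch of that
# recombination; the gfid value is made up:
#
#     realpath = "../../0f/ab/0fabcdef-1234-5678-9abc-def012345678/mydir"
#     parts = realpath.split('/')
#     dir_entry = os.path.join(".gfid/", parts[-2], parts[-1])
#     # -> ".gfid/0fabcdef-1234-5678-9abc-def012345678/mydir"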
+
+
+def lf(event, **kwargs):
+ """
+ Log Format helper function, log messages can be
+ easily modified to structured log format.
+ lf("Config Change", sync_jobs=4, brick=/bricks/b1) will be
+ converted as "Config Change [{brick=/bricks/b1}, {sync_jobs=4}]"
+ """
+ msgparts = []
+ for k, v in kwargs.items():
+ msgparts.append("{%s=%s}" % (k, v))
+ return "%s [%s]" % (event, ", ".join(msgparts))
+
+
+class Popen(subprocess.Popen):
+
+ """customized subclass of subprocess.Popen with a ring
+    buffer for children's error output"""
+
+ @classmethod
+ def init_errhandler(cls):
+ """start the thread which handles children's error output"""
+ cls.errstore = {}
+
+ def tailer():
+ while True:
+ errstore = cls.errstore.copy()
+ try:
+ poe, _, _ = select(
+ [po.stderr for po in errstore], [], [], 1)
+ except (ValueError, SelectError):
+                # stderr is already closed; wait for some time before
+                # checking the next error
+ time.sleep(0.5)
+ continue
+ for po in errstore:
+ if po.stderr not in poe:
+ continue
+ po.lock.acquire()
+ try:
+ if po.on_death_row:
+ continue
+ la = errstore[po]
+ try:
+ fd = po.stderr.fileno()
+ except ValueError: # file is already closed
+ time.sleep(0.5)
+ continue
+
+ try:
+ l = os.read(fd, 1024)
+ except OSError:
+ time.sleep(0.5)
+ continue
+
+ if not l:
+ continue
+ tots = len(l)
+ for lx in la:
+ tots += len(lx)
+ while tots > 1 << 20 and la:
+ tots -= len(la.pop(0))
+ la.append(l)
+ finally:
+ po.lock.release()
+ t = Thread(target=tailer)
+ t.start()
+ cls.errhandler = t
+
+ @classmethod
+ def fork(cls):
+ """fork wrapper that restarts errhandler thread in child"""
+ pid = os.fork()
+ if not pid:
+ cls.init_errhandler()
+ return pid
+
+ def __init__(self, args, *a, **kw):
+ """customizations for subprocess.Popen instantiation
+
+ - 'close_fds' is taken to be the default
+ - if child's stderr is chosen to be managed,
+ register it with the error handler thread
+ """
+ self.args = args
+ if 'close_fds' not in kw:
+ kw['close_fds'] = True
+ self.lock = threading.Lock()
+ self.on_death_row = False
+ self.elines = []
+ try:
+ sup(self, args, *a, **kw)
+ except:
+ ex = sys.exc_info()[1]
+ if not isinstance(ex, OSError):
+ raise
+ raise GsyncdError("""execution of "%s" failed with %s (%s)""" %
+ (args[0], errno.errorcode[ex.errno],
+ os.strerror(ex.errno)))
+ if kw.get('stderr') == subprocess.PIPE:
+ assert(getattr(self, 'errhandler', None))
+ self.errstore[self] = []
+
+ def errlog(self):
+        """log the child's failure event"""
+ logging.error(lf("command returned error",
+ cmd=" ".join(self.args),
+ error=self.returncode))
+ lp = ''
+
+ def logerr(l):
+ logging.error(self.args[0] + "> " + l)
+ for l in self.elines:
+ ls = l.split('\n')
+ ls[0] = lp + ls[0]
+ lp = ls.pop()
+ for ll in ls:
+ logerr(ll)
+ if lp:
+ logerr(lp)
+
+ def errfail(self):
+ """fail nicely if child did not terminate with success"""
+ self.errlog()
+ finalize(exval=1)
+
+ def terminate_geterr(self, fail_on_err=True):
+        """kill the child, finalize stderr harvesting (unregister
+        from the errhandler, set up .elines), and fail on error
+        if asked to
+ """
+ self.lock.acquire()
+ try:
+ self.on_death_row = True
+ finally:
+ self.lock.release()
+ elines = self.errstore.pop(self)
+ if self.poll() is None:
+ self.terminate()
+ if self.poll() is None:
+ time.sleep(0.1)
+ self.kill()
+ self.wait()
+ while True:
+ if not select([self.stderr], [], [], 0.1)[0]:
+ break
+ b = os.read(self.stderr.fileno(), 1024)
+ if b:
+ elines.append(b.decode())
+ else:
+ break
+ self.stderr.close()
+ self.elines = elines
+ if fail_on_err and self.returncode != 0:
+ self.errfail()
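+
+# Minimal usage sketch (illustrative command and flow): the error-handler
+# thread must be started once per process before any stderr-managed child
+# is spawned.
+#   Popen.init_errhandler()
+#   po = Popen(["rsync", "--version"], stdout=PIPE, stderr=PIPE)
+#   po.wait()
+#   po.terminate_geterr()   # harvests stderr into po.elines, errfail()s on error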
+
+
+def host_brick_split(value):
+ """
+ IPv6 compatible way to split and get the host
+ and brick information. Example inputs:
+ node1.example.com:/exports/bricks/brick1/brick
+ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick
+ """
+ parts = value.split(":")
+ brick = parts[-1]
+ hostparts = parts[0:-1]
+ return (":".join(hostparts), brick)
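+
+# Examples (using the docstring's sample inputs):
+#   host_brick_split("node1.example.com:/exports/bricks/brick1/brick")
+#     -> ("node1.example.com", "/exports/bricks/brick1/brick")
+#   host_brick_split("fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick")
+#     -> ("fe80::af0f:df82:844f:ef66%utun0", "/exports/bricks/brick1/brick")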
+
+
+class Volinfo(object):
+
+ def __init__(self, vol, host='localhost', prelude=[], master=True):
+ if master:
+ gluster_cmd_dir = gconf.get("gluster-command-dir")
+ else:
+ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
+
+ gluster_cmd = os.path.join(gluster_cmd_dir, 'gluster')
+ po = Popen(prelude + [gluster_cmd, '--xml', '--remote-host=' + host,
+ 'volume', 'info', vol],
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+ vix = po.stdout.read()
+ po.wait()
+ po.terminate_geterr()
+ vi = XET.fromstring(vix)
+ if vi.find('opRet').text != '0':
+ if prelude:
+                via = '(via %s) ' % ' '.join(prelude)
+ else:
+ via = ' '
+ raise GsyncdError('getting volume info of %s%s '
+ 'failed with errorcode %s' %
+ (vol, via, vi.find('opErrno').text))
+ self.tree = vi
+ self.volume = vol
+ self.host = host
+
+ def get(self, elem):
+ return self.tree.findall('.//' + elem)
+
+ def is_tier(self):
+ return (self.get('typeStr')[0].text == 'Tier')
+
+ def is_hot(self, brickpath):
+ logging.debug('brickpath: ' + repr(brickpath))
+ return brickpath in self.hot_bricks
+
+ @property
+ @memoize
+ def bricks(self):
+ def bparse(b):
+ host, dirp = host_brick_split(b.find("name").text)
+ return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text}
+ return [bparse(b) for b in self.get('brick')]
+
+ @property
+ @memoize
+ def uuid(self):
+ ids = self.get('id')
+ if len(ids) != 1:
+ raise GsyncdError("volume info of %s obtained from %s: "
+ "ambiguous uuid" % (self.volume, self.host))
+ return ids[0].text
+
+ def replica_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotreplicaCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/coldreplicaCount')[0].text)
+ else:
+ return int(self.get('replicaCount')[0].text)
+
+ def disperse_count(self, tier, hot):
+ if (tier and hot):
+            # Tiering doesn't support a disperse volume as the hot tier,
+            # hence there is no xml output, so return 0. If it is
+            # supported later, this should be updated.
+ return 0
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddisperseCount')[0].text)
+ else:
+ return int(self.get('disperseCount')[0].text)
+
+ def distribution_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotdistCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddistCount')[0].text)
+ else:
+ return int(self.get('distCount')[0].text)
+
+ @property
+ @memoize
+ def hot_bricks(self):
+ return [b.text for b in self.get('hotBricks/brick')]
+
+ def get_hot_bricks_count(self, tier):
+ if (tier):
+ return int(self.get('hotBricks/hotbrickCount')[0].text)
+ else:
+ return 0
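+
+# Rough usage sketch (volume/host names are placeholders):
+#   vi = Volinfo("pop", "localhost")
+#   vi.bricks -> [{'host': 'node1', 'dir': '/bricks/b1', 'uuid': '<node-uuid>'}, ...]
+#   vi.uuid   -> the volume id parsed from the <id> element of the XML output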
+
+
+class VolinfoFromGconf(object):
+    # Glusterd generates the following config items before Geo-rep starts,
+    # so that Geo-rep need not run gluster commands from inside.
+    # The Volinfo object API/interface is kept as-is so that callers need
+    # not change anything except instantiating this instead of Volinfo().
+    #
+    # master-bricks=NODEID:HOSTNAME:PATH,..
+    # slave-bricks=NODEID:HOSTNAME,..
+    # master-volume-id=
+    # slave-volume-id=
+    # master-replica-count=
+    # master-disperse-count=
+    # master-distribution-count=
+ def __init__(self, vol, host='localhost', master=True):
+ self.volume = vol
+ self.host = host
+ self.master = master
+
+ def is_tier(self):
+ return False
+
+ def is_hot(self, brickpath):
+ return False
+
+ def is_uuid(self, value):
+ try:
+ uuid.UUID(value)
+ return True
+ except ValueError:
+ return False
+
+ def possible_path(self, value):
+ return "/" in value
+
+ @property
+ @memoize
+ def bricks(self):
+ pfx = "master-" if self.master else "slave-"
+ bricks_data = gconf.get(pfx + "bricks")
+ if bricks_data is None:
+ return []
+
+ bricks_data = bricks_data.split(",")
+ bricks_data = [b.strip() for b in bricks_data]
+ out = []
+ for b in bricks_data:
+            parts = b.split(":")
+            bpath = ""
+            b_uuid = None
+ if self.is_uuid(parts[0]):
+ b_uuid = parts[0]
+ # Set all parts except first
+ parts = parts[1:]
+
+ if self.possible_path(parts[-1]):
+ bpath = parts[-1]
+ # Set all parts except last
+ parts = parts[0:-1]
+
+ out.append({
+                "host": ":".join(parts),  # rejoin remaining parts; host may be IPv6
+ "dir": bpath,
+ "uuid": b_uuid
+ })
+
+ return out
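+
+    # Example (illustrative): a gconf value of the form
+    #   "<node-uuid>:host1:/bricks/b1,<node-uuid>:fe80::1:/bricks/b2"
+    # (where <node-uuid> parses as a UUID) yields entries like
+    #   {"host": "host1", "dir": "/bricks/b1", "uuid": "<node-uuid>"},
+    # with any leftover colon-separated parts rejoined as the host so that
+    # IPv6 addresses survive the split.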
+
+ @property
+ @memoize
+ def uuid(self):
+ if self.master:
+ return gconf.get("master-volume-id")
+ else:
+ return gconf.get("slave-volume-id")
+
+ def replica_count(self, tier, hot):
+ return gconf.get("master-replica-count")
+
+ def disperse_count(self, tier, hot):
+ return gconf.get("master-disperse-count")
+
+ def distribution_count(self, tier, hot):
+ return gconf.get("master-distribution-count")
+
+ @property
+ @memoize
+ def hot_bricks(self):
+ return []
+
+ def get_hot_bricks_count(self, tier):
+ return 0
+
+
+def can_ssh(host, port=22):
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ s.connect((host, port))
+ flag = True
+ except socket.error:
+ flag = False
+
+ s.close()
+ return flag
+
+
+def get_up_nodes(hosts, port):
+    # hosts: list of (Hostname/IP, UUID) pairs; collect the ones that
+    # accept a connection on the given port
+ up_nodes = []
+ for h in hosts:
+ if can_ssh(h[0], port):
+ up_nodes.append(h)
+
+ return up_nodes
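+
+
+# Usage sketch (host entries are (Hostname/IP, UUID) pairs, as noted above;
+# names are placeholders):
+#   get_up_nodes([("node1.example.com", "<uuid1>"), ("node2", "<uuid2>")], 22)
+#   returns the subset whose host accepts a TCP connection on port 22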