author	Kotresh HR <khiremat@redhat.com>	2017-08-08 10:12:14 -0400
committer	Shyamsundar Ranganathan <srangana@redhat.com>	2017-08-29 12:42:31 +0000
commit	b04b3dd4dc8fcff9ad44e31d1da810eba223cd08 (patch)
tree	6835e658214cafdb83fd7d66f17962b0504b05cd /geo-replication
parent	7e89bcd213fd69dd6da7cdb1ec0699e7b8a547d5 (diff)
geo-rep: Fix syncing of hardlink of symlink
Problem:
If there is a hardlink to a symlink on the master and the symlink file is
then deleted on the master, geo-rep fails to sync the hardlink.

Typical Usecase:
It is easily hit with the rsnapshot use case, which relies on hardlinks.

Example Reproducer:
Set up geo-replication between a master and a slave volume and, in the
master mount point, do the following.
 1. mkdir /tmp/symlinkbug
 2. ln -f -s /does/not/exist /tmp/symlinkbug/a_symlink
 3. rsync -a /tmp/symlinkbug ./
 4. cp -al symlinkbug symlinkbug.0
 5. ln -f -s /does/not/exist2 /tmp/symlinkbug/a_symlink
 6. rsync -a /tmp/symlinkbug ./
 7. cp -al symlinkbug symlinkbug.1

Cause:
If the source was not present while syncing a hardlink, the blob was
always packed as a regular file.

Fix:
If the source is not present while syncing a hardlink, pack the blob
based on the mode.

> Change-Id: Iaa12d6f99de47b18e0650e7c4eb455f23f8390f2
> BUG: 1432046
> Signed-off-by: Kotresh HR <khiremat@redhat.com>
> Reported-by: Christian Lohmaier <lohmaier+rhbz@gmail.com>
> Reviewed-on: https://review.gluster.org/18011
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Aravinda VK <avishwan@redhat.com>

(cherry picked from commit e893962deaabab8e934813f8a0443a8f94e009f2)

Change-Id: Iaa12d6f99de47b18e0650e7c4eb455f23f8390f2
BUG: 1486120
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: https://review.gluster.org/18127
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
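The heart of the fix is the dispatch in resource.py below: when the hardlink source is missing on the slave, the entry blob is now packed according to the recorded mode instead of unconditionally as a regular file. A minimal stand-alone sketch of that choice follows; the wrapper function and its pack-helper parameters are illustrative only, not part of the patch.

import stat

def pack_blob_for_missing_source(gfid, bname, e,
                                 entry_pack_reg_stat, entry_pack_symlink):
    # Illustrative sketch only: choose the blob format from the entry's
    # recorded mode rather than always treating the source as a regular file.
    mode = e['stat']['mode']
    if stat.S_ISREG(mode):
        # Regular file: pack a regular-file blob (the pre-fix behaviour).
        return entry_pack_reg_stat(gfid, bname, e['stat'])
    if stat.S_ISLNK(mode):
        # Symlink: pack a symlink blob that carries the link target.
        return entry_pack_symlink(gfid, bname, e['link'], e['stat'])
    return None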
Diffstat (limited to 'geo-replication')
-rw-r--r--	geo-replication/syncdaemon/master.py	26
-rw-r--r--	geo-replication/syncdaemon/resource.py	97
-rw-r--r--	geo-replication/syncdaemon/syncdutils.py	46
3 files changed, 105 insertions(+), 64 deletions(-)
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index 2aae860f5d1..2987bca0601 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -24,7 +24,7 @@ from datetime import datetime
from gconf import gconf
from syncdutils import Thread, GsyncdError, boolify, escape_space_newline
from syncdutils import unescape_space_newline, gauxpfx, md5hex, selfkill
-from syncdutils import lstat, errno_wrap, FreeObject, lf
+from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid
from syncdutils import NoStimeAvailable, PartialHistoryAvailable
URXTIME = (-1, 0)
@@ -792,6 +792,10 @@ class GMasterChangelogMixin(GMasterCommon):
fix_entry_ops = []
failures1 = []
for failure in failures:
+ if failure[2]['dst']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
if failure[2]['gfid_mismatch']:
slave_gfid = failure[2]['slave_gfid']
st = lstat(os.path.join(pfx, slave_gfid))
@@ -800,7 +804,6 @@ class GMasterChangelogMixin(GMasterCommon):
' the entry', retry_count=retry_count,
entry=repr(failure)))
#Add deletion to fix_entry_ops list
- pbname = failure[0]['entry']
if failure[2]['slave_isdir']:
fix_entry_ops.append(edct('RMDIR',
gfid=failure[2]['slave_gfid'],
@@ -836,7 +839,6 @@ class GMasterChangelogMixin(GMasterCommon):
' Deleting the entry',
retry_count=retry_count,
entry=repr(failure)))
- pbname = failure[0]['entry']
fix_entry_ops.append(edct('UNLINK',
gfid=failure[2]['slave_gfid'],
entry=pbname))
@@ -1024,15 +1026,27 @@ class GMasterChangelogMixin(GMasterCommon):
stat=st, link=rl))
else:
# stat() to get mode and other information
+ if not matching_disk_gfid(gfid, en):
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
+ continue
+
go = os.path.join(pfx, gfid)
st = lstat(go)
if isinstance(st, int):
- logging.debug(lf('file got purged in the interim',
- file=go))
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
continue
if ty == 'LINK':
- entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
+ rl = None
+ if st and stat.S_ISLNK(st.st_mode):
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE])
+ if isinstance(rl, int):
+ rl = None
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
+ link=rl))
elif ty == 'SYMLINK':
rl = errno_wrap(os.readlink, [en], [ENOENT], [ESTALE])
if isinstance(rl, int):
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index a608944f9be..a6e351590f8 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -39,9 +39,11 @@ from syncdutils import NoStimeAvailable, PartialHistoryAvailable
from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
from syncdutils import get_changelog_log_level, get_rsync_version
from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION
+from syncdutils import GX_GFID_CANONICAL_LEN
from gsyncdstatus import GeorepStatus
from syncdutils import get_master_and_slave_data_from_args
from syncdutils import mntpt_list, lf
+from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt
UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z')
HostRX = re.compile('[a-zA-Z\d](?:[a-zA-Z\d.-]*[a-zA-Z\d])?', re.I)
@@ -110,33 +112,6 @@ def parse_url(ustr):
return getattr(this, sch.upper())(path)
-class _MetaXattr(object):
-
- """singleton class, a lazy wrapper around the
- libcxattr module
-
- libcxattr (a heavy import due to ctypes) is
- loaded only when when the single
- instance is tried to be used.
-
- This reduces runtime for those invocations
- which do not need filesystem manipulation
- (eg. for config, url parsing)
- """
-
- def __getattr__(self, meth):
- from libcxattr import Xattr as LXattr
- xmeth = [m for m in dir(LXattr) if m[0] != '_']
- if not meth in xmeth:
- return
- for m in xmeth:
- setattr(self, m, getattr(LXattr, m))
- return getattr(self, meth)
-
-
-Xattr = _MetaXattr()
-
-
class Popen(subprocess.Popen):
"""customized subclass of subprocess.Popen with a ring
@@ -294,7 +269,6 @@ class Server(object):
NTV_FMTSTR = "!" + "B" * 19 + "II"
FRGN_XTRA_FMT = "I"
FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
- GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
# for backend gfid fetch, do not use GX_NSPACE_PFX
GFID_XATTR = 'trusted.gfid'
@@ -304,15 +278,15 @@ class Server(object):
@classmethod
def _fmt_mknod(cls, l):
- return "!II%dsI%dsIII" % (cls.GX_GFID_CANONICAL_LEN, l + 1)
+ return "!II%dsI%dsIII" % (GX_GFID_CANONICAL_LEN, l + 1)
@classmethod
def _fmt_mkdir(cls, l):
- return "!II%dsI%dsII" % (cls.GX_GFID_CANONICAL_LEN, l + 1)
+ return "!II%dsI%dsII" % (GX_GFID_CANONICAL_LEN, l + 1)
@classmethod
def _fmt_symlink(cls, l1, l2):
- return "!II%dsI%ds%ds" % (cls.GX_GFID_CANONICAL_LEN, l1 + 1, l2 + 1)
+ return "!II%dsI%ds%ds" % (GX_GFID_CANONICAL_LEN, l1 + 1, l2 + 1)
def _pathguard(f):
"""decorator method that checks
@@ -385,12 +359,6 @@ class Server(object):
raise
@classmethod
- def gfid_mnt(cls, gfidpath):
- return errno_wrap(Xattr.lgetxattr,
- [gfidpath, 'glusterfs.gfid.string',
- cls.GX_GFID_CANONICAL_LEN], [ENOENT], [ESTALE])
-
- @classmethod
@_pathguard
def purge(cls, path, entries=None):
"""force-delete subtrees
@@ -653,27 +621,33 @@ class Server(object):
return
if op == 'UNLINK':
- errno_wrap(os.unlink, [entry], [ENOENT, ESTALE],
- [EBUSY])
+ er = errno_wrap(os.unlink, [entry], [ENOENT, ESTALE], [EBUSY])
+ return er
+
elif op == 'RMDIR':
er = errno_wrap(os.rmdir, [entry], [ENOENT, ESTALE,
ENOTEMPTY], [EBUSY])
if er == ENOTEMPTY:
return er
- def collect_failure(e, cmd_ret):
+ def collect_failure(e, cmd_ret, dst=False):
slv_entry_info = {}
slv_entry_info['gfid_mismatch'] = False
+ slv_entry_info['dst'] = dst
# We do this for failing fops on Slave
# Master should be logging this
if cmd_ret is None:
return False
if cmd_ret == EEXIST:
- disk_gfid = cls.gfid_mnt(e['entry'])
+ if dst:
+ en = e['entry1']
+ else:
+ en = e['entry']
+ disk_gfid = get_gfid_from_mnt(en)
if isinstance(disk_gfid, basestring) and e['gfid'] != disk_gfid:
slv_entry_info['gfid_mismatch'] = True
- st = lstat(e['entry'])
+ st = lstat(en)
if not isinstance(st, int):
if st and stat.S_ISDIR(st.st_mode):
slv_entry_info['slave_isdir'] = True
@@ -690,16 +664,6 @@ class Server(object):
failures = []
- def matching_disk_gfid(gfid, entry):
- disk_gfid = cls.gfid_mnt(entry)
- if isinstance(disk_gfid, int):
- return False
-
- if not gfid == disk_gfid:
- return False
-
- return True
-
def recursive_rmdir(gfid, entry, path):
"""disk_gfid check added for original path for which
recursive_delete is called. This disk gfid check executed
@@ -738,8 +702,9 @@ class Server(object):
"with on-disk gfid",
source=entry,
gfid=gfid,
- disk_gfid=cls.gfid_mnt(entry),
+ disk_gfid=get_gfid_from_mnt(entry),
target=en))
+ collect_failure(e, EEXIST)
return
cmd_ret = errno_wrap(os.rename,
@@ -817,14 +782,24 @@ class Server(object):
st = lstat(slink)
if isinstance(st, int):
(pg, bname) = entry2pb(entry)
- blob = entry_pack_reg_stat(gfid, bname, e['stat'])
+ if stat.S_ISREG(e['stat']['mode']):
+ blob = entry_pack_reg_stat(gfid, bname, e['stat'])
+ elif stat.S_ISLNK(e['stat']['mode']):
+ blob = entry_pack_symlink(gfid, bname, e['link'],
+ e['stat'])
else:
cmd_ret = errno_wrap(os.link,
[slink, entry],
[ENOENT, EEXIST], [ESTALE])
collect_failure(e, cmd_ret)
elif op == 'SYMLINK':
- blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
+ en = e['entry']
+ st = lstat(entry)
+ if isinstance(st, int):
+ blob = entry_pack_symlink(gfid, bname, e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST)
elif op == 'RENAME':
en = e['entry1']
st = lstat(entry)
@@ -832,9 +807,13 @@ class Server(object):
if e['stat'] and not stat.S_ISDIR(e['stat']['mode']):
if stat.S_ISLNK(e['stat']['mode']) and \
e['link'] is not None:
- (pg, bname) = entry2pb(en)
- blob = entry_pack_symlink(gfid, bname,
- e['link'], e['stat'])
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_symlink(gfid, bname,
+ e['link'], e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, True)
else:
(pg, bname) = entry2pb(en)
blob = entry_pack_reg_stat(gfid, bname, e['stat'])
@@ -865,6 +844,8 @@ class Server(object):
raise
else:
raise
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, True)
else:
rename_with_disk_gfid_confirmation(gfid, entry, en)
if blob:
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index 43b21668a46..2187ecd226b 100644
--- a/geo-replication/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -66,6 +66,8 @@ except ImportError:
_CL_AUX_GFID_PFX = ".gfid/"
GF_OP_RETRIES = 10
+GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+
CHANGELOG_AGENT_SERVER_VERSION = 1.0
CHANGELOG_AGENT_CLIENT_VERSION = 1.0
NodeID = None
@@ -371,6 +373,33 @@ class GsyncdError(Exception):
pass
+class _MetaXattr(object):
+
+ """singleton class, a lazy wrapper around the
+ libcxattr module
+
+ libcxattr (a heavy import due to ctypes) is
+ loaded only when when the single
+ instance is tried to be used.
+
+ This reduces runtime for those invocations
+ which do not need filesystem manipulation
+ (eg. for config, url parsing)
+ """
+
+ def __getattr__(self, meth):
+ from libcxattr import Xattr as LXattr
+ xmeth = [m for m in dir(LXattr) if m[0] != '_']
+ if meth not in xmeth:
+ return
+ for m in xmeth:
+ setattr(self, m, getattr(LXattr, m))
+ return getattr(self, meth)
+
+
+Xattr = _MetaXattr()
+
+
def getusername(uid=None):
if uid is None:
uid = os.geteuid()
@@ -524,6 +553,23 @@ def lstat(e):
return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
+def get_gfid_from_mnt(gfidpath):
+ return errno_wrap(Xattr.lgetxattr,
+ [gfidpath, 'glusterfs.gfid.string',
+ GX_GFID_CANONICAL_LEN], [ENOENT], [ESTALE])
+
+
+def matching_disk_gfid(gfid, entry):
+ disk_gfid = get_gfid_from_mnt(entry)
+ if isinstance(disk_gfid, int):
+ return False
+
+ if not gfid == disk_gfid:
+ return False
+
+ return True
+
+
class NoStimeAvailable(Exception):
pass