summaryrefslogtreecommitdiffstats
path: root/geo-replication/syncdaemon/master.py
diff options
context:
space:
mode:
Diffstat (limited to 'geo-replication/syncdaemon/master.py')
-rw-r--r--geo-replication/syncdaemon/master.py278
1 files changed, 195 insertions, 83 deletions
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index d9f63a440fb..9501aeae6b5 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -22,11 +22,13 @@ from threading import Condition, Lock
from datetime import datetime
import gsyncdconfig as gconf
+import libgfchangelog
from rconf import rconf
-from syncdutils import Thread, GsyncdError, escape_space_newline
-from syncdutils import unescape_space_newline, gauxpfx, escape
-from syncdutils import lstat, errno_wrap, FreeObject, lf, matching_disk_gfid
-from syncdutils import NoStimeAvailable, PartialHistoryAvailable
+from syncdutils import (Thread, GsyncdError, escape_space_newline,
+ unescape_space_newline, gauxpfx, escape,
+ lstat, errno_wrap, FreeObject, lf, matching_disk_gfid,
+ NoStimeAvailable, PartialHistoryAvailable,
+ host_brick_split)
URXTIME = (-1, 0)
@@ -65,6 +67,9 @@ def _volinfo_hook_relax_foreign(self):
def edct(op, **ed):
dct = {}
dct['op'] = op
+ # This is used in automatic gfid conflict resolution.
+ # When marked True, it's skipped during re-processing.
+ dct['skip_entry'] = False
for k in ed:
if k == 'stat':
st = ed[k]
@@ -192,7 +197,7 @@ class NormalMixin(object):
vi = vi.copy()
vi['timeout'] = int(time.time()) + timo
else:
- # send keep-alives more frequently to
+ # send keep-alive more frequently to
# avoid a delay in announcing our volume info
# to slave if it becomes established in the
# meantime
@@ -448,18 +453,10 @@ class GMasterCommon(object):
if rconf.mgmt_lock_fd:
try:
fcntl.lockf(rconf.mgmt_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
- if not rconf.active_earlier:
- rconf.active_earlier = True
- logging.info(lf("Got lock Becoming ACTIVE",
- brick=rconf.args.local_path))
return True
except:
ex = sys.exc_info()[1]
if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
- if not rconf.passive_earlier:
- rconf.passive_earlier = True
- logging.info(lf("Didn't get lock Becoming PASSIVE",
- brick=rconf.local_path))
return False
raise
@@ -494,18 +491,10 @@ class GMasterCommon(object):
ex = sys.exc_info()[1]
if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
# cannot grab, it's taken
- if not rconf.passive_earlier:
- rconf.passive_earlier = True
- logging.info(lf("Didn't get lock Becoming PASSIVE",
- brick=rconf.args.local_path))
rconf.mgmt_lock_fd = fd
return False
raise
- if not rconf.active_earlier:
- rconf.active_earlier = True
- logging.info(lf("Got lock Becoming ACTIVE",
- brick=rconf.args.local_path))
return True
def should_crawl(self):
@@ -529,8 +518,8 @@ class GMasterCommon(object):
# If crawlwrap is called when partial history available,
# then it sets register_time which is the time when geo-rep
- # worker registerd to changelog consumption. Since nsec is
- # not considered in register time, their are chances of skipping
+ # worker registered to changelog consumption. Since nsec is
+ # not considered in register time, there are chances of skipping
# changes detection in xsync crawl. This limit will be reset when
# crawlwrap is called again.
self.live_changelog_start_time = None
@@ -540,7 +529,7 @@ class GMasterCommon(object):
# no need to maintain volinfo state machine.
# in a cascading setup, each geo-replication session is
# independent (ie. 'volume-mark' and 'xtime' are not
- # propogated). This is because the slave's xtime is now
+ # propagated). This is because the slave's xtime is now
# stored on the master itself. 'volume-mark' just identifies
# that we are in a cascading setup and need to enable
# 'geo-replication.ignore-pid-check' option.
@@ -711,7 +700,8 @@ class GMasterChangelogMixin(GMasterCommon):
TYPE_GFID = "D "
TYPE_ENTRY = "E "
- MAX_EF_RETRIES = 15
+ MAX_EF_RETRIES = 10
+ MAX_OE_RETRIES = 10
# flat directory hierarchy for gfid based access
FLAT_DIR_HIERARCHY = '.'
@@ -803,21 +793,29 @@ class GMasterChangelogMixin(GMasterCommon):
self.status.inc_value("failures", num_failures)
- def fix_possible_entry_failures(self, failures, retry_count):
+ def fix_possible_entry_failures(self, failures, retry_count, entries):
pfx = gauxpfx()
fix_entry_ops = []
failures1 = []
+ remove_gfids = set()
for failure in failures:
- if failure[2]['dst']:
+ if failure[2]['name_mismatch']:
+ pbname = failure[2]['slave_entry']
+ elif failure[2]['dst']:
pbname = failure[0]['entry1']
else:
pbname = failure[0]['entry']
- if failure[2]['gfid_mismatch']:
+
+ op = failure[0]['op']
+ # name exists but gfid is different
+ if failure[2]['gfid_mismatch'] or failure[2]['name_mismatch']:
slave_gfid = failure[2]['slave_gfid']
st = lstat(os.path.join(pfx, slave_gfid))
+ # Takes care of scenarios with no hardlinks
if isinstance(st, int) and st == ENOENT:
- logging.info(lf('Fixing gfid mismatch in slave. Deleting'
- ' the entry', retry_count=retry_count,
+ logging.debug(lf('Entry not present on master. Fixing gfid '
+ 'mismatch in slave. Deleting the entry',
+ retry_count=retry_count,
entry=repr(failure)))
# Add deletion to fix_entry_ops list
if failure[2]['slave_isdir']:
@@ -830,79 +828,163 @@ class GMasterChangelogMixin(GMasterCommon):
edct('UNLINK',
gfid=failure[2]['slave_gfid'],
entry=pbname))
+ remove_gfids.add(slave_gfid)
+ if op in ['RENAME']:
+ # If renamed gfid doesn't exists on master, remove
+ # rename entry and unlink src on slave
+ st = lstat(os.path.join(pfx, failure[0]['gfid']))
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug("Unlink source %s" % repr(failure))
+ remove_gfids.add(failure[0]['gfid'])
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # Takes care of scenarios of hardlinks/renames on master
elif not isinstance(st, int):
- # The file exists on master but with different name.
- # Probabaly renamed and got missed during xsync crawl.
- if failure[2]['slave_isdir']:
- logging.info(lf('Fixing gfid mismatch in slave',
+ if matching_disk_gfid(slave_gfid, pbname):
+ # Safe to ignore the failure as master contains same
+ # file with same gfid. Remove entry from entries list
+ logging.debug(lf('Fixing gfid mismatch in slave. '
+ ' Safe to ignore, take out entry',
retry_count=retry_count,
entry=repr(failure)))
+ remove_gfids.add(failure[0]['gfid'])
+ if op == 'RENAME':
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # The file exists on master but with different name.
+ # Probably renamed and got missed during xsync crawl.
+ elif failure[2]['slave_isdir']:
realpath = os.readlink(os.path.join(
- rconf.args.local_path,
- ".glusterfs",
- slave_gfid[0:2],
- slave_gfid[2:4],
- slave_gfid))
+ rconf.args.local_path,
+ ".glusterfs",
+ slave_gfid[0:2],
+ slave_gfid[2:4],
+ slave_gfid))
dst_entry = os.path.join(pfx, realpath.split('/')[-2],
realpath.split('/')[-1])
- rename_dict = edct('RENAME', gfid=slave_gfid,
- entry=failure[0]['entry'],
- entry1=dst_entry, stat=st,
- link=None)
- logging.info(lf('Fixing gfid mismatch in slave. '
- 'Renaming', retry_count=retry_count,
- entry=repr(rename_dict)))
- fix_entry_ops.append(rename_dict)
+ src_entry = pbname
+ logging.debug(lf('Fixing dir name/gfid mismatch in '
+ 'slave', retry_count=retry_count,
+ entry=repr(failure)))
+ if src_entry == dst_entry:
+ # Safe to ignore the failure as master contains
+ # same directory as in slave with same gfid.
+ # Remove the failure entry from entries list
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Safe to ignore, '
+ 'take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ rename_dict = edct('RENAME', gfid=slave_gfid,
+ entry=src_entry,
+ entry1=dst_entry, stat=st,
+ link=None)
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Renaming',
+ retry_count=retry_count,
+ entry=repr(rename_dict)))
+ fix_entry_ops.append(rename_dict)
else:
- logging.info(lf('Fixing gfid mismatch in slave. '
- ' Deleting the entry',
+ # A hardlink file exists with different name or
+ # renamed file exists and we are sure from
+ # matching_disk_gfid check that the entry doesn't
+ # exist with same gfid so we can safely delete on slave
+ logging.debug(lf('Fixing file gfid mismatch in slave. '
+ 'Hardlink/Rename Case. Deleting entry',
retry_count=retry_count,
entry=repr(failure)))
fix_entry_ops.append(
edct('UNLINK',
gfid=failure[2]['slave_gfid'],
entry=pbname))
- logging.error(lf('Entry cannot be fixed in slave due '
- 'to GFID mismatch, find respective '
- 'path for the GFID and trigger sync',
- gfid=slave_gfid))
+ elif failure[1] == ENOENT:
+ if op in ['RENAME']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ pargfid = pbname.split('/')[1]
+ st = lstat(os.path.join(pfx, pargfid))
+ # Safe to ignore the failure as master doesn't contain
+ # parent directory.
+ if isinstance(st, int):
+ logging.debug(lf('Fixing ENOENT error in slave. Parent '
+ 'does not exist on master. Safe to '
+ 'ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ logging.debug(lf('Fixing ENOENT error in slave. Create '
+ 'parent directory on slave.',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ realpath = os.readlink(os.path.join(rconf.args.local_path,
+ ".glusterfs",
+ pargfid[0:2],
+ pargfid[2:4],
+ pargfid))
+ dir_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ fix_entry_ops.append(
+ edct('MKDIR', gfid=pargfid, entry=dir_entry,
+ mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
+
+ logging.debug("remove_gfids: %s" % repr(remove_gfids))
+ if remove_gfids:
+ for e in entries:
+ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
+ and e['gfid'] in remove_gfids:
+ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
+ e['skip_entry'] = True
if fix_entry_ops:
# Process deletions of entries whose gfids are mismatched
failures1 = self.slave.server.entry_ops(fix_entry_ops)
- if not failures1:
- logging.info("Sucessfully fixed entry ops with gfid mismatch")
- return failures1
+ return (failures1, fix_entry_ops)
def handle_entry_failures(self, failures, entries):
retries = 0
pending_failures = False
failures1 = []
failures2 = []
+ entry_ops1 = []
+ entry_ops2 = []
if failures:
pending_failures = True
failures1 = failures
+ entry_ops1 = entries
while pending_failures and retries < self.MAX_EF_RETRIES:
retries += 1
- failures2 = self.fix_possible_entry_failures(failures1,
- retries)
+ (failures2, entry_ops2) = self.fix_possible_entry_failures(
+ failures1, retries, entry_ops1)
if not failures2:
pending_failures = False
+ logging.info(lf('Successfully fixed entry ops with gfid '
+ 'mismatch', retry_count=retries))
else:
pending_failures = True
failures1 = failures2
+ entry_ops1 = entry_ops2
if pending_failures:
for failure in failures1:
logging.error("Failed to fix entry ops %s", repr(failure))
- else:
- # Retry original entry list 5 times
- failures = self.slave.server.entry_ops(entries)
-
- self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
def process_change(self, change, done, retry):
pfx = gauxpfx()
@@ -1003,7 +1085,7 @@ class GMasterChangelogMixin(GMasterCommon):
# Special case: record mknod as link
if ty in ['MKNOD']:
mode = int(ec[2])
- if mode & 01000:
+ if mode & 0o1000:
# Avoid stat'ing the file as it
# may be deleted in the interim
st = FreeObject(st_mode=int(ec[2]),
@@ -1044,6 +1126,11 @@ class GMasterChangelogMixin(GMasterCommon):
os.path.join(pfx, ec[self.POS_ENTRY1 - 1]))
entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en,
stat=st, link=rl))
+ # If src doesn't exist while doing rename, destination
+ # is created. If data is not followed by rename, this
+ # remains zero byte file on slave. Hence add data entry
+ # for renames
+ datas.add(os.path.join(pfx, gfid))
else:
# stat() to get mode and other information
if not matching_disk_gfid(gfid, en):
@@ -1067,6 +1154,12 @@ class GMasterChangelogMixin(GMasterCommon):
rl = None
entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
link=rl))
+ # If src doesn't exist while doing link, destination
+ # is created based on file type. If data is not
+ # followed by link, this remains zero byte file on
+ # slave. Hence add data entry for links
+ if rl is None:
+ datas.add(os.path.join(pfx, gfid))
elif ty == 'SYMLINK':
rl = errno_wrap(os.readlink, [en], [ENOENT],
[ESTALE, EINTR])
@@ -1114,7 +1207,6 @@ class GMasterChangelogMixin(GMasterCommon):
logging.debug('entries: %s' % repr(entries))
# Increment counters for Status
- self.status.inc_value("entry", len(entries))
self.files_in_batch += len(datas)
self.status.inc_value("data", len(datas))
@@ -1129,7 +1221,23 @@ class GMasterChangelogMixin(GMasterCommon):
self.status.inc_value("entry", len(entries))
failures = self.slave.server.entry_ops(entries)
- self.handle_entry_failures(failures, entries)
+
+ if gconf.get("gfid-conflict-resolution"):
+ count = 0
+ if failures:
+ logging.info(lf('Entry ops failed with gfid mismatch',
+ count=len(failures)))
+ while failures and count < self.MAX_OE_RETRIES:
+ count += 1
+ self.handle_entry_failures(failures, entries)
+ logging.info(lf('Retry original entries', count=count))
+ failures = self.slave.server.entry_ops(entries)
+ if not failures:
+ logging.info("Successfully fixed all entry ops with "
+ "gfid mismatch")
+ break
+
+ self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
self.status.dec_value("entry", len(entries))
# Update Entry stime in Brick Root only in case of Changelog mode
@@ -1164,10 +1272,10 @@ class GMasterChangelogMixin(GMasterCommon):
continue
meta_entries.append(edct('META', go=go[0], stat=st))
if meta_entries:
- self.status.inc_value("meta", len(entries))
+ self.status.inc_value("meta", len(meta_entries))
failures = self.slave.server.meta_ops(meta_entries)
self.log_failures(failures, 'go', '', 'META')
- self.status.dec_value("meta", len(entries))
+ self.status.dec_value("meta", len(meta_entries))
self.batch_stats["META_SYNC_TIME"] += time.time() - meta_start_time
@@ -1359,7 +1467,7 @@ class GMasterChangelogMixin(GMasterCommon):
node = rconf.args.resource_remote
node_data = node.split("@")
node = node_data[-1]
- remote_node_ip = node.split(":")[0]
+ remote_node_ip, _ = host_brick_split(node)
self.status.set_slave_node(remote_node_ip)
def changelogs_batch_process(self, changes):
@@ -1392,9 +1500,9 @@ class GMasterChangelogMixin(GMasterCommon):
# that are _historical_ to that time.
data_stime = self.get_data_stime()
- self.changelog_agent.scan()
+ libgfchangelog.scan()
self.crawls += 1
- changes = self.changelog_agent.getchanges()
+ changes = libgfchangelog.getchanges()
if changes:
if data_stime:
logging.info(lf("slave's time",
@@ -1411,10 +1519,9 @@ class GMasterChangelogMixin(GMasterCommon):
self.changelogs_batch_process(changes)
- def register(self, register_time, changelog_agent, status):
- self.changelog_agent = changelog_agent
+ def register(self, register_time, status):
self.sleep_interval = gconf.get("change-interval")
- self.changelog_done_func = self.changelog_agent.done
+ self.changelog_done_func = libgfchangelog.done
self.tempdir = self.setup_working_dir()
self.processed_changelogs_dir = os.path.join(self.tempdir,
".processed")
@@ -1423,11 +1530,10 @@ class GMasterChangelogMixin(GMasterCommon):
class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
- def register(self, register_time, changelog_agent, status):
- self.changelog_agent = changelog_agent
+ def register(self, register_time, status):
self.changelog_register_time = register_time
self.history_crawl_start_time = register_time
- self.changelog_done_func = self.changelog_agent.history_done
+ self.changelog_done_func = libgfchangelog.history_done
self.history_turns = 0
self.tempdir = self.setup_working_dir()
self.processed_changelogs_dir = os.path.join(self.tempdir,
@@ -1441,6 +1547,12 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
data_stime = self.get_data_stime()
end_time = int(time.time())
+
+ #as start of historical crawl marks Geo-rep worker restart
+ if gconf.get("ignore-deletes"):
+ logging.info(lf('ignore-deletes config option is set',
+ stime=data_stime))
+
logging.info(lf('starting history crawl',
turns=self.history_turns,
stime=data_stime,
@@ -1455,7 +1567,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
# location then consuming history will not work(Known issue as of now)
changelog_path = os.path.join(rconf.args.local_path,
".glusterfs/changelogs")
- ret, actual_end = self.changelog_agent.history(
+ ret, actual_end = libgfchangelog.history_changelog(
changelog_path,
data_stime[0],
end_time,
@@ -1467,10 +1579,10 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
# to be processed. returns positive value as number of changelogs
# to be processed, which will be fetched using
# history_getchanges()
- while self.changelog_agent.history_scan() > 0:
+ while libgfchangelog.history_scan() > 0:
self.crawls += 1
- changes = self.changelog_agent.history_getchanges()
+ changes = libgfchangelog.history_getchanges()
if changes:
if data_stime:
logging.info(lf("slave's time",
@@ -1504,7 +1616,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
self.history_crawl_start_time = int(time.time())
self.crawl()
else:
- # This exeption will be catched in resource.py and
+ # This exception will be caught in resource.py and
# fallback to xsync for the small gap.
raise PartialHistoryAvailable(str(actual_end))
@@ -1523,7 +1635,7 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
XSYNC_MAX_ENTRIES = 1 << 13
- def register(self, register_time=None, changelog_agent=None, status=None):
+ def register(self, register_time=None, status=None):
self.status = status
self.counter = 0
self.comlist = []
@@ -1641,7 +1753,7 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
def is_sticky(self, path, mo):
"""check for DHTs linkto sticky bit file"""
sticky = False
- if mo & 01000:
+ if mo & 0o1000:
sticky = self.master.server.linkto_check(path)
return sticky