summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAravinda VK <avishwan@redhat.com>2015-03-11 13:31:09 +0530
committerVijay Bellur <vbellur@redhat.com>2015-03-15 22:55:31 -0700
commit7d8be3613f7384f5118f26e194fe7c64ea69d11c (patch)
treeb6dcdd9c04ebb8f855e40390165d790ba7fcfa75
parentf0224ce93ae9ad420e23612fe6e6707a821f9cab (diff)
geo-rep: Do not use xsync_upper_limit for change detection
Use register time(xsync_upper_limit) only for stime update, do not use for change detection. Problem 1: If a file created before geo-rep, xtime xattr does not exist. Geo-rep updates xtime of the file to current time if not exists. xtime > upper_limit so geo-rep will not pick those files. Changelog either will have SETXATTR, and fails to sync the file. Problem 2: If a file is created before geo-rep create and updated after geo-rep start. xtime of the file is greater than upper limit(geo-rep start time/changelog register time). Geo-rep(XSync) will not pick this file for syncing. Changelog will have only DATA recorded for that file. Geo-rep tries DATA without any ENTRY ops and fails with rsync error. BUG: 1200733 Change-Id: Ie4e8f284db689d2c755ef8e7ecbb658db1c0785f Signed-off-by: Aravinda VK <avishwan@redhat.com> Reviewed-on: http://review.gluster.org/9855 Reviewed-by: Kotresh HR <khiremat@redhat.com> Reviewed-by: Saravanakumar Arumugam <sarumuga@redhat.com> Tested-by: Saravanakumar Arumugam <sarumuga@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--geo-replication/syncdaemon/master.py53
-rw-r--r--geo-replication/syncdaemon/resource.py7
2 files changed, 26 insertions, 34 deletions
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index dfe65fe6709..e60624391a1 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -170,10 +170,7 @@ class NormalMixin(object):
raise GsyncdError("timestamp corruption for " + path)
def need_sync(self, e, xte, xtrd):
- if self.xsync_upper_limit:
- return xte > xtrd and xte <= self.xsync_upper_limit
- else:
- return xte > xtrd
+ return xte > xtrd
def set_slave_xtime(self, path, mark):
self.slave.server.set_stime(path, self.uuid, mark)
@@ -491,8 +488,7 @@ class GMasterCommon(object):
def register(self):
self.register()
- def crawlwrap(self, oneshot=False, no_stime_update=False,
- register_time=None):
+ def crawlwrap(self, oneshot=False, register_time=None):
if oneshot:
# it's important to do this during the oneshot crawl as
# for a passive gsyncd (ie. in a replicate scenario)
@@ -503,11 +499,11 @@ class GMasterCommon(object):
# then it sets register_time which is the time when geo-rep
# worker registerd to changelog consumption. Since nsec is
# not considered in register time, their are chances of skipping
- # changes detection in xsync crawl. Add 1 sec to upper_limit.
- # This limit will be reset when crawlwrap is called again.
- self.xsync_upper_limit = None
+ # changes detection in xsync crawl. This limit will be reset when
+ # crawlwrap is called again.
+ self.live_changelog_start_time = None
if register_time:
- self.xsync_upper_limit = (register_time + 1, 0)
+ self.live_changelog_start_time = (register_time, 0)
# no need to maintain volinfo state machine.
# in a cascading setup, each geo-replication session is
@@ -583,7 +579,7 @@ class GMasterCommon(object):
time.sleep(5)
continue
self.update_worker_health("Active")
- self.crawl(no_stime_update=no_stime_update)
+ self.crawl()
if oneshot:
return
time.sleep(self.sleep_interval)
@@ -1278,7 +1274,7 @@ class GMasterChangelogMixin(GMasterCommon):
except:
raise
- def crawl(self, no_stime_update=False):
+ def crawl(self):
self.update_worker_crawl_status("Changelog Crawl")
changes = []
# get stime (from the brick) and purge changelogs
@@ -1323,7 +1319,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
self.processed_changelogs_dir = os.path.join(self.setup_working_dir(),
".history/.processed")
- def crawl(self, no_stime_update=False):
+ def crawl(self):
self.history_turns += 1
self.update_worker_crawl_status("History Crawl")
purge_time = self.get_purge_time()
@@ -1425,7 +1421,7 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
else:
raise
- def crawl(self, no_stime_update=False):
+ def crawl(self):
"""
event dispatcher thread
@@ -1451,18 +1447,8 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
self.process([item[1]], 0)
self.archive_and_purge_changelogs([item[1]])
elif item[0] == 'stime':
- if not no_stime_update:
- # xsync is started after running history but if
- # history actual end time is less than register time
- # then if we update stime, live changelog processing
- # will skip the changelogs for which TS is less than
- # stime. During this deletes and renames are not
- # propogated. By not setting stime live changelog will
- # start processing from the register time. Since we
- # have xsync_upper_limit their will not be much
- # overlap/redo of changelogs.
- logging.debug('setting slave time: %s' % repr(item[1]))
- self.upd_stime(item[1][1], item[1][0])
+ logging.debug('setting slave time: %s' % repr(item[1]))
+ self.upd_stime(item[1][1], item[1][0])
else:
logging.warn('unknown tuple in comlist (%s)' % repr(item))
except IndexError:
@@ -1603,8 +1589,15 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
str(st.st_mtime)])
self.Xcrawl(e, xtr_root)
stime_to_update = xte
- if self.xsync_upper_limit:
- stime_to_update = min(self.xsync_upper_limit, xte)
+ # Live Changelog Start time indicates that from that time
+ # onwards Live changelogs are available. If we update stime
+ # greater than live_changelog_start time then Geo-rep will
+ # skip those changelogs as already processed. But Xsync
+ # actually failed to sync the deletes and Renames. Update
+ # stime as min(Live_changelogs_time, Actual_stime) When it
+ # switches to Changelog mode, it syncs Deletes and Renames.
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xte)
self.stimes.append((e, stime_to_update))
elif stat.S_ISLNK(mo):
self.write_entry_change(
@@ -1630,8 +1623,8 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
self.write_entry_change("D", [gfid])
if path == '.':
stime_to_update = xtl
- if self.xsync_upper_limit:
- stime_to_update = min(self.xsync_upper_limit, xtl)
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xtl)
self.stimes.append((path, stime_to_update))
self.sync_done(self.stimes, True)
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index e9796fc48f6..ae94f04aa37 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -1333,8 +1333,8 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
g3.crawlwrap(oneshot=True)
else:
g1.crawlwrap(oneshot=True)
- except (ChangelogException, NoPurgeTimeAvailable,
- PartialHistoryAvailable) as e:
+ except (ChangelogException, PartialHistoryAvailable,
+ NoPurgeTimeAvailable) as e:
if isinstance(e, ChangelogException):
logging.info('Changelog history crawl failed, fallback '
'to xsync: %s - %s' % (e.errno, e.strerror))
@@ -1342,8 +1342,7 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
logging.info('Partial history available, using xsync crawl'
' after consuming history '
'till %s' % str(e))
- g1.crawlwrap(oneshot=True, no_stime_update=True,
- register_time=register_time)
+ g1.crawlwrap(oneshot=True, register_time=register_time)
# crawl loop: Try changelog crawl, if failed
# switch to FS crawl