| author | Aravinda VK <avishwan@redhat.com> | 2015-03-11 13:31:09 +0530 |
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-15 22:55:31 -0700 |
| commit | 7d8be3613f7384f5118f26e194fe7c64ea69d11c | |
| tree | b6dcdd9c04ebb8f855e40390165d790ba7fcfa75 /geo-replication | |
| parent | f0224ce93ae9ad420e23612fe6e6707a821f9cab | |
geo-rep: Do not use xsync_upper_limit for change detection
Use the register time (xsync_upper_limit) only for the stime update; do
not use it for change detection.
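
For reference, a minimal sketch of the change-detection predicate before and
after this patch (the real change is in need_sync() in the diff below; xte is
the entry's xtime on the master, xtrd the stime recorded for the slave, both
(sec, nsec) tuples):

```python
# Sketch of NormalMixin.need_sync() before and after this patch.
# xte  = xtime of the entry on the master
# xtrd = stime already recorded for the slave

def need_sync_before(xte, xtrd, xsync_upper_limit=None):
    # Old behaviour: entries whose xtime lies beyond the register time
    # (xsync_upper_limit) are never picked by the XSync crawl.
    if xsync_upper_limit:
        return xte > xtrd and xte <= xsync_upper_limit
    return xte > xtrd

def need_sync_after(xte, xtrd):
    # New behaviour: XSync picks every entry newer than the slave stime;
    # the register time now only caps the stime written back (see below).
    return xte > xtrd
```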
Problem 1:
If a file was created before geo-rep was set up, its xtime xattr does
not exist. Geo-rep sets the file's xtime to the current time when it is
missing, so xtime > upper_limit and geo-rep (XSync) never picks the
file. The changelog records only the SETXATTR, and the file fails to
sync.
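
Concretely (the timestamps below are hypothetical, chosen only to show the
ordering), the old predicate skips such a file:

```python
# Hypothetical Problem-1 scenario with the old predicate.
register_time = 1426050000                  # geo-rep start / changelog register time
xsync_upper_limit = (register_time + 1, 0)  # old upper limit, (sec, nsec)

xtrd = (0, 0)              # slave has never synced this entry
xte = (1426053600, 0)      # missing xtime was stamped "now", after geo-rep start

# Old check: xte > xtrd holds, but xte exceeds xsync_upper_limit, so XSync skips it.
picked = xte > xtrd and xte <= xsync_upper_limit
print(picked)  # False -> only the SETXATTR lands in the changelog; the file never syncs
```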
Problem 2:
If a file is created before the geo-rep session is created and updated
after geo-rep starts, its xtime is greater than the upper limit (the
geo-rep start time, i.e. the changelog register time), so geo-rep
(XSync) will not pick the file for syncing. The changelog has only DATA
recorded for that file, and geo-rep attempts the DATA without any ENTRY
ops, failing with an rsync error.
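
The fix therefore keeps XSync's change detection unbounded and only caps the
stime it records at the changelog register time, so the live changelog later
replays whatever XSync could not express (missed ENTRY ops, deletes and
renames). A minimal sketch of that capping, mirroring the logic added to
GMasterXsyncMixin in the diff below (timestamps are again hypothetical):

```python
# Sketch of how the recorded stime is capped; times are (sec, nsec) tuples,
# so plain tuple comparison (and min()) orders them correctly.

def clamp_stime(actual_stime, live_changelog_start_time=None):
    # Never record an stime beyond the point from which live changelogs
    # exist; otherwise the changelog crawl would treat those changelogs as
    # already processed and the deletes/renames XSync missed would be lost.
    if live_changelog_start_time:
        return min(live_changelog_start_time, actual_stime)
    return actual_stime

# Hypothetical values: XSync finished at xte, changelogs are live from earlier.
xte = (1426053600, 0)
live_changelog_start_time = (1426050000, 0)
print(clamp_stime(xte, live_changelog_start_time))  # -> (1426050000, 0)
```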
BUG: 1200733
Change-Id: Ie4e8f284db689d2c755ef8e7ecbb658db1c0785f
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: http://review.gluster.org/9855
Reviewed-by: Kotresh HR <khiremat@redhat.com>
Reviewed-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Tested-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'geo-replication')
| -rw-r--r-- | geo-replication/syncdaemon/master.py | 53 |
| -rw-r--r-- | geo-replication/syncdaemon/resource.py | 7 |
2 files changed, 26 insertions, 34 deletions
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
index dfe65fe6709..e60624391a1 100644
--- a/geo-replication/syncdaemon/master.py
+++ b/geo-replication/syncdaemon/master.py
@@ -170,10 +170,7 @@ class NormalMixin(object):
             raise GsyncdError("timestamp corruption for " + path)
 
     def need_sync(self, e, xte, xtrd):
-        if self.xsync_upper_limit:
-            return xte > xtrd and xte <= self.xsync_upper_limit
-        else:
-            return xte > xtrd
+        return xte > xtrd
 
     def set_slave_xtime(self, path, mark):
         self.slave.server.set_stime(path, self.uuid, mark)
@@ -491,8 +488,7 @@ class GMasterCommon(object):
     def register(self):
         self.register()
 
-    def crawlwrap(self, oneshot=False, no_stime_update=False,
-                  register_time=None):
+    def crawlwrap(self, oneshot=False, register_time=None):
         if oneshot:
             # it's important to do this during the oneshot crawl as
             # for a passive gsyncd (ie. in a replicate scenario)
@@ -503,11 +499,11 @@ class GMasterCommon(object):
         # then it sets register_time which is the time when geo-rep
         # worker registerd to changelog consumption. Since nsec is
         # not considered in register time, their are chances of skipping
-        # changes detection in xsync crawl. Add 1 sec to upper_limit.
-        # This limit will be reset when crawlwrap is called again.
-        self.xsync_upper_limit = None
+        # changes detection in xsync crawl. This limit will be reset when
+        # crawlwrap is called again.
+        self.live_changelog_start_time = None
         if register_time:
-            self.xsync_upper_limit = (register_time + 1, 0)
+            self.live_changelog_start_time = (register_time, 0)
 
         # no need to maintain volinfo state machine.
         # in a cascading setup, each geo-replication session is
@@ -583,7 +579,7 @@ class GMasterCommon(object):
                 time.sleep(5)
                 continue
             self.update_worker_health("Active")
-            self.crawl(no_stime_update=no_stime_update)
+            self.crawl()
             if oneshot:
                 return
             time.sleep(self.sleep_interval)
@@ -1278,7 +1274,7 @@ class GMasterChangelogMixin(GMasterCommon):
             except:
                 raise
 
-    def crawl(self, no_stime_update=False):
+    def crawl(self):
         self.update_worker_crawl_status("Changelog Crawl")
         changes = []
         # get stime (from the brick) and purge changelogs
@@ -1323,7 +1319,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
         self.processed_changelogs_dir = os.path.join(self.setup_working_dir(),
                                                      ".history/.processed")
 
-    def crawl(self, no_stime_update=False):
+    def crawl(self):
         self.history_turns += 1
         self.update_worker_crawl_status("History Crawl")
         purge_time = self.get_purge_time()
@@ -1425,7 +1421,7 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
             else:
                 raise
 
-    def crawl(self, no_stime_update=False):
+    def crawl(self):
         """
         event dispatcher thread
 
@@ -1451,18 +1447,8 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
                     self.process([item[1]], 0)
                     self.archive_and_purge_changelogs([item[1]])
                 elif item[0] == 'stime':
-                    if not no_stime_update:
-                        # xsync is started after running history but if
-                        # history actual end time is less than register time
-                        # then if we update stime, live changelog processing
-                        # will skip the changelogs for which TS is less than
-                        # stime. During this deletes and renames are not
-                        # propogated. By not setting stime live changelog will
-                        # start processing from the register time. Since we
-                        # have xsync_upper_limit their will not be much
-                        # overlap/redo of changelogs.
-                        logging.debug('setting slave time: %s' % repr(item[1]))
-                        self.upd_stime(item[1][1], item[1][0])
+                    logging.debug('setting slave time: %s' % repr(item[1]))
+                    self.upd_stime(item[1][1], item[1][0])
                 else:
                     logging.warn('unknown tuple in comlist (%s)' % repr(item))
             except IndexError:
@@ -1603,8 +1589,15 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
                                               str(st.st_mtime)])
                 self.Xcrawl(e, xtr_root)
                 stime_to_update = xte
-                if self.xsync_upper_limit:
-                    stime_to_update = min(self.xsync_upper_limit, xte)
+                # Live Changelog Start time indicates that from that time
+                # onwards Live changelogs are available. If we update stime
+                # greater than live_changelog_start time then Geo-rep will
+                # skip those changelogs as already processed. But Xsync
+                # actually failed to sync the deletes and Renames. Update
+                # stime as min(Live_changelogs_time, Actual_stime) When it
+                # switches to Changelog mode, it syncs Deletes and Renames.
+                if self.live_changelog_start_time:
+                    stime_to_update = min(self.live_changelog_start_time, xte)
                 self.stimes.append((e, stime_to_update))
             elif stat.S_ISLNK(mo):
                 self.write_entry_change(
@@ -1630,8 +1623,8 @@ class GMasterXsyncMixin(GMasterChangelogMixin):
                 self.write_entry_change("D", [gfid])
         if path == '.':
             stime_to_update = xtl
-            if self.xsync_upper_limit:
-                stime_to_update = min(self.xsync_upper_limit, xtl)
+            if self.live_changelog_start_time:
+                stime_to_update = min(self.live_changelog_start_time, xtl)
             self.stimes.append((path, stime_to_update))
             self.sync_done(self.stimes, True)
 
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index e9796fc48f6..ae94f04aa37 100644
--- a/geo-replication/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -1333,8 +1333,8 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
                     g3.crawlwrap(oneshot=True)
                 else:
                     g1.crawlwrap(oneshot=True)
-            except (ChangelogException, NoPurgeTimeAvailable,
-                    PartialHistoryAvailable) as e:
+            except (ChangelogException, PartialHistoryAvailable,
+                    NoPurgeTimeAvailable) as e:
                 if isinstance(e, ChangelogException):
                     logging.info('Changelog history crawl failed, fallback '
                                  'to xsync: %s - %s' % (e.errno, e.strerror))
@@ -1342,8 +1342,7 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
                     logging.info('Partial history available, using xsync crawl'
                                  ' after consuming history '
                                  'till %s' % str(e))
-                g1.crawlwrap(oneshot=True, no_stime_update=True,
-                             register_time=register_time)
+                g1.crawlwrap(oneshot=True, register_time=register_time)
 
             # crawl loop: Try changelog crawl, if failed
             # switch to FS crawl
