diff options
author | Kotresh HR <khiremat@redhat.com> | 2015-07-03 16:32:56 +0530 |
---|---|---|
committer | Venky Shankar <vshankar@redhat.com> | 2015-08-05 22:49:52 -0700 |
commit | b7118970edab7c3ab9c7039ef340c40326ff6930 (patch) | |
tree | 0eca5db307cdf4714089bd395e55dcd9a73372c9 /geo-replication/syncdaemon/master.py | |
parent | 0459bca39f00b7ac9f6d661c049cba03179efeff (diff) |
geo-rep: Fix history failure
Both ACTIVE and PASSIVE workers register to changelog
at almost same time. When PASSIVE worker becomes ACTIVE,
the start and end time would be current stime and register_time
respectively for the history API. Hence register_time would be less
than stime, for which history obviously fails. But it will
be successful for the next restart as new register_time > stime.
Fix is to pass current time as the end time to history call
instead of the register_time.
Also improved the logging for ACTIVE/PASSIVE switching.
BUG: 1247882
Change-Id: I40c582cc32fe29a6c30340ec81a3b5d30e461e71
Reviewed-on: http://review.gluster.org/11524
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Aravinda VK <avishwan@redhat.com>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: http://review.gluster.org/11784
Reviewed-by: Milind Changire <mchangir@redhat.com>
Diffstat (limited to 'geo-replication/syncdaemon/master.py')
-rw-r--r-- | geo-replication/syncdaemon/master.py | 18 |
1 files changed, 13 insertions, 5 deletions
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 5b8abc5fd9f..1bc2450c101 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -441,6 +441,7 @@ class GMasterCommon(object): t.start() def mgmt_lock(self): + """Take management volume lock """ fd = None bname = str(self.uuid) + "_" + str(gconf.slave_id) + "_subvol_" \ @@ -473,10 +474,16 @@ class GMasterCommon(object): os.close(fd) if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN): # cannot grab, it's taken - logging.debug("Lock held by someother worker process") + if not gconf.passive_earlier: + gconf.passive_earlier = True + logging.info("Didn't get lock : %s : Becoming PASSIVE" + % gconf.local_path) return False raise - logging.debug("Got the lock") + + if not gconf.active_earlier: + gconf.active_earlier = True + logging.info("Got lock : %s : Becoming ACTIVE" % gconf.local_path) return True def should_crawl(self): @@ -1123,8 +1130,9 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin): self.status.set_worker_crawl_status("History Crawl") purge_time = self.get_purge_time() - logging.info('starting history crawl... turns: %s, stime: %s' - % (self.history_turns, repr(purge_time))) + end_time = int(time.time()) + logging.info('starting history crawl... turns: %s, stime: %s, etime: %s' + % (self.history_turns, repr(purge_time), repr(end_time))) if not purge_time or purge_time == URXTIME: logging.info("stime not available, abandoning history crawl") @@ -1138,7 +1146,7 @@ class GMasterChangeloghistoryMixin(GMasterChangelogMixin): ret, actual_end = self.changelog_agent.history( changelog_path, purge_time[0], - self.changelog_register_time, + end_time, int(gconf.sync_jobs)) # scan followed by getchanges till scan returns zero. |