diff options
-rw-r--r-- | geo-replication/syncdaemon/master.py | 133 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 9 |
2 files changed, 42 insertions, 100 deletions
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index a19fe264419..1ef76061976 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -41,11 +41,7 @@ def _volinfo_hook_relax_foreign(self): expiry) time.sleep(expiry) volinfo_sys = self.get_sys_volinfo() - self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state, - volinfo_sys) - if self.inter_master: - raise GsyncdError("cannot be intermediate master in special mode") - return (volinfo_sys, state_change) + return volinfo_sys # The API! @@ -134,10 +130,7 @@ class NormalMixin(object): return (vi, gap) def volinfo_hook(self): - volinfo_sys = self.get_sys_volinfo() - self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state, - volinfo_sys) - return (volinfo_sys, state_change) + return self.get_sys_volinfo() def xtime_reversion_hook(self, path, xtl, xtr): if xtr > xtl: @@ -227,13 +220,6 @@ class GMasterCommon(object): if self.volinfo: return self.volinfo['volume_mark'] - @property - def inter_master(self): - """decide if we are an intermediate master - in a cascading setup - """ - return self.volinfo_state[self.KFGN] and True or False - def xtime(self, path, *a, **opts): """get amended xtime @@ -303,11 +289,6 @@ class GMasterCommon(object): self.total_crawl_stats = None self.start = None self.change_seen = None - # the authoritative (foreign, native) volinfo pair - # which lets us deduce what to do when we refetch - # the volinfos from system - uuid_preset = getattr(gconf, 'volume_id', None) - self.volinfo_state = (uuid_preset and {'uuid': uuid_preset}, None) # the actual volinfo we make use of self.volinfo = None self.terminate = False @@ -326,34 +307,6 @@ class GMasterCommon(object): t = Thread(target=keep_alive) t.start() - def volinfo_query(self): - """volume info state machine""" - volinfo_sys, state_change = self.volinfo_hook() - if self.inter_master: - self.volinfo = volinfo_sys[self.KFGN] - else: - self.volinfo = volinfo_sys[self.KNAT] - if state_change == self.KFGN or (state_change == self.KNAT and not self.inter_master): - logging.info('new master is %s', self.uuid) - if self.volinfo: - logging.info("%s master with volume id %s ..." % \ - (self.inter_master and "intermediate" or "primary", - self.uuid)) - if state_change == self.KFGN: - gconf.configinterface.set('volume_id', self.uuid) - if self.volinfo: - if self.volinfo['retval']: - raise GsyncdError ("master is corrupt") - self.start_checkpoint_thread() - else: - if should_display_info or self.crawls == 0: - if self.inter_master: - logging.info("waiting for being synced from %s ..." % \ - self.volinfo_state[self.KFGN]['uuid']) - else: - logging.info("waiting for volume info ...") - return True - def should_crawl(cls): return (gconf.glusterd_uuid in cls.master.server.node_uuid()) @@ -366,25 +319,38 @@ class GMasterCommon(object): # for a passive gsyncd (ie. in a replicate scenario) # the keepalive thread would keep the connection alive. self.init_keep_alive() + + # no need to maintain volinfo state machine. + # in a cascading setup, each geo-replication session is + # independent (ie. 'volume-mark' and 'xtime' are not + # propogated). This is beacuse the slave's xtime is now + # stored on the master itself. 'volume-mark' just identifies + # that we are in a cascading setup and need to enable + # 'geo-replication.ignore-pid-check' option. + volinfo_sys = self.volinfo_hook() + self.volinfo = volinfo_sys[self.KNAT] + inter_master = volinfo_sys[self.KFGN] + logging.info("%s master with volume id %s ..." % \ + (inter_master and "intermediate" or "primary", + self.uuid)) + gconf.configinterface.set('volume_id', self.uuid) + if self.volinfo: + if self.volinfo['retval']: + raise GsyncdError("master is corrupt") + self.start_checkpoint_thread() + else: + raise GsyncdError("master volinfo unavailable") self.total_crawl_stats = self.get_initial_crawl_data() self.lastreport['time'] = time.time() logging.info('crawl interval: %d seconds' % self.sleep_interval) + t0 = time.time() crawl = self.should_crawl() while not self.terminate: - if self.volinfo_query(): - continue - t1 = time.time() - if int(t1 - t0) >= 60: #lets hardcode this check to 60 seconds - crawl = self.should_crawl() - t0 = t1 - if not crawl: - time.sleep(5) - continue if self.start: logging.debug("... crawl #%d done, took %.6f seconds" % \ (self.crawls, time.time() - self.start)) - self.start = t1 + self.start = time.time() should_display_info = self.start - self.lastreport['time'] >= 60 if should_display_info: logging.info("%d crawls, %d turns", @@ -393,6 +359,13 @@ class GMasterCommon(object): self.lastreport.update(crawls = self.crawls, turns = self.turns, time = self.start) + t1 = time.time() + if int(t1 - t0) >= 60: #lets hardcode this check to 60 seconds + crawl = self.should_crawl() + t0 = t1 + if not crawl: + time.sleep(5) + continue self.crawl() if oneshot: return @@ -585,46 +558,6 @@ class GMasterCommon(object): self.slave.server.setattr(path, adct) self.set_slave_xtime(path, mark) - @staticmethod - def volinfo_state_machine(volinfo_state, volinfo_sys): - """compute new volinfo_state from old one and incoming - as of current system state, also indicating if there was a - change regarding which volume mark is the authoritative one - - @volinfo_state, @volinfo_sys are pairs of volume mark dicts - (foreign, native). - - Note this method is marked as static, ie. the computation is - pure, without reliance on any excess implicit state. State - transitions which are deemed as ambiguous or banned will raise - an exception. - - """ - # store the value below "boxed" to emulate proper closures - # (variables of the enclosing scope are available inner functions - # provided they are no reassigned; mutation is OK). - param = FreeObject(relax_mismatch = False, state_change = None, index=-1) - def select_vi(vi0, vi): - param.index += 1 - if vi and (not vi0 or vi0['uuid'] == vi['uuid']): - if not vi0 and not param.relax_mismatch: - param.state_change = param.index - # valid new value found; for the rest, we are graceful about - # uuid mismatch - param.relax_mismatch = True - return vi - if vi0 and vi and vi0['uuid'] != vi['uuid'] and not param.relax_mismatch: - # uuid mismatch for master candidate, bail out - raise GsyncdError("aborting on uuid change from %s to %s" % \ - (vi0['uuid'], vi['uuid'])) - # fall back to old - return vi0 - newstate = tuple(select_vi(*vip) for vip in zip(volinfo_state, volinfo_sys)) - srep = lambda vi: vi and vi['uuid'][0:8] - logging.debug('(%s, %s) << (%s, %s) -> (%s, %s)' % \ - tuple(srep(vi) for vi in volinfo_state + volinfo_sys + newstate)) - return newstate, param.state_change - class GMasterChangelogMixin(GMasterCommon): """ changelog based change detection and syncing """ @@ -765,7 +698,7 @@ class GMasterChangelogMixin(GMasterCommon): e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2])) entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st)) else: - pass + logging.warn('ignoring %s [op %s]' % (gfid, ty)) elif et in self.TYPE_GFID: go = os.path.join(pfx, ec[0]) st = lstat(go) diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 745bc282ce8..05599989813 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -2832,6 +2832,15 @@ glusterd_set_gsync_confs (glusterd_volinfo_t *volinfo) if (ret) goto out; + /** + * enable ignore-pid-check blindly as it could be needed for + * cascading setups. + */ + ret = glusterd_set_gsync_knob (volinfo, VKEY_MARKER_XTIME_FORCE, + &volfile_changed); + if (ret) + goto out; + ret = glusterd_set_gsync_knob (volinfo, VKEY_CHANGELOG, &volfile_changed); if (ret) |