diff options
author | Csaba Henk <csaba@gluster.com> | 2011-04-29 06:24:45 +0000 |
---|---|---|
committer | Anand Avati <avati@gluster.com> | 2011-05-09 13:27:03 -0700 |
commit | 6f2ed82bfcfe494ef273bff55cf69480a7472cc5 (patch) | |
tree | e911f81c9bcc82628b3dc3e651616349dbc0f6b3 /xlators | |
parent | 9ffd9c51f3833e04a60373648ba3c69c2ed7a68a (diff) |
syncdaemon: refactor the monitor
- detect the faulty state early
- keep the feedback fd within the gsyncd module
Signed-off-by: Csaba Henk <csaba@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2537 (gsync autorestart)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2537
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/features/marker/utils/syncdaemon/gsyncd.py | 5 |
-rw-r--r-- | xlators/features/marker/utils/syncdaemon/master.py | 3 |
-rw-r--r-- | xlators/features/marker/utils/syncdaemon/monitor.py | 22 |
3 files changed, 17 insertions, 13 deletions
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py index 963e7101b..a502c2009 100644 --- a/xlators/features/marker/utils/syncdaemon/gsyncd.py +++ b/xlators/features/marker/utils/syncdaemon/gsyncd.py @@ -252,8 +252,7 @@ def main_i(): ffd = rconf.get('feedback_fd') if ffd: - gconf.feedback_fd = ffd - fcntl.fcntl(int(ffd), fcntl.F_SETFD, fcntl.FD_CLOEXEC) + fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) #normalize loglevel lvl0 = gconf.log_level @@ -295,6 +294,8 @@ def main_i(): # complete remote connection in child remote.connect_remote(go_daemon='done') local.connect() + if ffd: + os.close(ffd) local.service_loop(*[r for r in [remote] if r]) logging.info("exiting.") diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py index 76f924ed3..35dc4ee06 100644 --- a/xlators/features/marker/utils/syncdaemon/master.py +++ b/xlators/features/marker/utils/syncdaemon/master.py @@ -87,9 +87,6 @@ class GMaster(object): self.terminate = False def crawl_loop(self): - ffd = getattr(gconf, 'feedback_fd', None) - if ffd: - os.close(int(ffd)) timo = int(gconf.timeout or 0) if timo > 0: def keep_alive(): diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py index a86acdc75..365e91435 100644 --- a/xlators/features/marker/utils/syncdaemon/monitor.py +++ b/xlators/features/marker/utils/syncdaemon/monitor.py @@ -48,17 +48,23 @@ class Monitor(object): os.execv(sys.executable, argv + ['--feedback-fd', str(pw)]) os.close(pw) t0 = time.time() - select.select((pr,), (), (), conn_timeout) + so = select.select((pr,), (), (), conn_timeout)[0] os.close(pr) - et = time.time() - t0 - if et < conn_timeout: - et2 = conn_timeout - et - logging.debug("worker got connected in %d sec, " - "waiting %d more to make sure it's fine" % (et, et2)) - time.sleep(et2) + if so: ret = nwait(cpid, os.WNOHANG) + if ret != None: 
+ logging.debug("worker died before establishing connection") + else: + logging.debug("worker seems to be connected (?? racy check)") + while time.time() < t0 + conn_timeout: + ret = nwait(cpid, os.WNOHANG) + if ret != None: + logging.debug("worker died in startup phase") + break + time.sleep(1) else: - logging.debug("worker not confirmed in %d sec, aborting it" % et) + logging.debug("worker not confirmed in %d sec, aborting it" % \ + conn_timeout) os.kill(cpid, SIGKILL) ret = nwait(cpid) if ret == None: |