diff options
| -rw-r--r-- | xlators/features/marker/utils/syncdaemon/gsyncd.py | 5 | ||||
| -rw-r--r-- | xlators/features/marker/utils/syncdaemon/master.py | 3 | ||||
| -rw-r--r-- | xlators/features/marker/utils/syncdaemon/monitor.py | 22 | 
3 files changed, 17 insertions, 13 deletions
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py index 60980f54659..193af9d5f37 100644 --- a/xlators/features/marker/utils/syncdaemon/gsyncd.py +++ b/xlators/features/marker/utils/syncdaemon/gsyncd.py @@ -249,8 +249,7 @@ def main_i():      ffd = rconf.get('feedback_fd')      if ffd: -        gconf.feedback_fd = ffd -        fcntl.fcntl(int(ffd), fcntl.F_SETFD, fcntl.FD_CLOEXEC) +        fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)      #normalize loglevel      lvl0 = gconf.log_level @@ -292,6 +291,8 @@ def main_i():              # complete remote connection in child              remote.connect_remote(go_daemon='done')      local.connect() +    if ffd: +        os.close(ffd)      local.service_loop(*[r for r in [remote] if r])      logging.info("exiting.") diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py index 76f924ed37a..35dc4ee06aa 100644 --- a/xlators/features/marker/utils/syncdaemon/master.py +++ b/xlators/features/marker/utils/syncdaemon/master.py @@ -87,9 +87,6 @@ class GMaster(object):  	self.terminate = False      def crawl_loop(self): -        ffd = getattr(gconf, 'feedback_fd', None) -        if ffd: -            os.close(int(ffd))          timo = int(gconf.timeout or 0)          if timo > 0:              def keep_alive(): diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py index a86acdc7566..365e91435fd 100644 --- a/xlators/features/marker/utils/syncdaemon/monitor.py +++ b/xlators/features/marker/utils/syncdaemon/monitor.py @@ -48,17 +48,23 @@ class Monitor(object):                  os.execv(sys.executable, argv + ['--feedback-fd', str(pw)])              os.close(pw)              t0 = time.time() -            select.select((pr,), (), (), conn_timeout) +            so = select.select((pr,), (), (), conn_timeout)[0]              os.close(pr) -            et = time.time() - t0 -            if et < conn_timeout: -                et2 = conn_timeout - et -                logging.debug("worker got connected in %d sec, " -                              "waiting %d more to make sure it's fine" % (et, et2)) -                time.sleep(et2) +            if so:                  ret = nwait(cpid, os.WNOHANG) +                if ret != None: +                    logging.debug("worker died before establishing connection") +                else: +                    logging.debug("worker seems to be connected (?? racy check)") +                    while time.time() < t0 + conn_timeout: +                        ret = nwait(cpid, os.WNOHANG) +                        if ret != None: +                            logging.debug("worker died in startup phase") +                            break +                        time.sleep(1)              else: -                logging.debug("worker not confirmed in %d sec, aborting it" % et) +                logging.debug("worker not confirmed in %d sec, aborting it" % \ +                              conn_timeout)                  os.kill(cpid, SIGKILL)                  ret = nwait(cpid)              if ret == None:  | 
