summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- xlators/features/marker/utils/syncdaemon/gsyncd.py | 5
-rw-r--r-- xlators/features/marker/utils/syncdaemon/master.py | 3
-rw-r--r-- xlators/features/marker/utils/syncdaemon/monitor.py | 22
3 files changed, 17 insertions, 13 deletions
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py
index 963e7101bf4..a502c200998 100644
--- a/xlators/features/marker/utils/syncdaemon/gsyncd.py
+++ b/xlators/features/marker/utils/syncdaemon/gsyncd.py
@@ -252,8 +252,7 @@ def main_i():
ffd = rconf.get('feedback_fd')
if ffd:
- gconf.feedback_fd = ffd
- fcntl.fcntl(int(ffd), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
#normalize loglevel
lvl0 = gconf.log_level
@@ -295,6 +294,8 @@ def main_i():
# complete remote connection in child
remote.connect_remote(go_daemon='done')
local.connect()
+ if ffd:
+ os.close(ffd)
local.service_loop(*[r for r in [remote] if r])
logging.info("exiting.")
diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py
index 76f924ed37a..35dc4ee06aa 100644
--- a/xlators/features/marker/utils/syncdaemon/master.py
+++ b/xlators/features/marker/utils/syncdaemon/master.py
@@ -87,9 +87,6 @@ class GMaster(object):
self.terminate = False
def crawl_loop(self):
- ffd = getattr(gconf, 'feedback_fd', None)
- if ffd:
- os.close(int(ffd))
timo = int(gconf.timeout or 0)
if timo > 0:
def keep_alive():
diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py
index a86acdc7566..365e91435fd 100644
--- a/xlators/features/marker/utils/syncdaemon/monitor.py
+++ b/xlators/features/marker/utils/syncdaemon/monitor.py
@@ -48,17 +48,23 @@ class Monitor(object):
os.execv(sys.executable, argv + ['--feedback-fd', str(pw)])
os.close(pw)
t0 = time.time()
- select.select((pr,), (), (), conn_timeout)
+ so = select.select((pr,), (), (), conn_timeout)[0]
os.close(pr)
- et = time.time() - t0
- if et < conn_timeout:
- et2 = conn_timeout - et
- logging.debug("worker got connected in %d sec, "
- "waiting %d more to make sure it's fine" % (et, et2))
- time.sleep(et2)
+ if so:
ret = nwait(cpid, os.WNOHANG)
+ if ret != None:
+ logging.debug("worker died before establishing connection")
+ else:
+ logging.debug("worker seems to be connected (?? racy check)")
+ while time.time() < t0 + conn_timeout:
+ ret = nwait(cpid, os.WNOHANG)
+ if ret != None:
+ logging.debug("worker died in startup phase")
+ break
+ time.sleep(1)
else:
- logging.debug("worker not confirmed in %d sec, aborting it" % et)
+ logging.debug("worker not confirmed in %d sec, aborting it" % \
+ conn_timeout)
os.kill(cpid, SIGKILL)
ret = nwait(cpid)
if ret == None: