From 6a6bd449247cfed587922cbc1b6b54a1fa0301ad Mon Sep 17 00:00:00 2001 From: Aravinda VK Date: Fri, 27 Jun 2014 17:52:25 +0530 Subject: geo-rep: Fix the fd leak in worker/agent spawn The worker and agent use a pipe to communicate; if the worker dies for some reason, the agent should get EOF and terminate. Each worker-agent spawning is done in a thread. Due to a race, when multiple workers run on the same node, they can retain the pipe refs of other workers. Hence the agent will not get EOF even if the worker dies. BUG: 1114003 Change-Id: I36b9709b9392299483606bd3ef1db764fa3f2bff Signed-off-by: Aravinda VK Reviewed-on: http://review.gluster.org/8194 Tested-by: Justin Clift Reviewed-by: Venky Shankar Tested-by: Venky Shankar --- geo-replication/syncdaemon/monitor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'geo-replication/syncdaemon') diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py index 5f4745501ec..dbe9c0b0d40 100644 --- a/geo-replication/syncdaemon/monitor.py +++ b/geo-replication/syncdaemon/monitor.py @@ -157,6 +157,9 @@ class Monitor(object): return 1 conn_timeout = int(gconf.connection_timeout) while ret in (0, 1): + # Spawn the worker and agent in lock to avoid fd leak + self.lock.acquire() + logging.info('-' * conn_timeout) logging.info('starting gsyncd worker') @@ -188,10 +191,9 @@ class Monitor(object): ','.join([str(rw), str(ww), str(ra), str(wa)]), '--resource-remote', w[1]]) - self.lock.acquire() + cpids.add(cpid) agents.add(apid) - self.lock.release() os.close(pw) # close all RPC pipes in monitor @@ -199,6 +201,7 @@ class Monitor(object): os.close(wa) os.close(rw) os.close(ww) + self.lock.release() t0 = time.time() so = select((pr,), (), (), conn_timeout)[0] -- cgit