diff options
author | Aravinda VK <avishwan@redhat.com> | 2014-06-27 17:52:25 +0530 |
---|---|---|
committer | Venky Shankar <vshankar@redhat.com> | 2014-06-30 03:40:45 -0700 |
commit | 6a6bd449247cfed587922cbc1b6b54a1fa0301ad (patch) | |
tree | e3747dec97a3b8f491660d430e824c011bd0bbc5 /geo-replication/syncdaemon | |
parent | dd7c7b8a0fe209826f80cd158b80194835d3d155 (diff) |
geo-rep: Fix the fd leak in worker/agent spawn
The worker and agent use a pipe to communicate; if the worker dies
for some reason, the agent should get EOF and terminate.
Each worker-agent pair is spawned in a thread. Due to a race,
multiple workers on the same node can retain the pipe refs of
other workers. Hence the agent will not get EOF even if the
worker dies.
BUG: 1114003
Change-Id: I36b9709b9392299483606bd3ef1db764fa3f2bff
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: http://review.gluster.org/8194
Tested-by: Justin Clift <justin@gluster.org>
Reviewed-by: Venky Shankar <vshankar@redhat.com>
Tested-by: Venky Shankar <vshankar@redhat.com>
Diffstat (limited to 'geo-replication/syncdaemon')
-rw-r--r-- | geo-replication/syncdaemon/monitor.py | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py index 5f4745501ec..dbe9c0b0d40 100644 --- a/geo-replication/syncdaemon/monitor.py +++ b/geo-replication/syncdaemon/monitor.py @@ -157,6 +157,9 @@ class Monitor(object): return 1 conn_timeout = int(gconf.connection_timeout) while ret in (0, 1): + # Spawn the worker and agent in lock to avoid fd leak + self.lock.acquire() + logging.info('-' * conn_timeout) logging.info('starting gsyncd worker') @@ -188,10 +191,9 @@ class Monitor(object): ','.join([str(rw), str(ww), str(ra), str(wa)]), '--resource-remote', w[1]]) - self.lock.acquire() + cpids.add(cpid) agents.add(apid) - self.lock.release() os.close(pw) # close all RPC pipes in monitor @@ -199,6 +201,7 @@ class Monitor(object): os.close(wa) os.close(rw) os.close(ww) + self.lock.release() t0 = time.time() so = select((pr,), (), (), conn_timeout)[0] |