summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--geo-replication/syncdaemon/gsyncdstatus.py1
-rw-r--r--geo-replication/syncdaemon/monitor.py18
2 files changed, 15 insertions, 4 deletions
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
index e8a810f4b38..87fa09c070c 100644
--- a/geo-replication/syncdaemon/gsyncdstatus.py
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -103,6 +103,7 @@ class LockedOpen(object):
return f
def __exit__(self, _exc_type, _exc_value, _traceback):
+ fcntl.flock(self.fileobj, fcntl.LOCK_UN)
self.fileobj.close()
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
index 52ae256fb14..97274f32422 100644
--- a/geo-replication/syncdaemon/monitor.py
+++ b/geo-replication/syncdaemon/monitor.py
@@ -105,10 +105,6 @@ class Monitor(object):
master,
"%s::%s" % (slave_host,
slave_vol))
-
- set_monitor_status(gconf.get("state-file"), self.ST_STARTED)
- self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
-
ret = 0
def nwait(p, o=0):
@@ -153,6 +149,7 @@ class Monitor(object):
# Spawn the worker and agent in lock to avoid fd leak
self.lock.acquire()
+ self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
logging.info(lf('starting gsyncd worker',
brick=w[0]['dir'],
slave_node=remote_host))
@@ -349,6 +346,19 @@ class Monitor(object):
t = Thread(target=wmon, args=[wx])
t.start()
ta.append(t)
+
+ # monitor status was being updated in each monitor thread. It
+ # should not be done as it can cause deadlock for a worker start.
+ # set_monitor_status uses flock to synchronize multple instances
+ # updating the file. Since each monitor thread forks worker and
+ # agent, these processes can hold the reference to fd of status
+ # file causing deadlock to workers which starts later as flock
+ # will not be release until all references to same fd is closed.
+ # It will also cause fd leaks.
+
+ self.lock.acquire()
+ set_monitor_status(gconf.get("state-file"), self.ST_STARTED)
+ self.lock.release()
for t in ta:
t.join()