diff options
author | Csaba Henk <csaba@gluster.com> | 2011-02-15 10:52:32 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2011-02-15 21:44:43 -0800 |
commit | 1569424d1425a2f81c428d3968cd103ab2cad49e (patch) | |
tree | 51f2233a8434eaae6068ea93d2be4b5b5497af03 /xlators | |
parent | a68b4ad416970ec0ca710f650f54b87c3b92428e (diff) |
syncdaemon: change pidfile handling approach
Signal handling of the python interpreter is a bit messy, so we cannot
rely on executing a final clause upon termination. Switch over to fcntl
lock based pidfile handling, which can provide reliable info about the status
of the process. (Due to the aforementioned reason, the pidfile as such is not
guaranteed to be cleaned up, but lock acquisition is a reliable measure.)
Signed-off-by: Csaba Henk <csaba@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 1570 (geosync related changes)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1570
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/features/marker/utils/syncdaemon/gconf.py | 1 | ||||
-rw-r--r-- | xlators/features/marker/utils/syncdaemon/gsyncd.py | 73 | ||||
-rw-r--r-- | xlators/features/marker/utils/syncdaemon/resource.py | 2 |
3 files changed, 48 insertions, 28 deletions
diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py index 7bedce5148a..cec5be0789b 100644 --- a/xlators/features/marker/utils/syncdaemon/gconf.py +++ b/xlators/features/marker/utils/syncdaemon/gconf.py @@ -4,6 +4,7 @@ class GConf(object): ssh_ctl_dir = None ssh_ctl_args = None cpid = None + permanent_handles = [] @classmethod def setup_ssh_ctl(cls, ctld): diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py index 8a91c5ef959..b8b92056b54 100644 --- a/xlators/features/marker/utils/syncdaemon/gsyncd.py +++ b/xlators/features/marker/utils/syncdaemon/gsyncd.py @@ -9,9 +9,10 @@ import signal import select import shutil import optparse +import fcntl from optparse import OptionParser, SUPPRESS_HELP from logging import Logger -from errno import EEXIST, ENOENT +from errno import EEXIST, ENOENT, EACCES, EAGAIN from gconf import gconf from configinterface import GConffile @@ -53,26 +54,42 @@ class GLogger(Logger): logging.basicConfig(**lprm) -def startup(**kw): - def write_pid(fn): - fd = None +def grabfile(fname, content=None): + # damn those messy open() mode codes + fd = os.open(fname, os.O_CREAT|os.O_RDWR) + f = os.fdopen(fd, 'r+b', 0) + try: + fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB) + except: + ex = sys.exc_info()[1] + f.close() + if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN): + # cannot grab, it's taken + return + raise + if content: try: - fd = os.open(fn, os.O_CREAT|os.O_TRUNC|os.O_WRONLY|os.O_EXCL) - os.write(fd, str(os.getpid()) + '\n') - finally: - if fd: - os.close(fd) + f.truncate() + f.write(content) + except: + f.close() + raise + gconf.permanent_handles.append(f) + return f + +def grabpidfile(fname=None, setpid=True): + if not fname: + fname = gconf.pid_file + content = None + if setpid: + content = str(os.getpid()) + '\n' + return grabfile(fname, content=content) +def startup(**kw): if getattr(gconf, 'pid_file', 
None) and kw.get('go_daemon') != 'postconn': - try: - write_pid(gconf.pid_file) - except OSError: - gconf.pid_file = None - ex = sys.exc_info()[1] - if ex.errno == EEXIST: - sys.stderr.write("pidfile is taken, exiting.\n") - exit(2) - raise + if not grabpidfile(): + sys.stderr.write("pidfile is taken, exiting.\n") + exit(2) if kw.get('go_daemon') == 'should': x, y = os.pipe() @@ -86,7 +103,8 @@ def startup(**kw): for f in (sys.stdin, sys.stdout, sys.stderr): os.dup2(dn, f.fileno()) if getattr(gconf, 'pid_file', None): - write_pid(gconf.pid_file + '.tmp') + if not grabpidfile(gconf.pid_file + '.tmp'): + raise RuntimeError("cannot grap temporary pidfile") os.rename(gconf.pid_file + '.tmp', gconf.pid_file) # wait for parent to terminate # so we can start up with @@ -102,20 +120,21 @@ def startup(**kw): def finalize(*a): if getattr(gconf, 'pid_file', None): + rm_pidf = True if gconf.cpid: + # exit path from parent branch of daemonization + rm_pidf = False while True: - f = open(gconf.pid_file) - pid = f.read() - f.close() - pid = int(pid.strip()) - if pid == gconf.cpid: + f = grabpidfile(setpid=False) + if not f: + # child has already taken over pidfile break - if pid != os.getpid(): - raise RuntimeError("corrupt pidfile") if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid: + # child has terminated + rm_pidf = True break; time.sleep(0.1) - else: + if rm_pidf: try: os.unlink(gconf.pid_file) except: diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py index 1005e408680..6697ab8c485 100644 --- a/xlators/features/marker/utils/syncdaemon/resource.py +++ b/xlators/features/marker/utils/syncdaemon/resource.py @@ -433,7 +433,7 @@ class SSH(AbstractUrl, SlaveRemote): repce.recv(inf) # hack hack hack: store a global reference to the file # to save it from getting GC'd which implies closing it - gconf._in_fd_reference = inf + gconf.permanent_handles.append(inf) self.fd_pair = (i, o) return 'should' |