diff options
author | Saravanakumar Arumugam <sarumuga@redhat.com> | 2015-08-10 18:42:05 +0530 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2015-09-01 06:11:01 -0700 |
commit | 4d4c7d5dc54850dcf916083b2b1398d9bfe2bfe6 (patch) | |
tree | fd6fc9252c75fcc67a3174ae61b476373f4dca13 /geo-replication | |
parent | d713488e47a384814e55b484f9a7e81664021d52 (diff) |
geo-rep: Update geo-rep status, if monitor process is killed
Problem:
When the monitor process itself is killed, the geo-rep session
still shows as active.
The status command simply reads the content of the status file
to produce its output, and the monitor process is the one that updates the status file.
Once the monitor process has been killed, nothing is left to update
the status file. So the geo-rep session status command ends up showing
the last status that was written to the status file.
Solution:
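While getting the status output, check whether the monitor process is running.
The check is made only when the recorded monitor status is Started or Paused, by
attempting a non-blocking exclusive lock on the monitor's pid file: if the lock
can be acquired, no monitor process is holding it.
If the monitor is NOT running, set the reported status to Stopped.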
Change-Id: I86a7ac1746dd8f27eef93658e992ef16f6068d9d
BUG: 1251980
Signed-off-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Reviewed-on: http://review.gluster.org/11873
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Kotresh HR <khiremat@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'geo-replication')
-rw-r--r-- | geo-replication/syncdaemon/gsyncd.py | 3 | ||||
-rw-r--r-- | geo-replication/syncdaemon/gsyncdstatus.py | 17 |
2 files changed, 18 insertions, 2 deletions
```diff
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index 32e4eb7828d..b3c7e62506e 100644
--- a/geo-replication/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -615,7 +615,8 @@ def main_i():
     status_get = rconf.get('status_get')
     if status_get:
         for brick in gconf.path:
-            brick_status = GeorepStatus(gconf.state_file, brick)
+            brick_status = GeorepStatus(gconf.state_file, brick,
+                                        getattr(gconf, "pid_file", None))
             checkpoint_time = int(getattr(gconf, "checkpoint", "0"))
             brick_status.print_status(checkpoint_time=checkpoint_time)
         return
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
index 57692f8fab0..77f3f9e7569 100644
--- a/geo-replication/syncdaemon/gsyncdstatus.py
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -16,6 +16,7 @@ import urllib
 import json
 import time
 from datetime import datetime
+from errno import EACCES, EAGAIN
 
 DEFAULT_STATUS = "N/A"
 MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped")
@@ -113,7 +114,7 @@ def set_monitor_status(status_file, status):
 
 
 class GeorepStatus(object):
-    def __init__(self, monitor_status_file, brick):
+    def __init__(self, monitor_status_file, brick, monitor_pid_file=None):
         self.work_dir = os.path.dirname(monitor_status_file)
         self.monitor_status_file = monitor_status_file
         self.filename = os.path.join(self.work_dir,
@@ -126,6 +127,7 @@ class GeorepStatus(object):
         os.close(fd)
         self.brick = brick
         self.default_values = get_default_values()
+        self.monitor_pid_file = monitor_pid_file
 
     def _update(self, mergerfunc):
         with LockedOpen(self.filename, 'r+') as f:
@@ -254,6 +256,19 @@ class GeorepStatus(object):
                 pass
         monitor_status = self.get_monitor_status()
 
+        # Verifying whether monitor process running and adjusting status
+        if monitor_status in ["Started", "Paused"]:
+            try:
+                with open(self.monitor_pid_file, "r+") as f:
+                    fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+                    monitor_status = "Stopped"
+            except (IOError, OSError) as e:
+                if e.errno in (EACCES, EAGAIN):
+                    # cannot grab. so, monitor process still running..move on
+                    pass
+                else:
+                    raise
+
         if monitor_status in ["Created", "Paused", "Stopped"]:
             data["worker_status"] = monitor_status
 
```