diff options
author | Timothy Asir <tjeyasin@redhat.com> | 2014-05-02 19:02:08 +0530 |
---|---|---|
committer | Sahina Bose <sabose@redhat.com> | 2014-05-05 04:28:43 -0700 |
commit | 41d8289255f669c5a033a34b61612300e8fe838d (patch) | |
tree | d6f7949b63587b5643755d6b156a4577bcc50e3d | |
parent | 8831c86a8acbd1c9f46719ad7b8f26ec325d1376 (diff) |
Enhanced check proc and added CTDB service check
CTDB is node based and any gluster volume can be configured
to be used by CTDB. Normally when CTDB starts it will start
Samba automatically. However one can switch off SMB at any
given time. But if SMB/NFS are not running on any node,
then CTDB is also not required.
This patch checks for the CTDB process and will send an
appropriate message to Nagios based on its status
using 'ctdb nodestatus' command.
Currently there is no direct way to find whether CTDB
is required to run on a given node. This is because
the CTDB volume name / CTDB volume mount path can have
any name.
It would be easier if gluster-cli provided a
tag/set command to indicate whether a given volume is
used for CTDB or is CTDB enabled.
Change-Id: Iccb98296ebd902838acc63805bbe20cd77cdcc61
Signed-off-by: Timothy Asir <tjeyasin@redhat.com>
Reviewed-on: http://review.gluster.org/7647
Tested-by: Timothy Asir <tim.gluster@gmail.com>
Reviewed-by: Kanagaraj M <kmayilsa@redhat.com>
Reviewed-by: Sahina Bose <sabose@redhat.com>
-rwxr-xr-x | plugins/check_proc_status.py | 63 |
1 files changed, 50 insertions, 13 deletions
diff --git a/plugins/check_proc_status.py b/plugins/check_proc_status.py index bea18ab..00f3f1d 100755 --- a/plugins/check_proc_status.py +++ b/plugins/check_proc_status.py @@ -23,6 +23,7 @@ import logging import psutil import time from daemon import runner +from logging import handlers import nscautils import glusternagios @@ -37,20 +38,22 @@ _glusterVolPath = "/var/lib/glusterd/vols" _checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"] _checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "glustershd"] -_checkSmbCmd = [_checkProc.cmd, "-C", "smb"] +_checkSmbCmd = [_checkProc.cmd, "-c", "1:", "-C", "smbd"] _checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "quotad"] _checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"] _checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"] +_checkCtdbCmd = [_checkProc.cmd, "-c", "1:", "-C", "ctdbd"] _nfsService = "Glusterfs NFS Daemon" _shdService = "Glusterfs Self-Heal Daemon" _smbService = "CIFS" _brickService = "Brick Status - " _glusterdService = "Gluster Management Daemon" _quotadService = "Gluster Quota Daemon" +_ctdbdService = "CTDB" -def getBrickStatus(hostName, volInfo): +def getBrickStatus(volInfo): bricks = {} hostUuid = glustercli.hostUUIDGet() status = None @@ -86,7 +89,7 @@ def getBrickStatus(hostName, volInfo): return bricks -def getNfsStatus(hostName, volInfo): +def getNfsStatus(volInfo): # if nfs is already running we need not to check further status, msg, error = utils.execCmd(_checkNfsCmd) if status == utils.PluginStatusCode.OK: @@ -108,13 +111,39 @@ def getNfsStatus(hostName, volInfo): return status, msg -def getSmbStatus(hostName, volInfo): +def getCtdbStatus(smbStatus, nfsStatus): + if smbStatus != utils.PluginStatusCode.OK and \ + nfsStatus != utils.PluginStatusCode.OK: + return (utils.PluginStatusCode.OK, + "CTDB ignored as SMB and NFS are not running") + + status, msg, error = utils.execCmd(_checkCtdbCmd) + if status != 
utils.PluginStatusCode.OK: + return utils.PluginStatusCode.UNKNOWN, "CTDB not configured" + + # CTDB, SMB/NFS are running + status, msg, error = utils.execCmd(['ctdb', 'nodestatus']) + if status == utils.PluginStatusCode.OK: + if len(msg) > -1: + message = msg[0].split() + if len(message) > 1: + msg = "Node status: %s" % message[2] + if message[2] == 'UNHEALTHY': + status = utils.PluginStatusCode.WARNING + elif message[2] in ['DISCONNECTED', 'BANNED', 'INACTIVE']: + status = utils.PluginStatusCode.CRITICAL + else: + status = utils.PluginStatusCode.UNKNOWN + return status, msg + + +def getSmbStatus(volInfo): status, msg, error = utils.execCmd(_checkSmbCmd) if status == utils.PluginStatusCode.OK: return status, msg[0] if len(msg) > 0 else "" # if smb is not running and any of the volume uses smb - # then its required to alert the use + # then its required to alert the user for k, v in volInfo.iteritems(): cifsStatus = v.get('options', {}).get('user.cifs', 'enable') smbStatus = v.get('options', {}).get('user.smb', 'enable') @@ -128,7 +157,7 @@ def getSmbStatus(hostName, volInfo): return status, msg -def getQuotadStatus(hostName, volInfo): +def getQuotadStatus(volInfo): # if quota is already running we need not to check further status, msg, error = utils.execCmd(_checkQuotaCmd) if status == utils.PluginStatusCode.OK: @@ -148,7 +177,7 @@ def getQuotadStatus(hostName, volInfo): return status, msg -def getShdStatus(hostName, volInfo): +def getShdStatus(volInfo): status, msg, error = utils.execCmd(_checkShdCmd) if status == utils.PluginStatusCode.OK: return status, msg[0] if len(msg) > 0 else "" @@ -191,6 +220,7 @@ class App(): smbStatus = None shdStatus = None quotaStatus = None + ctdbStatus = None brickStatus = {} while True: if not hostName: @@ -220,31 +250,37 @@ class App(): time.sleep(sleepTime) continue - status, msg = getNfsStatus(hostName, volInfo) + status, msg = getNfsStatus(volInfo) if status != nfsStatus or \ status == utils.PluginStatusCode.CRITICAL: nfsStatus 
= status nscautils.send_to_nsca(hostName, _nfsService, status, msg) - status, msg = getSmbStatus(hostName, volInfo) + status, msg = getSmbStatus(volInfo) if status != smbStatus or \ status == utils.PluginStatusCode.CRITICAL: smbStatus = status nscautils.send_to_nsca(hostName, _smbService, status, msg) - status, msg = getShdStatus(hostName, volInfo) + status, msg = getCtdbStatus(smbStatus, nfsStatus) + if status != ctdbStatus or \ + status == utils.PluginStatusCode.CRITICAL: + ctdbStatus = status + nscautils.send_to_nsca(hostName, _ctdbdService, status, msg) + + status, msg = getShdStatus(volInfo) if status != shdStatus or \ status == utils.PluginStatusCode.CRITICAL: shdStatus = status nscautils.send_to_nsca(hostName, _shdService, status, msg) - status, msg = getQuotadStatus(hostName, volInfo) + status, msg = getQuotadStatus(volInfo) if status != quotaStatus or \ status == utils.PluginStatusCode.CRITICAL: quotaStatus = status nscautils.send_to_nsca(hostName, _quotadService, status, msg) - brick = getBrickStatus(hostName, volInfo) + brick = getBrickStatus(volInfo) # brickInfo contains status, and message for brickService, brickInfo in brick.iteritems(): if brickInfo[0] != brickStatus.get(brickService, [None])[0] \ @@ -260,7 +296,8 @@ if __name__ == '__main__': logger.setLevel(logging.INFO) formatter = logging.Formatter( "%(asctime)s - %(name)s - %(levelname)s - %(message)s") - handler = logging.FileHandler("/var/log/glusterpmd.log") + handler = handlers.TimedRotatingFileHandler( + "/var/log/glusterpmd.log", 'midnight') handler.setFormatter(formatter) logger.addHandler(handler) |