author    Timothy Asir <tjeyasin@redhat.com>  2014-05-02 19:02:08 +0530
committer Sahina Bose <sabose@redhat.com>     2014-05-05 04:28:43 -0700
commit    41d8289255f669c5a033a34b61612300e8fe838d (patch)
tree      d6f7949b63587b5643755d6b156a4577bcc50e3d
parent    8831c86a8acbd1c9f46719ad7b8f26ec325d1376 (diff)
Enhanced check proc and added CTDB service check
CTDB is node based, and any gluster volume can be configured to be used by CTDB. Normally, when CTDB starts it will start Samba automatically; however, SMB can be switched off at any time. If neither SMB nor NFS is running on a node, then CTDB is not required either. This patch checks for the CTDB process and sends an appropriate message to Nagios based on its status, using the 'ctdb nodestatus' command. Currently there is no direct way to find out whether CTDB is required to run on a given node, because the CTDB volume name / CTDB volume mount path can have any name. This would be easier if the gluster CLI provided a tag/set command to indicate whether a given volume is used for CTDB or is CTDB enabled.

Change-Id: Iccb98296ebd902838acc63805bbe20cd77cdcc61
Signed-off-by: Timothy Asir <tjeyasin@redhat.com>
Reviewed-on: http://review.gluster.org/7647
Tested-by: Timothy Asir <tim.gluster@gmail.com>
Reviewed-by: Kanagaraj M <kmayilsa@redhat.com>
Reviewed-by: Sahina Bose <sabose@redhat.com>
-rwxr-xr-x  plugins/check_proc_status.py  63
1 file changed, 50 insertions(+), 13 deletions(-)
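For readers reviewing the diff below: the new check amounts to (1) skipping CTDB when neither SMB nor NFS is up, (2) verifying that a ctdbd process exists, and (3) parsing the node state from 'ctdb nodestatus' into a Nagios exit code. The following is a minimal standalone sketch of such a mapping, not the patch's exact code; the sample output format "pnn:N <address> <STATE> ..." and the numeric Nagios exit codes 0-3 are assumptions for illustration.

    import subprocess

    # Nagios plugin exit codes (assumed standard values).
    OK, WARNING, CRITICAL, UNKNOWN = 0, 1, 2, 3


    def ctdb_node_state_to_nagios(lines):
        # Expect the first line to look like "pnn:0 10.70.42.1 OK (THIS NODE)";
        # this format is an assumption and may vary between CTDB versions.
        if not lines:
            return UNKNOWN, "no output from 'ctdb nodestatus'"
        fields = lines[0].split()
        if len(fields) < 3:
            return UNKNOWN, "unexpected 'ctdb nodestatus' output: %s" % lines[0]
        state = fields[2]
        if state == 'OK':
            return OK, "Node status: OK"
        if state == 'UNHEALTHY':
            return WARNING, "Node status: UNHEALTHY"
        if state in ('DISCONNECTED', 'BANNED', 'INACTIVE'):
            return CRITICAL, "Node status: %s" % state
        return UNKNOWN, "Node status: %s" % state


    if __name__ == '__main__':
        output = subprocess.check_output(['ctdb', 'nodestatus'])
        code, message = ctdb_node_state_to_nagios(
            output.decode('utf-8', 'replace').splitlines())
        print(message)
        raise SystemExit(code)
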
diff --git a/plugins/check_proc_status.py b/plugins/check_proc_status.py
index bea18ab..00f3f1d 100755
--- a/plugins/check_proc_status.py
+++ b/plugins/check_proc_status.py
@@ -23,6 +23,7 @@ import logging
import psutil
import time
from daemon import runner
+from logging import handlers
import nscautils
import glusternagios
@@ -37,20 +38,22 @@ _glusterVolPath = "/var/lib/glusterd/vols"
_checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"]
_checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
"glustershd"]
-_checkSmbCmd = [_checkProc.cmd, "-C", "smb"]
+_checkSmbCmd = [_checkProc.cmd, "-c", "1:", "-C", "smbd"]
_checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
"quotad"]
_checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"]
_checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"]
+_checkCtdbCmd = [_checkProc.cmd, "-c", "1:", "-C", "ctdbd"]
_nfsService = "Glusterfs NFS Daemon"
_shdService = "Glusterfs Self-Heal Daemon"
_smbService = "CIFS"
_brickService = "Brick Status - "
_glusterdService = "Gluster Management Daemon"
_quotadService = "Gluster Quota Daemon"
+_ctdbdService = "CTDB"
-def getBrickStatus(hostName, volInfo):
+def getBrickStatus(volInfo):
bricks = {}
hostUuid = glustercli.hostUUIDGet()
status = None
@@ -86,7 +89,7 @@ def getBrickStatus(hostName, volInfo):
return bricks
-def getNfsStatus(hostName, volInfo):
+def getNfsStatus(volInfo):
# if nfs is already running we need not to check further
status, msg, error = utils.execCmd(_checkNfsCmd)
if status == utils.PluginStatusCode.OK:
@@ -108,13 +111,39 @@ def getNfsStatus(hostName, volInfo):
return status, msg
-def getSmbStatus(hostName, volInfo):
+def getCtdbStatus(smbStatus, nfsStatus):
+ if smbStatus != utils.PluginStatusCode.OK and \
+ nfsStatus != utils.PluginStatusCode.OK:
+ return (utils.PluginStatusCode.OK,
+ "CTDB ignored as SMB and NFS are not running")
+
+ status, msg, error = utils.execCmd(_checkCtdbCmd)
+ if status != utils.PluginStatusCode.OK:
+ return utils.PluginStatusCode.UNKNOWN, "CTDB not configured"
+
+ # CTDB, SMB/NFS are running
+ status, msg, error = utils.execCmd(['ctdb', 'nodestatus'])
+ if status == utils.PluginStatusCode.OK:
+ if len(msg) > 0:
+ message = msg[0].split()
+ if len(message) > 2:
+ msg = "Node status: %s" % message[2]
+ if message[2] == 'UNHEALTHY':
+ status = utils.PluginStatusCode.WARNING
+ elif message[2] in ['DISCONNECTED', 'BANNED', 'INACTIVE']:
+ status = utils.PluginStatusCode.CRITICAL
+ else:
+ status = utils.PluginStatusCode.UNKNOWN
+ return status, msg
+
+
+def getSmbStatus(volInfo):
status, msg, error = utils.execCmd(_checkSmbCmd)
if status == utils.PluginStatusCode.OK:
return status, msg[0] if len(msg) > 0 else ""
# if smb is not running and any of the volume uses smb
- # then its required to alert the use
+ # then its required to alert the user
for k, v in volInfo.iteritems():
cifsStatus = v.get('options', {}).get('user.cifs', 'enable')
smbStatus = v.get('options', {}).get('user.smb', 'enable')
@@ -128,7 +157,7 @@ def getSmbStatus(hostName, volInfo):
return status, msg
-def getQuotadStatus(hostName, volInfo):
+def getQuotadStatus(volInfo):
# if quota is already running we need not to check further
status, msg, error = utils.execCmd(_checkQuotaCmd)
if status == utils.PluginStatusCode.OK:
@@ -148,7 +177,7 @@ def getQuotadStatus(hostName, volInfo):
return status, msg
-def getShdStatus(hostName, volInfo):
+def getShdStatus(volInfo):
status, msg, error = utils.execCmd(_checkShdCmd)
if status == utils.PluginStatusCode.OK:
return status, msg[0] if len(msg) > 0 else ""
@@ -191,6 +220,7 @@ class App():
smbStatus = None
shdStatus = None
quotaStatus = None
+ ctdbStatus = None
brickStatus = {}
while True:
if not hostName:
@@ -220,31 +250,37 @@ class App():
time.sleep(sleepTime)
continue
- status, msg = getNfsStatus(hostName, volInfo)
+ status, msg = getNfsStatus(volInfo)
if status != nfsStatus or \
status == utils.PluginStatusCode.CRITICAL:
nfsStatus = status
nscautils.send_to_nsca(hostName, _nfsService, status, msg)
- status, msg = getSmbStatus(hostName, volInfo)
+ status, msg = getSmbStatus(volInfo)
if status != smbStatus or \
status == utils.PluginStatusCode.CRITICAL:
smbStatus = status
nscautils.send_to_nsca(hostName, _smbService, status, msg)
- status, msg = getShdStatus(hostName, volInfo)
+ status, msg = getCtdbStatus(smbStatus, nfsStatus)
+ if status != ctdbStatus or \
+ status == utils.PluginStatusCode.CRITICAL:
+ ctdbStatus = status
+ nscautils.send_to_nsca(hostName, _ctdbdService, status, msg)
+
+ status, msg = getShdStatus(volInfo)
if status != shdStatus or \
status == utils.PluginStatusCode.CRITICAL:
shdStatus = status
nscautils.send_to_nsca(hostName, _shdService, status, msg)
- status, msg = getQuotadStatus(hostName, volInfo)
+ status, msg = getQuotadStatus(volInfo)
if status != quotaStatus or \
status == utils.PluginStatusCode.CRITICAL:
quotaStatus = status
nscautils.send_to_nsca(hostName, _quotadService, status, msg)
- brick = getBrickStatus(hostName, volInfo)
+ brick = getBrickStatus(volInfo)
# brickInfo contains status, and message
for brickService, brickInfo in brick.iteritems():
if brickInfo[0] != brickStatus.get(brickService, [None])[0] \
@@ -260,7 +296,8 @@ if __name__ == '__main__':
logger.setLevel(logging.INFO)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s")
- handler = logging.FileHandler("/var/log/glusterpmd.log")
+ handler = handlers.TimedRotatingFileHandler(
+ "/var/log/glusterpmd.log", 'midnight')
handler.setFormatter(formatter)
logger.addHandler(handler)
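The last hunk swaps the plain FileHandler for a TimedRotatingFileHandler so /var/log/glusterpmd.log rolls over daily at midnight instead of growing without bound. A minimal sketch of the resulting handler setup in isolation, assuming the log path is writable; the backupCount argument is an illustrative addition (the patch itself does not set it) and the logger name is assumed.

    import logging
    from logging import handlers

    # Rotate the plugin log at midnight; keep the last 7 rotated files.
    # backupCount is an illustrative extra, not set by the patch itself.
    logger = logging.getLogger("glusterpmd")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    handler = handlers.TimedRotatingFileHandler(
        "/var/log/glusterpmd.log", when='midnight', backupCount=7)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.info("process monitor started")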