summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- config/Makefile.am 1
-rw-r--r-- config/host-monitoring-services.in 8
-rwxr-xr-x plugins/check_remote_host.py 144
-rwxr-xr-x plugins/gluster_host_service_handler.py 45
-rw-r--r-- tests/test_check_remote_host.py 47
5 files changed, 72 insertions, 173 deletions
diff --git a/config/Makefile.am b/config/Makefile.am
index 7e19136..eea3ea2 100644
--- a/config/Makefile.am
+++ b/config/Makefile.am
@@ -5,6 +5,7 @@ glusternagiosconf_DATA = \
gluster-host-services.cfg \
gluster-templates.cfg \
gluster-contacts.cfg \
+ host-monitoring-services.in \
$(NULL)
glusternagiosdefaultconfdir = $(sysconfdir)/nagios/gluster/default
diff --git a/config/host-monitoring-services.in b/config/host-monitoring-services.in
new file mode 100644
index 0000000..52ce8e0
--- /dev/null
+++ b/config/host-monitoring-services.in
@@ -0,0 +1,8 @@
+{"serviceList": [
+ "Cpu Utilization",
+ "Disk Utilization",
+ "Memory Utilization",
+ "Network Utilization",
+ "Swap Utilization"
+ ]
+}
diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py
index 7350e27..31ff9dd 100755
--- a/plugins/check_remote_host.py
+++ b/plugins/check_remote_host.py
@@ -2,13 +2,13 @@
#
# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall
# status
-# of a host. The entities considered for the status of the host are -
-# 1. Host is reachable
-# 2. LV/Inode Service status
-# 3. CPU Utilization
-# 4. Memory Utilization
-# 5. Network Utilization
-# 6. Swap Utilization
+# of a host. The services considered by default for the status of the host
+# are -
+# 1. LV/Inode Service status
+# 2. CPU Utilization
+# 3. Memory Utilization
+# 4. Network Utilization
+# 5. Swap Utilization
#
# Copyright (C) 2014 Red Hat Inc
#
@@ -29,90 +29,35 @@
import os
import sys
-import shlex
-import subprocess
-import socket
import getopt
+#import socket
+import json
+
+import livestatus
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3
-_checkPingCommand = "/usr/lib64/nagios/plugins/check_ping"
_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING',
STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'}
-_socketPath = '/var/spool/nagios/cmd/live'
-
-
-# Class for exception definition
-class checkPingCmdExecFailedException(Exception):
- message = "check_ping command failed"
-
- def __init__(self, rc=0, out=(), err=()):
- self.rc = rc
- self.out = out
- self.err = err
-
- def __str__(self):
- o = '\n'.join(self.out)
- e = '\n'.join(self.err)
- if o and e:
- m = o + '\n' + e
- else:
- m = o or e
- s = self.message
- if m:
- s += '\nerror: ' + m
- if self.rc:
- s += '\nreturn code: %s' % self.rc
- return s
-
-# Method to execute a command
-def execCmd(command):
- proc = subprocess.Popen(command,
- close_fds=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- (out, err) = proc.communicate()
- return (proc.returncode, out, err)
-
-
-# Method to check the ing status of the host
-def getPingStatus(hostAddr):
- cmd = "%s -H %s" % (_checkPingCommand, hostAddr)
- cmd += " -w 3000.0,80% -c 5000.0,100%"
-
- try:
- (rc, out, err) = execCmd(shlex.split(cmd))
- except (OSError, ValueError) as e:
- raise checkPingCmdExecFailedException(err=[str(e)])
-
- if rc != 0:
- raise checkPingCmdExecFailedException(rc, [out], [err])
-
- return rc
+# Load the list of service descriptions used to compute the host status.
+# The file must be JSON of the form {"serviceList": ["<description>", ...]};
+# IOError/ValueError/KeyError propagate when it is missing or malformed.
+def loadSrvcList(path="/etc/nagios/gluster/host-monitoring-services.in"):
+    with open(path) as data_file:
+        return json.load(data_file)['serviceList']
# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
- s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- s.connect(_socketPath)
-
- # Write command to socket
- cmd = "GET services\nColumns: state\nFilter: "
- "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr)
- s.send(cmd)
+ cmd = "GET services\nColumns: state\nFilter: " \
+ "description = %s\n" \
+ "Filter: host_address = %s" % (srvc, hostAddr)
- # Close socket
- s.shutdown(socket.SHUT_WR)
-
- # Read the answer
- answer = s.recv(1000000)
-
- # Parse the answer into a table
- table = [line.split(';') for line in answer.split('\n')[:-1]]
+ table = livestatus.readLiveStatus(cmd)
if len(table) > 0 and len(table[0]) > 0:
return int(table[0][0])
@@ -150,43 +95,18 @@ if __name__ == "__main__":
showUsage()
sys.exit(STATUS_CRITICAL)
- # Check ping status of the node, if its not reachable exit
- try:
- pingStatus = getPingStatus(hostAddr)
- except (checkPingCmdExecFailedException) as e:
- print "Host Status %s - Host not reachable" % \
- (_commandStatusStrs[STATUS_UNKNOWN])
- sys.exit(_commandStatusStrs[STATUS_UNKNOWN])
-
- if pingStatus != STATUS_OK:
- print "Host Status %s - Host not reachable" % \
- (_commandStatusStrs[STATUS_UNKNOWN])
- sys.exit(pingStatus)
-
- # Check the various performance statuses for the host
- diskPerfStatus = checkLiveStatus(hostAddr, 'Disk Utilization')
- cpuPerfStatus = checkLiveStatus(hostAddr, 'Cpu Utilization')
- memPerfStatus = checkLiveStatus(hostAddr, 'Memory Utilization')
- swapPerfStatus = checkLiveStatus(hostAddr, 'Swap Utilization')
- nwPerfStatus = checkLiveStatus(hostAddr, 'Network Utilization')
-
- # Calculate the consolidated status for the host based on above status
- # details
- finalStatus = pingStatus | diskPerfStatus | cpuPerfStatus | \
- memPerfStatus | swapPerfStatus | nwPerfStatus
-
- # Get the list of ciritical services
+ # Load the services list
+ srvc_list = loadSrvcList()
+
+ # Calculate the consolidated status for the host based on above
+ # status of individual services
+ finalStatus = STATUS_OK
criticalSrvcs = []
- if diskPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Disk Utilization')
- if cpuPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Cpu Utilization')
- if memPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Memory Utilization')
- if swapPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Swap Utilization')
- if nwPerfStatus == STATUS_CRITICAL:
- criticalSrvcs.append('Network Utilization')
+ for srvc in srvc_list:
+ srvc_status = checkLiveStatus(hostAddr, srvc)
+ finalStatus = finalStatus | srvc_status
+ if srvc_status == STATUS_CRITICAL:
+ criticalSrvcs.append(srvc)
# Return the status
if finalStatus == STATUS_CRITICAL:
diff --git a/plugins/gluster_host_service_handler.py b/plugins/gluster_host_service_handler.py
index 283ac69..2a62108 100755
--- a/plugins/gluster_host_service_handler.py
+++ b/plugins/gluster_host_service_handler.py
@@ -23,9 +23,10 @@
import os
import sys
import datetime
-import socket
import getopt
+import livestatus
+
STATUS_OK = "OK"
STATUS_WARNING = "WARNING"
STATUS_CRITICAL = "CRITICAL"
@@ -37,14 +38,15 @@ statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2,
NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd"
SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization',
'Swap Utilization', 'Network Utilization']
-_socketPath = '/var/spool/nagios/cmd/live'
# Shows the usage of the script
def showUsage():
- usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> "
- "-t <Service State Type (SOFT/HARD)> -a <No of Service attempts> "
- "-l <Host Address> -n <Service Name>\n" % os.path.basename(sys.argv[0])
+ usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> " \
+ "-t <Service State Type (SOFT/HARD)>" \
+ " -a <No of Service attempts> " \
+ "-l <Host Address>" \
+ " -n <Service Name>\n" % os.path.basename(sys.argv[0])
sys.stderr.write(usage)
@@ -52,12 +54,13 @@ def showUsage():
def update_host_state(hostAddr, srvcName, statusCode):
now = datetime.datetime.now()
if statusCode == statusCodes[STATUS_WARNING]:
- cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - "
- "Service(s) ['%s'] in CRITICAL state\n" % (now, hostAddr, statusCode,
- srvcName)
+ cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;" \
+ "Host Status WARNING - " \
+ "Service(s) ['%s'] in CRITICAL state\n" \
+ % (now, hostAddr, statusCode, srvcName)
else:
- cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - "
- "Services in good health\n" % (now, hostAddr, statusCode)
+ cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - " \
+ "Services in good health\n" % (now, hostAddr, statusCode)
f = open(NAGIOS_COMMAND_FILE, "w")
f.write(cmdStr)
@@ -66,22 +69,10 @@ def update_host_state(hostAddr, srvcName, statusCode):
# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
- s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- s.connect(_socketPath)
-
- # Write command to socket
- cmd = "GET services\nColumns: state\nFilter: "
- "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr)
- s.send(cmd)
-
- # Close socket
- s.shutdown(socket.SHUT_WR)
-
- # Read the answer
- answer = s.recv(1000)
+ cmd = "GET services\nColumns: state\nFilter: " \
+ "description = %s\nFilter: host_address = %s" % (srvc, hostAddr)
- # Parse the answer into a table
- table = [line.split(';') for line in answer.split('\n')[:-1]]
+ table = livestatus.readLiveStatus(cmd)
if len(table) > 0 and len(table[0]) > 0:
return int(table[0][0])
@@ -141,8 +132,8 @@ if __name__ == "__main__":
if srvcState == STATUS_CRITICAL:
if srvcStateType == SRVC_STATE_TYPE_SOFT:
if int(attempts) == 3:
- print "Updating the host status to warning "
- "(3rd SOFT critical state)..."
+ print "Updating the host status to warning " \
+ "(3rd SOFT critical state)..."
update_host_state(hostAddr, srvcName,
statusCodes[STATUS_WARNING])
elif srvcStateType == SRVC_STATE_TYPE_HARD:
diff --git a/tests/test_check_remote_host.py b/tests/test_check_remote_host.py
index c5c602d..f7965f3 100644
--- a/tests/test_check_remote_host.py
+++ b/tests/test_check_remote_host.py
@@ -19,49 +19,28 @@
#
import mock
+import socket
+import plugins
from testrunner import PluginsTestCase as TestCaseBase
-from plugins.check_remote_host import *
-class TestHello(TestCaseBase):
- # Method to test the execCmd() method
- @mock.patch('check_remote_host.subprocess.Popen')
- def testExecCmd(self, mock_popen):
- reference = subprocess.Popen('any command', close_fds=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- out = "sample output"
- err = ""
- reference.communicate.return_value = (out, err)
- self.assertTrue(reference.communicate, "communicate called")
-
- # Method to test the getPingStatus() method
- @mock.patch('check_remote_host.execCmd')
- def testGetPingStatus(self, mock_execCmd):
- rc = 0
- out = "sample output"
- err = ""
- mock_execCmd.return_value = (rc, out, err)
- getPingStatus('dummy host')
- mock_execCmd.assert_called_with([
- '/usr/lib64/nagios/plugins/check_ping', '-H', 'dummy', 'host',
- '-w', '3000.0,80%', '-c', '5000.0,100%'])
- self.assertRaises(OSError, execCmd,
- ['/usr/lib64/nagios/plugins/check_ping', '-H',
- 'dummy', 'host', '-w', '3000.0,80%', '-c',
- '5000.0,100%'])
-
+class TestCheckRemoteHost(TestCaseBase):
# Method to test the checkLiveStatus() method
- @mock.patch('check_remote_host.socket.socket')
+ @mock.patch('plugins.check_remote_host.livestatus.socket.socket')
def testCheckLiveStatus(self, mock_socket):
reference = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
self.assertTrue(mock_socket, "called")
reference.recv.return_value = "0\n"
- checkLiveStatus("dummy host", "dummy srvc")
- reference.connect.assert_called_with('/var/spool/nagios/cmd/live')
+ plugins.check_remote_host.checkLiveStatus("dummy host", "dummy srvc")
+ reference.connect.assert_called_with("${localstatedir}/"
+ "spool/nagios/cmd/live")
reference.send.assert_called_with("GET services\nColumns: state\n"
"Filter: description = dummy srvc\n"
"Filter: host_address = "
- "dummy host\n")
- self.assertEquals(0, checkLiveStatus("dummy host", "dummy srvc"))
+ "dummy host\n"
+ "Separators: 10 124 44 59")
+ self.assertEquals(0,
+ plugins.
+ check_remote_host.
+ checkLiveStatus("dummy host", "dummy srvc"))