summaryrefslogtreecommitdiffstats
path: root/plugins
diff options
context:
space:
mode:
authorBala.FA <barumuga@redhat.com>2014-03-13 07:39:41 +0530
committerBala.FA <barumuga@redhat.com>2014-04-29 10:14:32 +0530
commitb9c4e63f638254f7ba6cc960930cab1e15b4cd5d (patch)
treecbcd4c166503878d988b185cef9a762d93359667 /plugins
parent2977b2f34bdbb7284bcc6244e85a1fdb657447f2 (diff)
plugins: import plugins from github
All plugins are imported from https://github.com/gluster-rhsc/nagios-plugin.git Change-Id: I9880becafccc79280873039e317b4afde747806c Signed-off-by: Bala.FA <barumuga@redhat.com> Reviewed-on: https://cuckoo.blr.redhat.com:8443/4
Diffstat (limited to 'plugins')
-rw-r--r--plugins/Makefile.am8
-rw-r--r--plugins/__init__.py0
-rwxr-xr-xplugins/check_disk_and_inode.py195
-rwxr-xr-xplugins/check_remote_host.py199
-rwxr-xr-xplugins/gluster_host_service_handler.py154
-rwxr-xr-xplugins/sadf.py328
6 files changed, 884 insertions, 0 deletions
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index c12520c..12ebab7 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -1,2 +1,10 @@
dist_glusternagiosplugins_PYTHON = \
+ check_disk_and_inode.py \
+ check_remote_host.py \
+ gluster_host_service_handler.py \
+ sadf.py \
+ $(NULL)
+
+EXTRA_DIST = \
+ __init__.py \
$(NULL)
diff --git a/plugins/__init__.py b/plugins/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/plugins/__init__.py
diff --git a/plugins/check_disk_and_inode.py b/plugins/check_disk_and_inode.py
new file mode 100755
index 0000000..052df3a
--- /dev/null
+++ b/plugins/check_disk_and_inode.py
@@ -0,0 +1,195 @@
+#!/usr/bin/python
+# check_disk_and_inode.py -- nagios plugin to check disk and inode usage
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+
+import re
+import sys
+import commands
+from optparse import OptionParser
+
+
+def getUsageAndFree(command, lvm):
+    """Run a df command and parse the trailing fields of its output.
+
+    The last six whitespace-separated tokens of the output are read as
+    device, total, used, available, use% and mount path.
+
+    command -- shell command line to run, e.g. "df -m /var"
+    lvm     -- accepted for interface compatibility; not used here
+
+    Returns (usage %, free %, used, available, device, path).  The two
+    percentages are floats, the other values are the strings printed by
+    the command; the device is cut down to the part after its last "-".
+    Returns six None values when the output cannot be parsed.
+    """
+    # NOTE(review): the command is run through a shell and callers build
+    # it from a mount path, so paths must come from a trusted source.
+    fields = commands.getstatusoutput(command)[1].split()
+    if len(fields) < 6:
+        # empty or truncated output: report "unknown" instead of raising
+        return None, None, None, None, None, None
+
+    path = fields[-1]
+    usagePer = fields[-2]
+    availSpace = fields[-3]
+    usedSpace = fields[-4]
+    device = fields[-6].split("-")[-1]
+
+    dmatch = re.match('[0-9]+', usagePer)
+    if not dmatch:
+        return None, None, None, None, None, None
+
+    # int() rather than eval(): digits with a leading zero must not be
+    # interpreted as an octal literal
+    usage = int(dmatch.group(0))
+    return (float(usage), float(100 - usage), usedSpace,
+            availSpace, device, path)
+
+
+def getDisk(path, readable=False, lvm=False):
+    """Return the getUsageAndFree() tuple for disk space at path.
+
+    Runs "df -m" when readable is true and "df -kh" otherwise.
+    """
+    dfCommand = "df -m %s" if readable else "df -kh %s"
+    return getUsageAndFree(dfCommand % path, lvm)
+
+
+def getInode(path, readable=False, lvm=False):
+    """Return the getUsageAndFree() tuple for inode usage ("df -i") at path.
+
+    readable is accepted but has no effect on the command that is run.
+    """
+    inodeCommand = "df -i %s" % path
+    return getUsageAndFree(inodeCommand, lvm)
+
+
+def appendStatus(lst, level, typ, device, mpath, usage):
+    """Append a "<level>:<typ>:<device>;<mpath>;<usage>" entry to lst.
+
+    level 2 is rendered as "crit", 1 as "warn" and anything else as "ok".
+    """
+    if 2 == level:
+        label = "crit"
+    elif 1 == level:
+        label = "warn"
+    else:
+        label = "ok"
+    # typ must be part of the tuple: the format has five fields
+    lst.append("%s:%s:%s;%s;%s" % (label, typ, device, mpath, usage))
+
+
+def getMounts(searchQuery=None, excludeList=None):
+    """Return device names from /etc/mtab whose line starts with searchQuery.
+
+    searchQuery -- prefix an mtab line must start with
+    excludeList -- devices or mount points to leave out; None or an empty
+                   list excludes nothing
+
+    The first field of each matching line (the device) is returned.
+    """
+    # NOTE(review): a searchQuery of None (or "") matches no line at all,
+    # so the --all option currently adds nothing; confirm the intent.
+    mountPaths = []
+    with open("/etc/mtab") as f:
+        for line in f:
+            if not (searchQuery and line.startswith(searchQuery)):
+                continue
+            fields = line.split()
+            if not fields:
+                continue
+            # use the excludeList argument, not the global options object
+            if excludeList and any(name in excludeList
+                                   for name in fields[:2]):
+                continue
+            mountPaths.append(fields[0])
+    return mountPaths
+
+
+def parse_input():
+    """Parse sys.argv and return optparse's (options, args) pair.
+
+    Thresholds default to 80 (warning) and 90 (critical); the boolean
+    switches default to False; --include collects into options.mountPath
+    and --exclude into options.exclude (None when never given).
+    """
+    parser = OptionParser()
+
+    thresholds = (
+        ('-w', '--warning', 'warn', 80, 'Warning count in %'),
+        ('-c', '--critical', 'crit', 90, 'Critical count in %'),
+    )
+    for flag, name, dest, default, text in thresholds:
+        parser.add_option(flag, name, action='store', type='int',
+                          dest=dest, help=text, default=default)
+
+    switches = (
+        ('-u', '--usage', 'usage', 'Output disk and inode usage'),
+        ('-l', '--lvm', 'lvm', 'List lvm mounts'),
+        ('-a', '--all', 'all', 'List all mounts'),
+        ('-n', '--ignore', 'ignore', 'Ignore errors'),
+    )
+    for flag, name, dest, text in switches:
+        parser.add_option(flag, name, action="store_true",
+                          dest=dest, help=text, default=False)
+
+    parser.add_option('-i', '--include', action='append', type='string',
+                      dest='mountPath', help='Mount path', default=[])
+    parser.add_option('-x', '--exclude', action="append", type='string',
+                      dest='exclude', help='Exclude disk')
+    return parser.parse_args()
+
+
+# Script entry point: gather disk and inode usage for the selected mounts,
+# print one status line followed by perf data, and exit with 2 (critical),
+# 1 (warning), 3 (no usage figures and --ignore not given) or 0 otherwise.
+if __name__ == '__main__':
+ disk = []
+ warnList = []
+ critList = []
+ diskList = []
+ mounts = []
+ level = -1
+ (options, args) = parse_input()
+
+ # Positional form: with more than two positional arguments, two leading
+ # numbers are taken as warning and critical thresholds and the rest as
+ # mount paths; otherwise every positional argument is a mount path.
+ if len(args) > 2:
+ if args[0].isdigit() and args[1].isdigit():
+ warn = int(args[0])
+ crit = int(args[1])
+ options.mountPath = args[2:]
+ else:
+ warn = 80
+ crit = 90
+ options.mountPath = args
+ else:
+ crit = options.crit
+ warn = options.warn
+
+ # Prefix that /etc/mtab lines must start with to be picked by getMounts()
+ if options.lvm:
+ searchQuery = "/dev/mapper"
+ elif options.all:
+ searchQuery = None
+ else:
+ searchQuery = "/"
+
+ # Without explicit mount paths, or with --lvm/--all, add mounts from mtab
+ if not options.mountPath or options.lvm or options.all:
+ options.mountPath += getMounts(searchQuery, options.exclude)
+
+ #if not options.mountPath:
+ # parser.print_help()
+ # sys.exit(1)
+
+ for path in options.mountPath:
+ diskUsage, diskFree, used, avail, dev, mpath = getDisk(path,
+ options.usage,
+ options.lvm)
+ inodeUsage, inodeFree, iused, iavail, idev, ipath = getInode(
+ path,
+ options.usage,
+ options.lvm)
+ # Skip a mount point that has already been reported
+ if mpath in mounts:
+ continue
+ # No usage figures for this path: skip it with --ignore, else exit 3
+ if not used or not iused:
+ if options.ignore:
+ continue
+ else:
+ sys.exit(3)
+
+ mounts.append(mpath)
+ # Perf data: absolute figures divided by 1000 with --usage,
+ # percentages otherwise
+ if options.usage:
+ total = (float(used) + float(avail)) / 1000
+ itot = (float(iused) + float(iavail)) / 1000
+ disk.append("%s=%.1f;%.1f;%.1f;0;%.1f %s=%.1f;%.1f;%.1f;0;%.1f" % (
+ mpath, float(used)/1000, warn*total/100, crit*total/100, total,
+ ipath, float(iused)/1000, warn*itot/100, crit*itot/100, itot))
+ else:
+ disk.append("%s=%.2f;%s;%s;0;100 %s=%.2f;%s;%s;0;100" % (
+ mpath, diskUsage, warn, crit, ipath, inodeUsage, warn, crit))
+
+ # level keeps the worst state seen so far: 2 critical, 1 warning
+ if diskUsage >= crit or inodeUsage >= crit:
+ if diskUsage >= crit:
+ critList.append("crit:disk:%s;%s;%s" % (dev, mpath, diskUsage))
+ else:
+ critList.append("crit:inode:%s;%s;%s" % (idev, ipath,
+ inodeUsage))
+ if not level > 1:
+ level = 2
+ elif (diskUsage >= warn and diskUsage < crit) or (
+ inodeUsage >= warn and inodeUsage < crit):
+ if diskUsage >= warn:
+ warnList.append("warn:disk:%s;%s;%s" % (dev, mpath, diskUsage))
+ else:
+ warnList.append("warn:inode:%s;%s;%s" % (idev, ipath,
+ inodeUsage))
+ if not level > 0:
+ level = 1
+ else:
+ diskList.append("%s:%s" % (dev, mpath))
+
+ msg = " ".join(critList + warnList)
+ # Nothing critical or warning: list the healthy device:mount pairs
+ if not msg:
+ msg += " disks:mounts:(" + ",".join(diskList) + ")"
+
+ if 2 == level:
+ print "CRITICAL : %s | %s" % (msg, " ".join(disk))
+ sys.exit(2)
+ elif 1 == level:
+ print "WARNING : %s | %s" % (msg, " ".join(disk))
+ sys.exit(1)
+ else:
+ print "OK : %s | %s" % (msg, " ".join(disk))
diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py
new file mode 100755
index 0000000..7350e27
--- /dev/null
+++ b/plugins/check_remote_host.py
@@ -0,0 +1,199 @@
+#!/usr/bin/python
+#
+# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall
+# status
+# of a host. The entities considered for the status of the host are -
+# 1. Host is reachable
+# 2. LV/Inode Service status
+# 3. CPU Utilization
+# 4. Memory Utilization
+# 5. Network Utilization
+# 6. Swap Utilization
+#
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
+#
+
+import os
+import sys
+import shlex
+import subprocess
+import socket
+import getopt
+
+# Numeric states used as process exit codes and compared with the values
+# returned by checkLiveStatus()
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_CRITICAL = 2
+STATUS_UNKNOWN = 3
+# Executable run by getPingStatus() to test whether the host is reachable
+_checkPingCommand = "/usr/lib64/nagios/plugins/check_ping"
+# Display names for the numeric states above
+_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING',
+ STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'}
+# Unix socket that checkLiveStatus() connects to
+_socketPath = '/var/spool/nagios/cmd/live'
+
+
+# Class for exception definition
+class checkPingCmdExecFailedException(Exception):
+    """Raised when the check_ping command cannot be run or fails.
+
+    rc is the command's return code; out and err are sequences of output
+    and error strings that are included in the string form.
+    """
+    message = "check_ping command failed"
+
+    def __init__(self, rc=0, out=(), err=()):
+        self.rc, self.out, self.err = rc, out, err
+
+    def __str__(self):
+        # message, then the non-empty output/error text, then a non-zero rc
+        lines = [self.message]
+        details = '\n'.join(
+            text for text in ('\n'.join(self.out), '\n'.join(self.err))
+            if text)
+        if details:
+            lines.append('error: ' + details)
+        if self.rc:
+            lines.append('return code: %s' % self.rc)
+        return '\n'.join(lines)
+
+
+# Method to execute a command
+def execCmd(command):
+    """Run command with subprocess.Popen and wait for it to finish.
+
+    Returns (return code, stdout data, stderr data).
+    """
+    child = subprocess.Popen(command, close_fds=True,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+    output, errors = child.communicate()
+    return (child.returncode, output, errors)
+
+
+# Method to check the ping status of the host
+def getPingStatus(hostAddr):
+    """Run check_ping against hostAddr and return its exit status.
+
+    Raises checkPingCmdExecFailedException when the command cannot be
+    started or exits with a non-zero status, so a normal return is 0.
+    """
+    pingCmd = "%s -H %s -w 3000.0,80%% -c 5000.0,100%%" % (
+        _checkPingCommand, hostAddr)
+
+    try:
+        result = execCmd(shlex.split(pingCmd))
+    except (OSError, ValueError) as e:
+        raise checkPingCmdExecFailedException(err=[str(e)])
+
+    rc, out, err = result
+    if rc != 0:
+        raise checkPingCmdExecFailedException(rc, [out], [err])
+    return rc
+
+
+# Method to execute livestatus
+def checkLiveStatus(hostAddr, srvc):
+    """Return the state of service srvc on host hostAddr via livestatus.
+
+    Sends a "GET services" query, filtered on the service description and
+    the host address, to the socket at _socketPath.  Returns the first
+    column of the first answer row as an int, or STATUS_UNKNOWN when the
+    answer holds no value.
+    """
+    # Parentheses keep both literals in one expression so that the filter
+    # lines are part of the query and get formatted.
+    cmd = ("GET services\nColumns: state\nFilter: "
+           "description = %s\nFilter: host_address = %s\n"
+           % (srvc, hostAddr))
+
+    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    try:
+        s.connect(_socketPath)
+        s.sendall(cmd)
+
+        # Half-close the socket to mark the end of the query
+        s.shutdown(socket.SHUT_WR)
+
+        # Read until the peer closes; one recv() may return a partial answer
+        chunks = []
+        while True:
+            data = s.recv(4096)
+            if not data:
+                break
+            chunks.append(data)
+    finally:
+        s.close()
+    answer = "".join(chunks)
+
+    # Parse the answer into a table
+    table = [line.split(';') for line in answer.split('\n')[:-1]]
+
+    if table and table[0] and table[0][0].strip():
+        return int(table[0][0])
+    return STATUS_UNKNOWN
+
+
+# Method to show the usage
+def showUsage():
+    """Write the one-line usage message to stderr."""
+    progName = os.path.basename(sys.argv[0])
+    sys.stderr.write("Usage: %s -H <Host Address>\n" % progName)
+
+
+# Main method
+# Script entry point: ping the host, then derive the host status from the
+# state of its utilization services.  Exits with STATUS_UNKNOWN when the
+# host cannot be pinged, STATUS_WARNING when any service is CRITICAL and
+# STATUS_OK otherwise.
+if __name__ == "__main__":
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hH:", ["help", "host="])
+    except getopt.GetoptError as e:
+        print (str(e))
+        showUsage()
+        sys.exit(STATUS_CRITICAL)
+
+    hostAddr = ''
+    if len(opts) == 0:
+        showUsage()
+        sys.exit(STATUS_CRITICAL)
+    else:
+        for opt, arg in opts:
+            if opt in ("-h", "--help"):
+                showUsage()
+                sys.exit()
+            elif opt in ("-H", "--host"):
+                hostAddr = arg
+            else:
+                showUsage()
+                sys.exit(STATUS_CRITICAL)
+
+    # Check ping status of the node, if it is not reachable exit
+    try:
+        pingStatus = getPingStatus(hostAddr)
+    except (checkPingCmdExecFailedException) as e:
+        print("Host Status %s - Host not reachable" %
+              (_commandStatusStrs[STATUS_UNKNOWN]))
+        # exit with the numeric code; passing the name string to
+        # sys.exit() would print it and exit with status 1
+        sys.exit(STATUS_UNKNOWN)
+
+    if pingStatus != STATUS_OK:
+        print("Host Status %s - Host not reachable" %
+              (_commandStatusStrs[STATUS_UNKNOWN]))
+        sys.exit(pingStatus)
+
+    # Check the various performance statuses for the host and collect the
+    # services that are in CRITICAL state
+    services = ('Disk Utilization', 'Cpu Utilization', 'Memory Utilization',
+                'Swap Utilization', 'Network Utilization')
+    criticalSrvcs = [srvc for srvc in services
+                     if checkLiveStatus(hostAddr, srvc) == STATUS_CRITICAL]
+
+    # Decide on the list itself: OR-ing the states together would hide a
+    # CRITICAL service whenever another one is WARNING (1 | 2 == 3)
+    if criticalSrvcs:
+        print("Host Status %s - Service(s) %s in CRITICAL state" %
+              (_commandStatusStrs[STATUS_WARNING], criticalSrvcs))
+        sys.exit(STATUS_WARNING)
+
+    print("Host Status %s - Services in good health" %
+          _commandStatusStrs[STATUS_OK])
+    sys.exit(STATUS_OK)
diff --git a/plugins/gluster_host_service_handler.py b/plugins/gluster_host_service_handler.py
new file mode 100755
index 0000000..283ac69
--- /dev/null
+++ b/plugins/gluster_host_service_handler.py
@@ -0,0 +1,154 @@
+#!/usr/bin/python
+#
+# gluster_host_service_handler.py -- Event handler which checks the
+# status of defined services and accordingly changes the host status
+#
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
+#
+
+import os
+import sys
+import datetime
+import socket
+import getopt
+
+# Service state names as passed on the command line (-s)
+STATUS_OK = "OK"
+STATUS_WARNING = "WARNING"
+STATUS_CRITICAL = "CRITICAL"
+STATUS_UNKNOWN = "UNKNOWN"
+# Service state types as passed on the command line (-t)
+SRVC_STATE_TYPE_SOFT = "SOFT"
+SRVC_STATE_TYPE_HARD = "HARD"
+# Numeric code for each state name
+statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2,
+ STATUS_UNKNOWN: 3}
+# File that update_host_state() writes its command line to
+NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd"
+# Services whose state decides whether the host is set back to OK
+SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization',
+ 'Swap Utilization', 'Network Utilization']
+# Unix socket that checkLiveStatus() connects to
+_socketPath = '/var/spool/nagios/cmd/live'
+
+
+# Shows the usage of the script
+def showUsage():
+    """Write the usage message, listing every option, to stderr."""
+    # Parenthesised so the three literals form one string before formatting
+    usage = (
+        "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> "
+        "-t <Service State Type (SOFT/HARD)> -a <No of Service attempts> "
+        "-l <Host Address> -n <Service Name>\n"
+    ) % os.path.basename(sys.argv[0])
+    sys.stderr.write(usage)
+
+
+# Method to change the host status
+def update_host_state(hostAddr, srvcName, statusCode):
+    """Write a PROCESS_HOST_CHECK_RESULT line to NAGIOS_COMMAND_FILE.
+
+    hostAddr   -- host the result is submitted for
+    srvcName   -- service named in the WARNING message
+    statusCode -- numeric host state; the WARNING code produces the
+                  "Service(s) ... in CRITICAL state" text, any other code
+                  the "Services in good health" text
+    """
+    import time
+
+    # Nagios external commands are stamped with seconds since the epoch
+    now = int(time.time())
+    # Parentheses keep each pair of literals in one expression so that the
+    # whole command line is formatted
+    if statusCode == statusCodes[STATUS_WARNING]:
+        cmdStr = (
+            "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - "
+            "Service(s) ['%s'] in CRITICAL state\n"
+        ) % (now, hostAddr, statusCode, srvcName)
+    else:
+        cmdStr = (
+            "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - "
+            "Services in good health\n"
+        ) % (now, hostAddr, statusCode)
+
+    with open(NAGIOS_COMMAND_FILE, "w") as f:
+        f.write(cmdStr)
+
+
+# Method to execute livestatus
+def checkLiveStatus(hostAddr, srvc):
+    """Return the state of service srvc on host hostAddr via livestatus.
+
+    Sends a "GET services" query, filtered on the service description and
+    the host address, to the socket at _socketPath.  Returns the first
+    column of the first answer row as an int, or the UNKNOWN status code
+    when the answer holds no value.
+    """
+    # Parentheses keep both literals in one expression so that the filter
+    # lines are part of the query and get formatted.
+    cmd = ("GET services\nColumns: state\nFilter: "
+           "description = %s\nFilter: host_address = %s\n"
+           % (srvc, hostAddr))
+
+    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+    try:
+        s.connect(_socketPath)
+        s.sendall(cmd)
+
+        # Half-close the socket to mark the end of the query
+        s.shutdown(socket.SHUT_WR)
+
+        # Read until the peer closes; one recv() may return a partial answer
+        chunks = []
+        while True:
+            data = s.recv(4096)
+            if not data:
+                break
+            chunks.append(data)
+    finally:
+        s.close()
+    answer = "".join(chunks)
+
+    # Parse the answer into a table
+    table = [line.split(';') for line in answer.split('\n')[:-1]]
+
+    if table and table[0] and table[0][0].strip():
+        return int(table[0][0])
+    return statusCodes[STATUS_UNKNOWN]
+
+
+# Method to change the host state to UP based on other service type status
+def check_and_update_host_state_to_up(hostAddr, srvcName):
+    """Set the host state to OK when every other listed service is OK.
+
+    Queries each service in SRVC_LIST except srvcName and updates the host
+    only if all of the returned states are 0.
+    """
+    otherStates = [checkLiveStatus(hostAddr, name)
+                   for name in SRVC_LIST if name != srvcName]
+
+    # OR the states together: the result is 0 only if every state is 0
+    combined = 0
+    for state in otherStates:
+        combined |= state
+
+    if combined == statusCodes[STATUS_OK]:
+        update_host_state(hostAddr, srvcName, statusCodes[STATUS_OK])
+
+
+# Main method
+# Script entry point: read the service state reported on the command line
+# and update the host state accordingly.
+if __name__ == "__main__":
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "hs:t:a:l:n:",
+                                   ["help", "state=", "type=",
+                                    "attempts=", "location=", "name="])
+    except getopt.GetoptError as e:
+        print (str(e))
+        showUsage()
+        # exit with the numeric code; the STATUS_* names are strings here
+        sys.exit(statusCodes[STATUS_CRITICAL])
+
+    srvcState = ''
+    srvcStateType = ''
+    attempts = ''
+    hostAddr = ''
+    srvcName = ''
+    if len(opts) == 0:
+        showUsage()
+    else:
+        for opt, arg in opts:
+            if opt in ('-h', '--help'):
+                showUsage()
+                sys.exit()
+            elif opt in ('-s', '--state'):
+                srvcState = arg
+            elif opt in ('-t', '--type'):
+                srvcStateType = arg
+            elif opt in ('-a', '--attempts'):
+                attempts = arg
+            elif opt in ('-l', '--location'):
+                hostAddr = arg
+            elif opt in ('-n', '--name'):
+                srvcName = arg
+            else:
+                showUsage()
+                sys.exit()
+
+    # Switch over the service state values and act on them: a CRITICAL
+    # service moves the host to WARNING on its 3rd SOFT attempt or on any
+    # HARD state; an OK service may move the host back to OK
+    if srvcState == STATUS_CRITICAL:
+        if srvcStateType == SRVC_STATE_TYPE_SOFT:
+            if int(attempts) == 3:
+                print("Updating the host status to warning "
+                      "(3rd SOFT critical state)...")
+                update_host_state(hostAddr, srvcName,
+                                  statusCodes[STATUS_WARNING])
+        elif srvcStateType == SRVC_STATE_TYPE_HARD:
+            print("Updating the host status to warning...")
+            update_host_state(hostAddr, srvcName, statusCodes[STATUS_WARNING])
+    elif srvcState == STATUS_OK:
+        check_and_update_host_state_to_up(hostAddr, srvcName)
+
+    sys.exit(0)
diff --git a/plugins/sadf.py b/plugins/sadf.py
new file mode 100755
index 0000000..0bafb4a
--- /dev/null
+++ b/plugins/sadf.py
@@ -0,0 +1,328 @@
+#!/usr/bin/python
+# sadf.py -- nagios plugin uses sadf output for perf data
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import sys
+import shlex
+import subprocess
+import datetime
+import argparse
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+# Length of the time window, ending now, that _sadfExecCmd() asks sadf for
+_twoMinutes = datetime.timedelta(minutes=2)
+# sadf command lines (XML output) for each kind of statistic
+_sadfCpuCommand = "sadf -x -- -P ALL"
+_sadfMemoryCommand = "sadf -x -- -r"
+_sadfNetworkCommand = "sadf -x -- -n DEV"
+_sadfSwapSpaceCommand = "sadf -x -- -S"
+
+class sadfCmdExecFailedException(Exception):
+    """Raised when the sadf command cannot be run or fails.
+
+    rc is the command's return code; out and err are sequences of output
+    and error strings that are included in the string form.
+    """
+    message = "sadf command failed"
+
+    def __init__(self, rc=0, out=(), err=()):
+        self.rc, self.out, self.err = rc, out, err
+
+    def __str__(self):
+        # message, then the non-empty output/error text, then a non-zero rc
+        lines = [self.message]
+        details = '\n'.join(
+            text for text in ('\n'.join(self.out), '\n'.join(self.err))
+            if text)
+        if details:
+            lines.append('error: ' + details)
+        if self.rc:
+            lines.append('return code: %s' % self.rc)
+        return '\n'.join(lines)
+
+
+def execCmd(command):
+    """Run command with subprocess.Popen and wait for it to finish.
+
+    Returns (return code, stdout data, stderr data).
+    """
+    child = subprocess.Popen(command, close_fds=True,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+    output, errors = child.communicate()
+    return (child.returncode, output, errors)
+
+
+def etree_to_dict(t):
+    """Convert an ElementTree element into nested dicts keyed by tag.
+
+    Child elements are grouped under their tag; a tag that occurs more
+    than once maps to a list of values.  Attributes are merged into the
+    element's dict, and for an element with children or attributes any
+    non-empty text is stored under '#text'.  A leaf without attributes
+    maps to its stripped text, or to None when it has no text.
+    """
+    children = list(t)
+    if children:
+        grouped = defaultdict(list)
+        for childDict in map(etree_to_dict, children):
+            for key, value in childDict.iteritems():
+                grouped[key].append(value)
+        content = {}
+        for key, values in grouped.iteritems():
+            content[key] = values[0] if len(values) == 1 else values
+    elif t.attrib:
+        content = {}
+    else:
+        content = None
+
+    if t.attrib:
+        content.update((k, v) for k, v in t.attrib.iteritems())
+
+    if t.text:
+        text = t.text.strip()
+        if children or t.attrib:
+            if text:
+                content['#text'] = text
+        else:
+            content = text
+    return {t.tag: content}
+
+
+def _sadfExecCmd(sadfCmd):
+    """Run sadfCmd for the last two minutes and return its timestamp data.
+
+    Appends "-s <start> -e <end>" (HH:MM:SS, end being the current time) to
+    sadfCmd, parses the XML output and returns the value found under
+    sysstat/host/statistics/timestamp.
+
+    Raises sadfCmdExecFailedException when the command cannot be started or
+    exits with a non-zero status.
+    """
+    now = datetime.datetime.now()
+    window = " -s %s -e %s" % ((now - _twoMinutes).strftime("%H:%M:%S"),
+                               now.strftime("%H:%M:%S"))
+
+    try:
+        result = execCmd(shlex.split(sadfCmd + window))
+    except (OSError, ValueError) as e:
+        raise sadfCmdExecFailedException(err=[str(e)])
+
+    rc, out, err = result
+    if rc != 0:
+        raise sadfCmdExecFailedException(rc, [out], [err])
+
+    parsed = etree_to_dict(ET.fromstring(out))
+    return parsed['sysstat']['host']['statistics']['timestamp']
+
+
+def _getLatestStat(stats):
+    """Return the entry of stats with the latest 'time' (HH:MM:SS) value.
+
+    An empty or None stats gives {}; a value that is not a list is
+    returned unchanged.  When several entries share the latest time the
+    first of them is returned.
+    """
+    if not stats:
+        return {}
+    if not isinstance(stats, list):
+        return stats
+
+    def timeOf(stat):
+        return datetime.datetime.strptime(stat['time'], "%H:%M:%S")
+
+    # max() keeps the first of equally late entries
+    return max(stats, key=timeOf)
+
+
+def getLatestSadfCpuStat():
+    """Return the latest statistics entry produced by _sadfCpuCommand."""
+    cpuStats = _sadfExecCmd(_sadfCpuCommand)
+    return _getLatestStat(cpuStats)
+
+
+def getLatestSadfMemStat():
+    """Return the latest statistics entry produced by _sadfMemoryCommand."""
+    memStats = _sadfExecCmd(_sadfMemoryCommand)
+    return _getLatestStat(memStats)
+
+
+def getLatestSadfNetStat():
+    """Return the latest statistics entry produced by _sadfNetworkCommand."""
+    netStats = _sadfExecCmd(_sadfNetworkCommand)
+    return _getLatestStat(netStats)
+
+
+def getLatestSadfSwapStat():
+    """Return the latest statistics entry produced by _sadfSwapSpaceCommand."""
+    swapStats = _sadfExecCmd(_sadfSwapSpaceCommand)
+    return _getLatestStat(swapStats)
+
+
+def showCpuStat(warnLevel, critLevel):
+    """Print the CPU status line with perf data and exit.
+
+    warnLevel, critLevel -- total CPU usage percentages above which the
+                            status is WARNING and CRITICAL respectively
+
+    Exits with 2 for CRITICAL, 1 for WARNING, 0 for OK and 3 when no
+    statistics are available.
+    """
+    s = getLatestSadfCpuStat()
+    if not s:
+        sys.stdout.write("CPU UNKNOWN\n")
+        sys.exit(3)
+
+    cpus = s['cpu-load']['cpu']
+    # one of the entries is the 'all' summary, hence the -1
+    numCpus = len(cpus) - 1
+    perfLines = []
+    idleCpu = 0
+    for cpu in cpus:
+        if cpu['number'] == 'all':
+            idleCpu = cpu['idle']
+        perfLines.append(
+            ("cpu_%s_total=%s%%;%s;%s cpu_%s_system=%s%% "
+             "cpu_%s_user=%s%% cpu_%s_idle=%s%%" % (
+                 cpu['number'], 100-float(cpu['idle']),
+                 warnLevel, critLevel,
+                 cpu['number'], cpu['system'],
+                 cpu['number'], cpu['user'],
+                 cpu['number'], cpu['idle'])))
+        # with a single CPU only the first entry is reported
+        if numCpus == 1:
+            break
+
+    totalCpuUsage = 100 - float(idleCpu)
+    if totalCpuUsage > critLevel:
+        label, eStat = "CRITICAL", 2
+    elif totalCpuUsage > warnLevel:
+        label, eStat = "WARNING", 1
+    else:
+        label, eStat = "OK", 0
+
+    sys.stdout.write(
+        ("CPU Status %s: Total CPU:%s%% Idle CPU:%s%% "
+         "| num_of_cpu=%s %s\n" % (label, totalCpuUsage, idleCpu,
+                                   numCpus, " ".join(perfLines))))
+    # the exit code has to match the printed status
+    sys.exit(eStat)
+
+
+def showSwapStat(warning, critical):
+    """Print the swap status line with perf data and exit.
+
+    warning, critical -- used-swap percentages of the total at or above
+                         which the status is WARNING and CRITICAL
+
+    Exits with 2 for CRITICAL, 1 for WARNING, 0 for OK and 3 when no
+    statistics are available.  A host without swap space reports OK.
+    """
+    s = getLatestSadfSwapStat()
+    if not s:
+        sys.stdout.write("SWAP UNKNOWN\n")
+        sys.exit(3)
+
+    usedSwap = int(s['memory']['swpused'])
+    totalSwap = int(s['memory']['swpfree']) + usedSwap
+    crit_value = (totalSwap * critical) // 100
+    war_value = (totalSwap * warning) // 100
+
+    if totalSwap == 0:
+        # no swap configured: 0 >= 0 must not be reported as CRITICAL
+        label, eStat = "OK", 0
+    elif usedSwap >= crit_value:
+        label, eStat = "CRITICAL", 2
+    elif usedSwap >= war_value:
+        label, eStat = "WARNING", 1
+    else:
+        label, eStat = "OK", 0
+
+    sys.stdout.write(label)
+    sys.stdout.write("- %.2f%% used(%skB out of %skB)|Used=%skB;%s;"
+                     "%s;0;%s\n" % (float(s['memory']['swpused-percent']),
+                                    s['memory']['swpused'],
+                                    totalSwap,
+                                    s['memory']['swpused'],
+                                    war_value,
+                                    crit_value,
+                                    totalSwap))
+    sys.exit(eStat)
+
+
+def showMemStat(warning, critical):
+    """Print the memory status line with perf data and exit.
+
+    warning, critical -- used-memory percentages of the total at or above
+                         which the status is WARNING and CRITICAL
+
+    Exits with 2 for CRITICAL, 1 for WARNING, 0 for OK and 3 when no
+    statistics are available.
+    """
+    s = getLatestSadfMemStat()
+    if not s:
+        sys.stdout.write("MEMORY UNKNOWN\n")
+        sys.exit(3)
+
+    usedMem = int(s['memory']['memused'])
+    totalMem = int(s['memory']['memfree']) + usedMem
+    crit_value = (totalMem * critical) // 100
+    war_value = (totalMem * warning) // 100
+
+    if usedMem >= crit_value:
+        label, eStat = "CRITICAL", 2
+    elif usedMem >= war_value:
+        label, eStat = "WARNING", 1
+    else:
+        label, eStat = "OK", 0
+
+    sys.stdout.write(label)
+    sys.stdout.write("- %.2f%% used(%skB out of %skB)|Total=%skB;%s;%s;0;%s"
+                     " Used=%skB Buffered=%skB"
+                     " Cached=%skB\n" % (
+                         float(s['memory']['memused-percent']),
+                         s['memory']['memused'],
+                         totalMem,
+                         totalMem,
+                         war_value,
+                         crit_value,
+                         totalMem,
+                         s['memory']['memused'],
+                         s['memory']['buffers'],
+                         s['memory']['cached']))
+    sys.exit(eStat)
+
+
+def showNetStat(iface_list=None, list_type=None):
+    """Print the network interface status line with perf data and exit.
+
+    iface_list -- interface names the list_type filter applies to
+    list_type  -- "exclude" drops the listed interfaces, "include" keeps
+                  only the listed ones, anything else reports them all
+
+    Exits with 0, or with 3 when no statistics are available.
+    """
+    s = getLatestSadfNetStat()
+    if not s:
+        sys.stdout.write("IFACE UNKNOWN\n")
+        sys.exit(3)
+
+    devs = s['network']['net-dev']
+    if not isinstance(devs, list):
+        # etree_to_dict() yields a bare dict when there is one interface
+        devs = [devs]
+
+    devNames = []
+    perfLines = []
+    for dev in devs:
+        if list_type == "exclude":
+            if dev['iface'] in iface_list:
+                continue
+        elif list_type == "include":
+            if dev['iface'] not in iface_list:
+                continue
+        devNames.append(dev['iface'])
+        perfLines.append("%s.rxpck=%s %s.txpck=%s %s.rxkB=%s %s.txkB=%s"
+                         % (dev['iface'], dev['rxpck'],
+                            dev['iface'], dev['txpck'],
+                            dev['iface'], dev['rxkB'],
+                            dev['iface'], dev['txkB']))
+
+    sys.stdout.write("IFACE OK: %s |%s\n" % (", ".join(devNames),
+                                             " ".join(perfLines)))
+    sys.exit(0)
+
+
+def parse_input():
+    """Parse sys.argv and return the validated argparse namespace.
+
+    Exactly one of -m, -s, -cp and -n is required.  The first three need
+    both -w and -c, with critical greater than warning, and accept neither
+    -e nor -i; -n accepts no thresholds.  A violation prints an UNKNOWN
+    message and exits with 3.
+    """
+    parser = argparse.ArgumentParser(usage='%(prog)s [-h] (\
+\n-m -w <warning> -c <critical> |\n-s -w <warning> -c <critical>\
+ |\n-cp -w <warning> -c <critical> |\n-n [-e <exclude>\
+ | -i <include>])')
+    group1 = parser.add_mutually_exclusive_group(required=True)
+    group1.add_argument('-m', '--memory', action='store_true',
+                        help="Gives details related to memory")
+    group1.add_argument('-s', '--swap', action='store_true',
+                        help="Gives details related to swap")
+    group1.add_argument('-cp', '--cpu', action='store_true',
+                        help="Gives details related to cpu")
+    group1.add_argument('-n', '--network', action='store_true',
+                        help="Gives details related to network")
+    parser.add_argument("-w", "--warning", action="store", type=int,
+                        help="Warning threshold in percentage")
+    parser.add_argument("-c", "--critical", action="store", type=int,
+                        help="Critical threshold in percentage")
+    group2 = parser.add_mutually_exclusive_group()
+    group2.add_argument("-e", "--exclude", action="append",
+                        help="Parameters to be excluded")
+    group2.add_argument("-i", "--include", action="append",
+                        help="Parameters to be included")
+    args = parser.parse_args()
+    if args.memory or args.swap or args.cpu:
+        # compare with None so that a threshold of 0 counts as supplied
+        if args.critical is None or args.warning is None:
+            print("UNKNOWN:Missing critical/warning threshold value.")
+            sys.exit(3)
+        if args.exclude or args.include:
+            print("UNKNOWN:Exclude/Include is not valid for the given option.")
+            sys.exit(3)
+        if args.critical <= args.warning:
+            print("UNKNOWN:Critical must be greater than Warning.")
+            sys.exit(3)
+    else:
+        if args.critical is not None or args.warning is not None:
+            print("UNKNOWN:Warning/Critical is not valid for the given option.")
+            sys.exit(3)
+    return args
+
+
+# Script entry point.  Every show*Stat() call ends the process through
+# sys.exit(), so only the first matching report is printed.
+if __name__ == '__main__':
+    args = parse_input()
+    if args.memory:
+        showMemStat(args.warning, args.critical)
+    if args.swap:
+        showSwapStat(args.warning, args.critical)
+    if args.cpu:
+        showCpuStat(args.warning, args.critical)
+    if args.network:
+        # an exclude list takes precedence over an include list; with
+        # neither, all interfaces are reported
+        for ifaces, mode in ((args.exclude, "exclude"),
+                             (args.include, "include")):
+            if ifaces:
+                showNetStat(ifaces, mode)
+        showNetStat()