From b9c4e63f638254f7ba6cc960930cab1e15b4cd5d Mon Sep 17 00:00:00 2001 From: "Bala.FA" Date: Thu, 13 Mar 2014 07:39:41 +0530 Subject: plugins: import plugins from github All plugins are imported from https://github.com/gluster-rhsc/nagios-plugin.git Change-Id: I9880becafccc79280873039e317b4afde747806c Signed-off-by: Bala.FA Reviewed-on: https://cuckoo.blr.redhat.com:8443/4 --- plugins/Makefile.am | 8 + plugins/__init__.py | 0 plugins/check_disk_and_inode.py | 195 +++++++++++++++++++ plugins/check_remote_host.py | 199 +++++++++++++++++++ plugins/gluster_host_service_handler.py | 154 +++++++++++++++ plugins/sadf.py | 328 ++++++++++++++++++++++++++++++++ 6 files changed, 884 insertions(+) create mode 100644 plugins/__init__.py create mode 100755 plugins/check_disk_and_inode.py create mode 100755 plugins/check_remote_host.py create mode 100755 plugins/gluster_host_service_handler.py create mode 100755 plugins/sadf.py (limited to 'plugins') diff --git a/plugins/Makefile.am b/plugins/Makefile.am index c12520c..12ebab7 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -1,2 +1,10 @@ dist_glusternagiosplugins_PYTHON = \ + check_disk_and_inode.py \ + check_remote_host.py \ + gluster_host_service_handler.py \ + sadf.py \ + $(NULL) + +EXTRA_DIST = \ + __init__.py \ $(NULL) diff --git a/plugins/__init__.py b/plugins/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plugins/check_disk_and_inode.py b/plugins/check_disk_and_inode.py new file mode 100755 index 0000000..052df3a --- /dev/null +++ b/plugins/check_disk_and_inode.py @@ -0,0 +1,195 @@ +#!/usr/bin/python +# check_disk_and_inode.py -- nagios plugin to check disk and inode usage +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#


import re
import shlex
import subprocess
import sys
from optparse import OptionParser

# Leading digits of the "Use%" / "IUse%" column printed by df.
_usagePattern = re.compile('[0-9]+')
# Value returned when the df output cannot be interpreted.
_noUsage = (None, None, None, None, None, None)


def parseDfOutput(output):
    """Extract the usage figures from the text printed by ``df``.

    The fields are read from the END of the whitespace-split output, so a
    header line or a device name wrapped onto its own line does not matter:
    mount path, usage percentage, available, used, (size), device.

    Returns a 6-tuple ``(usage%, free%, used, avail, device, path)`` where the
    percentages are floats and the rest are the strings printed by df.  The
    device is reduced to the text after its last ``-``.  When the output is
    too short or has no numeric percentage (e.g. df printed an error), a
    tuple of six ``None`` is returned.
    """
    fields = output.split()
    if len(fields) < 6:
        # Failed/empty df output: indexing from the end would raise.
        return _noUsage
    path = fields[-1]
    usagePer = fields[-2]
    availSpace = fields[-3]
    usedSpace = fields[-4]
    device = fields[-6].split("-")[-1]
    dmatch = _usagePattern.match(usagePer)
    if not dmatch:
        return _noUsage
    # int() instead of eval(): the text is digits only and eval() on
    # command output is needlessly dangerous.
    usage = int(dmatch.group(0))
    return (float(usage), float(100 - usage), usedSpace,
            availSpace, device, path)


def getUsageAndFree(command, lvm):
    """Run a ``df`` command line and return its parsed usage tuple.

    ``command`` is a command string; it is split with shlex and executed
    without a shell.  stderr is merged into stdout and the exit status is
    ignored, so a failing df simply yields the all-``None`` tuple.
    ``lvm`` is accepted for interface compatibility and is not used.
    """
    try:
        proc = subprocess.Popen(shlex.split(command),
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        output = proc.communicate()[0]
    except (OSError, ValueError):
        # df missing or command line not parseable.
        return _noUsage
    return parseDfOutput(output)


def getDisk(path, readable=False, lvm=False):
    """Return block usage of ``path``; sizes are in MB when ``readable``."""
    if readable:
        return getUsageAndFree("df -m %s" % path, lvm)
    return getUsageAndFree("df -kh %s" % path, lvm)


def getInode(path, readable=False, lvm=False):
    """Return inode usage of ``path`` (``readable`` has no effect here)."""
    return getUsageAndFree("df -i %s" % path, lvm)


def appendStatus(lst, level, typ, device, mpath, usage):
    """Append a ``level:type:device;mount;usage`` entry to ``lst``.

    ``level`` 2 maps to "crit", 1 to "warn", anything else to "ok".
    """
    if 2 == level:
        level = "crit"
    elif 1 == level:
        level = "warn"
    else:
        level = "ok"
    # The format used to have five placeholders for four values and always
    # raised TypeError; ``typ`` is the value that was missing.
    lst.append("%s:%s:%s;%s;%s" % (level, typ, device, mpath, usage))


def getMounts(searchQuery=None, excludeList=None, mtabPath="/etc/mtab"):
    """Return the devices in the mount table whose line starts with
    ``searchQuery``.

    Entries whose device or mount point is in ``excludeList`` are skipped.
    ``mtabPath`` names the mount table to read.
    """
    mountPaths = []
    with open(mtabPath) as f:
        for line in f:
            # NOTE(review): with searchQuery=None (the --all option) nothing
            # matches, so no mount is returned.  Kept as-is because matching
            # every entry would pass pseudo filesystems to df -- confirm the
            # intended behaviour of --all.
            if not (searchQuery and line.startswith(searchQuery)):
                continue
            fields = line.split()
            # Use the argument, not the global ``options`` of the script.
            if excludeList and [x for x in fields[:2] if x in excludeList]:
                continue
            mountPaths.append(fields[0])
    return mountPaths


def parse_input():
    """Parse the command line; returns optparse's ``(options, args)``."""
    parser = OptionParser()
    parser.add_option('-w', '--warning', action='store', type='int',
                      dest='warn', help='Warning count in %', default=80)
    parser.add_option('-c', '--critical', action='store', type='int',
                      dest='crit', help='Critical count in %', default=90)
    parser.add_option('-u', '--usage', action="store_true", dest='usage',
                      help='Output disk and inode usage', default=False)
    parser.add_option('-l', '--lvm', action="store_true",
                      dest='lvm', help='List lvm mounts', default=False)
    parser.add_option('-a', '--all', action="store_true",
                      dest='all', help='List all mounts', default=False)
    parser.add_option('-n', '--ignore', action="store_true",
                      dest='ignore', help='Ignore errors', default=False)
    parser.add_option('-i', '--include', action='append', type='string',
                      dest='mountPath', help='Mount path', default=[])
    parser.add_option('-x', '--exclude', action="append", type='string',
                      dest='exclude', help='Exclude disk')
    return parser.parse_args()


if __name__ == '__main__':
    disk = []
    warnList = []
    critList = []
    diskList = []
    mounts = []
    level = -1
    (options, args) = parse_input()

    # Legacy positional form: <warn> <crit> <path>...
    if len(args) > 2:
        if args[0].isdigit() and args[1].isdigit():
            warn = int(args[0])
            crit = int(args[1])
            options.mountPath = args[2:]
        else:
            warn = 80
            crit = 90
            options.mountPath = args
    else:
        crit = options.crit
        warn = options.warn

    if options.lvm:
        searchQuery = "/dev/mapper"
    elif options.all:
        searchQuery = None
    else:
        searchQuery = "/"

    if not options.mountPath or options.lvm or options.all:
        options.mountPath = list(options.mountPath) + getMounts(
            searchQuery, options.exclude)

    for path in options.mountPath:
        diskUsage, diskFree, used, avail, dev, mpath = getDisk(
            path, options.usage, options.lvm)
        inodeUsage, inodeFree, iused, iavail, idev, ipath = getInode(
            path, options.usage, options.lvm)
        if mpath in mounts:
            continue
        if not used or not iused:
            if options.ignore:
                continue
            # Nagios shows the first output line; do not exit silently.
            print("UNKNOWN : unable to get disk/inode usage of %s" % path)
            sys.exit(3)

        mounts.append(mpath)
        if options.usage:
            total = (float(used) + float(avail)) / 1000
            itot = (float(iused) + float(iavail)) / 1000
            disk.append(
                "%s=%.1f;%.1f;%.1f;0;%.1f %s=%.1f;%.1f;%.1f;0;%.1f" % (
                    mpath, float(used) / 1000, warn * total / 100,
                    crit * total / 100, total,
                    ipath, float(iused) / 1000, warn * itot / 100,
                    crit * itot / 100, itot))
        else:
            disk.append("%s=%.2f;%s;%s;0;100 %s=%.2f;%s;%s;0;100" % (
                mpath, diskUsage, warn, crit, ipath, inodeUsage, warn, crit))

        if diskUsage >= crit or inodeUsage >= crit:
            if diskUsage >= crit:
                critList.append("crit:disk:%s;%s;%s" % (dev, mpath,
                                                        diskUsage))
            else:
                critList.append("crit:inode:%s;%s;%s" % (idev, ipath,
                                                         inodeUsage))
            level = max(level, 2)
        elif diskUsage >= warn or inodeUsage >= warn:
            if diskUsage >= warn:
                warnList.append("warn:disk:%s;%s;%s" % (dev, mpath,
                                                        diskUsage))
            else:
                warnList.append("warn:inode:%s;%s;%s" % (idev, ipath,
                                                         inodeUsage))
            level = max(level, 1)
        else:
            diskList.append("%s:%s" % (dev, mpath))

    msg = " ".join(critList + warnList)
    if not msg:
        msg += " disks:mounts:(" + ",".join(diskList) + ")"

    if 2 == level:
        print("CRITICAL : %s | %s" % (msg, " ".join(disk)))
        sys.exit(2)
    elif 1 == level:
        print("WARNING : %s | %s" % (msg, " ".join(disk)))
        sys.exit(1)
    else:
        print("OK : %s | %s" % (msg, " ".join(disk)))

# ---------------------------------------------------------------------------
# Patch metadata for the next file of this commit:
#   diff --git a/plugins/check_remote_host.py b/plugins/check_remote_host.py
#   new file mode 100755
#   index 0000000..7350e27
#   --- /dev/null
#   +++ b/plugins/check_remote_host.py
#   @@ -0,0 +1,199 @@
# ---------------------------------------------------------------------------
#!/usr/bin/python
#
# check_remote_host.py -- nagios plugin uses Mklivestatus to get the overall
#                         status
# of a host. The entities considered for the status of the host are -
#   1. Host is reachable
#   2. LV/Inode Service status
#   3. CPU Utilization
#   4. Memory Utilization
#   5. Network Utilization
#   6.
#      Swap Utilization
#
# Copyright (C) 2014 Red Hat Inc
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
#

import os
import sys
import shlex
import subprocess
import socket
import getopt

STATUS_OK = 0
STATUS_WARNING = 1
STATUS_CRITICAL = 2
STATUS_UNKNOWN = 3
_checkPingCommand = "/usr/lib64/nagios/plugins/check_ping"
_commandStatusStrs = {STATUS_OK: 'OK', STATUS_WARNING: 'WARNING',
                      STATUS_CRITICAL: 'CRITICAL', STATUS_UNKNOWN: 'UNKNOWN'}
_socketPath = '/var/spool/nagios/cmd/live'
# Services whose state is folded into the host status, in report order.
_perfServices = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization',
                 'Swap Utilization', 'Network Utilization']


# Class for exception definition
class checkPingCmdExecFailedException(Exception):
    """Raised when check_ping cannot be run or exits non-zero.

    ``out`` and ``err`` are sequences of text lines; ``rc`` is the exit code.
    """
    message = "check_ping command failed"

    def __init__(self, rc=0, out=(), err=()):
        self.rc = rc
        self.out = out
        self.err = err

    def __str__(self):
        o = '\n'.join(self.out)
        e = '\n'.join(self.err)
        if o and e:
            m = o + '\n' + e
        else:
            m = o or e

        s = self.message
        if m:
            s += '\nerror: ' + m
        if self.rc:
            s += '\nreturn code: %s' % self.rc
        return s


# Method to execute a command
def execCmd(command):
    """Run ``command`` (an argv list); return ``(returncode, out, err)``.

    Output is returned as text so it can be joined into error messages.
    """
    proc = subprocess.Popen(command,
                            close_fds=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    (out, err) = proc.communicate()
    return (proc.returncode, out, err)


# Method to check the ping status of the host
def getPingStatus(hostAddr):
    """Ping ``hostAddr`` with check_ping.

    Returns the exit code, which is always 0 because any other result raises
    ``checkPingCmdExecFailedException``.
    """
    cmd = "%s -H %s" % (_checkPingCommand, hostAddr)
    cmd += " -w 3000.0,80% -c 5000.0,100%"

    try:
        (rc, out, err) = execCmd(shlex.split(cmd))
    except (OSError, ValueError) as e:
        raise checkPingCmdExecFailedException(err=[str(e)])

    if rc != 0:
        raise checkPingCmdExecFailedException(rc, [out], [err])

    return rc


def parseLiveStatusAnswer(answer):
    """Return the state in the first column of the first answer line.

    ``STATUS_UNKNOWN`` is returned when the answer is empty or its first
    field is not a number.
    """
    lines = answer.splitlines()
    if not lines:
        return STATUS_UNKNOWN
    try:
        return int(lines[0].split(';')[0])
    except ValueError:
        return STATUS_UNKNOWN


# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
    """Query livestatus for the state of service ``srvc`` on ``hostAddr``.

    Returns the numeric state, or ``STATUS_UNKNOWN`` when nothing matches.
    Socket errors are not caught.
    """
    # One parenthesised expression: previously the second literal was a
    # separate no-op statement, so the filters were never sent.
    cmd = ("GET services\nColumns: state\nFilter: "
           "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr))

    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        s.connect(_socketPath)
        s.sendall(cmd.encode('utf-8'))
        # Half-close tells livestatus the query is complete.
        s.shutdown(socket.SHUT_WR)
        chunks = []
        while True:
            data = s.recv(4096)
            if not data:
                break
            chunks.append(data)
    finally:
        s.close()

    return parseLiveStatusAnswer(b"".join(chunks).decode('utf-8'))


def getCriticalServices(serviceStatuses):
    """Return the names in ``[(name, state), ...]`` whose state is CRITICAL."""
    return [name for name, state in serviceStatuses
            if state == STATUS_CRITICAL]


# Method to show the usage
def showUsage():
    # The "<host-address>" placeholder is reconstructed; -H takes an argument.
    usage = "Usage: %s -H <host-address>\n" % os.path.basename(sys.argv[0])
    sys.stderr.write(usage)


# Main method
if __name__ == "__main__":
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hH:", ["help", "host="])
    except getopt.GetoptError as e:
        print(str(e))
        showUsage()
        sys.exit(STATUS_CRITICAL)

    hostAddr = ''
    if len(opts) == 0:
        showUsage()
        sys.exit(STATUS_CRITICAL)
    else:
        for opt, arg in opts:
            if opt in ("-h", "--help"):
                showUsage()
                sys.exit()
            elif opt in ("-H", "--host"):
                hostAddr = arg
            else:
                showUsage()
                sys.exit(STATUS_CRITICAL)

    # Check ping status of the node, if its not reachable exit
    try:
        getPingStatus(hostAddr)
    except checkPingCmdExecFailedException:
        print("Host Status %s - Host not reachable" %
              _commandStatusStrs[STATUS_UNKNOWN])
        # Exit with the numeric code: exiting with the string 'UNKNOWN'
        # made the process return 1, which Nagios reads as WARNING.
        sys.exit(STATUS_UNKNOWN)

    # Check the various performance statuses for the host
    serviceStatuses = [(srvc, checkLiveStatus(hostAddr, srvc))
                       for srvc in _perfServices]

    # The host is degraded when ANY service is CRITICAL.  OR-ing the states
    # together (1 | 2 == 3) hid a critical service behind a warning one.
    criticalSrvcs = getCriticalServices(serviceStatuses)

    # Return the status
    if criticalSrvcs:
        print("Host Status %s - Service(s) %s in CRITICAL state" %
              (_commandStatusStrs[STATUS_WARNING], criticalSrvcs))
        sys.exit(STATUS_WARNING)

    print("Host Status %s - Services in good health" %
          _commandStatusStrs[STATUS_OK])
    sys.exit(STATUS_OK)

# ---------------------------------------------------------------------------
# Patch metadata for the next file of this commit:
#   diff --git a/plugins/gluster_host_service_handler.py
#              b/plugins/gluster_host_service_handler.py
#   new file mode 100755
#   index 0000000..283ac69
#   --- /dev/null
#   +++ b/plugins/gluster_host_service_handler.py
#   @@ -0,0 +1,154 @@
# ---------------------------------------------------------------------------
#!/usr/bin/python
#
# gluster_host_service_handler.py -- Event handler which checks the
# status of defined services and accordingly changes the host status
#
# Copyright (C) 2014 Red Hat Inc
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA
#

import os
import sys
import datetime
import socket
import getopt
import time

STATUS_OK = "OK"
STATUS_WARNING = "WARNING"
STATUS_CRITICAL = "CRITICAL"
STATUS_UNKNOWN = "UNKNOWN"
SRVC_STATE_TYPE_SOFT = "SOFT"
SRVC_STATE_TYPE_HARD = "HARD"
statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2,
               STATUS_UNKNOWN: 3}
NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd"
SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization',
             'Swap Utilization', 'Network Utilization']
_socketPath = '/var/spool/nagios/cmd/live'


# Shows the usage of the script
def showUsage():
    # The three literals must form ONE expression; as separate statements
    # the message was truncated and the last one raised TypeError.  The
    # <...> placeholders are reconstructed from the long option names.
    usage = ("Usage: %s -s <state> "
             "-t <type> -a <attempts> "
             "-l <location> -n <name>\n" % os.path.basename(sys.argv[0]))
    sys.stderr.write(usage)


# Method to change the host status
def update_host_state(hostAddr, srvcName, statusCode,
                      commandFile=NAGIOS_COMMAND_FILE):
    """Submit a passive host check result through the Nagios command file.

    A WARNING ``statusCode`` reports ``srvcName`` as the critical service;
    any other code reports good health.  ``commandFile`` is the file the
    command line is written to.
    """
    # External commands are stamped with epoch seconds, not a date string.
    now = int(time.time())
    if statusCode == statusCodes[STATUS_WARNING]:
        cmdStr = ("[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - "
                  "Service(s) ['%s'] in CRITICAL state\n"
                  % (now, hostAddr, statusCode, srvcName))
    else:
        cmdStr = ("[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - "
                  "Services in good health\n" % (now, hostAddr, statusCode))

    with open(commandFile, "w") as f:
        f.write(cmdStr)


# Method to execute livestatus
def checkLiveStatus(hostAddr, srvc):
    """Query livestatus for the state of service ``srvc`` on ``hostAddr``.

    Returns the numeric state, or the UNKNOWN code when nothing usable comes
    back.  Socket errors are not caught.
    """
    # One parenthesised expression so the filters are really part of the
    # query (the second literal used to be a separate no-op statement).
    cmd = ("GET services\nColumns: state\nFilter: "
           "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr))

    s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        s.connect(_socketPath)
        s.sendall(cmd.encode('utf-8'))
        # Half-close tells livestatus the query is complete.
        s.shutdown(socket.SHUT_WR)
        chunks = []
        while True:
            data = s.recv(4096)
            if not data:
                break
            chunks.append(data)
    finally:
        s.close()

    lines = b"".join(chunks).decode('utf-8').splitlines()
    if lines:
        try:
            return int(lines[0].split(';')[0])
        except ValueError:
            pass
    return statusCodes[STATUS_UNKNOWN]


# Method to change the host state to UP based on other service type status
def check_and_update_host_state_to_up(hostAddr, srvcName):
    """Mark the host OK when every service other than ``srvcName`` is OK."""
    finalState = 0
    for item in SRVC_LIST:
        if item != srvcName:
            finalState = finalState | checkLiveStatus(hostAddr, item)

    if finalState == statusCodes[STATUS_OK]:
        update_host_state(hostAddr, srvcName, statusCodes[STATUS_OK])


# Main method
if __name__ == "__main__":
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hs:t:a:l:n:",
                                   ["help", "state=", "type=",
                                    "attempts=", "location=", "name="])
    except getopt.GetoptError as e:
        print(str(e))
        showUsage()
        # STATUS_CRITICAL is the string "CRITICAL" in this file; exit with
        # its numeric code.
        sys.exit(statusCodes[STATUS_CRITICAL])

    srvcState = ''
    srvcStateType = ''
    attempts = ''
    hostAddr = ''
    srvcName = ''
    if len(opts) == 0:
        showUsage()
    else:
        for opt, arg in opts:
            if opt in ('-h', '--help'):
                showUsage()
                sys.exit()
            elif opt in ('-s', '--state'):
                srvcState = arg
            elif opt in ('-t', '--type'):
                srvcStateType = arg
            elif opt in ('-a', '--attempts'):
                attempts = arg
            elif opt in ('-l', '--location'):
                hostAddr = arg
            elif opt in ('-n', '--name'):
                srvcName = arg
            else:
                showUsage()
                sys.exit()

    # Switch over the service state values and act accordingly
    if srvcState == STATUS_CRITICAL:
        if srvcStateType == SRVC_STATE_TYPE_SOFT:
            # isdigit() guard: a missing -a used to crash in int('').
            if attempts.isdigit() and int(attempts) == 3:
                print("Updating the host status to warning "
                      "(3rd SOFT critical state)...")
                update_host_state(hostAddr, srvcName,
                                  statusCodes[STATUS_WARNING])
        elif srvcStateType == SRVC_STATE_TYPE_HARD:
            print("Updating the host status to warning...")
            update_host_state(hostAddr, srvcName, statusCodes[STATUS_WARNING])
    elif srvcState == STATUS_OK:
        check_and_update_host_state_to_up(hostAddr, srvcName)

    sys.exit(0)

# ---------------------------------------------------------------------------
# Patch metadata for the next file of this commit:
#   diff --git a/plugins/sadf.py b/plugins/sadf.py
#   new file mode 100755
#   index 0000000..0bafb4a
#   --- /dev/null
#   +++ b/plugins/sadf.py
#   @@ -0,0 +1,328 @@
# ---------------------------------------------------------------------------
#!/usr/bin/python
# sadf.py -- nagios plugin uses sadf output for perf data
# Copyright (C) 2014 Red Hat Inc
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#

import sys
import shlex
import subprocess
import datetime
import argparse
import xml.etree.ElementTree as ET
from collections import defaultdict
_twoMinutes = datetime.timedelta(minutes=2)
_sadfCpuCommand = "sadf -x -- -P ALL"
_sadfMemoryCommand = "sadf -x -- -r"
_sadfNetworkCommand = "sadf -x -- -n DEV"
_sadfSwapSpaceCommand = "sadf -x -- -S"


class sadfCmdExecFailedException(Exception):
    """Raised when sadf cannot be run or exits non-zero.

    ``out`` and ``err`` are sequences of text lines; ``rc`` is the exit code.
    """
    message = "sadf command failed"

    def __init__(self, rc=0, out=(), err=()):
        self.rc = rc
        self.out = out
        self.err = err

    def __str__(self):
        o = '\n'.join(self.out)
        e = '\n'.join(self.err)
        if o and e:
            m = o + '\n' + e
        else:
            m = o or e

        s = self.message
        if m:
            s += '\nerror: ' + m
        if self.rc:
            s += '\nreturn code: %s' % self.rc
        return s


def execCmd(command):
    """Run ``command`` (an argv list); return ``(returncode, out, err)``.

    Output is returned as text so it can be joined into error messages.
    """
    proc = subprocess.Popen(command,
                            close_fds=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
    (out, err) = proc.communicate()
    return (proc.returncode, out, err)


def etree_to_dict(t):
    """Convert an ElementTree element into nested dicts.

    Children become keys (repeated tags become a list), attributes become
    keys on the same level, and text becomes the value itself or, when the
    element also has children or attributes, the ``'#text'`` key.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        x = {}
        for k, v in dd.items():
            # A tag that occurs once is stored bare, not as a 1-item list.
            x[k] = v[0] if len(v) == 1 else v
        d = {t.tag: x}
    if t.attrib:
        d[t.tag].update(t.attrib)
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]['#text'] = text
        else:
            d[t.tag] = text
    return d


def _sadfExecCmd(sadfCmd):
    """Run ``sadfCmd`` for the last two minutes; return its timestamp data.

    Returns the ``timestamp`` entry of the sadf XML (a dict, or a list of
    dicts when there are several samples), or ``None`` when there is none.
    Raises ``sadfCmdExecFailedException`` when sadf cannot be run or fails.
    """
    now = datetime.datetime.now()
    startTime = now - _twoMinutes
    if startTime.date() != now.date():
        # Just after midnight the window start would be later in the day
        # than its end; clamp it to the start of today.
        startTime = now.replace(hour=0, minute=0, second=0)
    start = startTime.strftime("%H:%M:%S")
    end = now.strftime("%H:%M:%S")
    cmd = sadfCmd + " -s %s -e %s" % (start, end)

    try:
        (rc, out, err) = execCmd(shlex.split(cmd))
    except (OSError, ValueError) as e:
        raise sadfCmdExecFailedException(err=[str(e)])

    if rc != 0:
        raise sadfCmdExecFailedException(rc, [out], [err])

    root = ET.fromstring(out)
    d = etree_to_dict(root)
    try:
        return d['sysstat']['host']['statistics']['timestamp']
    except (KeyError, TypeError):
        # No sample in the window: the statistics element is empty.
        return None


def _getLatestStat(stats):
    """Return the entry of ``stats`` with the latest ``'time'`` value.

    A single dict is returned unchanged; an empty value yields ``{}``.
    """
    if not stats:
        return {}
    if not isinstance(stats, list):
        return stats
    lstat = stats[0]
    latestTime = datetime.datetime.strptime(lstat['time'],
                                            "%H:%M:%S")
    for s in stats[1:]:
        thisTime = datetime.datetime.strptime(s['time'],
                                              "%H:%M:%S")
        if latestTime < thisTime:
            lstat = s
            latestTime = thisTime

    return lstat


def getLatestSadfCpuStat():
    return _getLatestStat(_sadfExecCmd(_sadfCpuCommand))


def getLatestSadfMemStat():
    return _getLatestStat(_sadfExecCmd(_sadfMemoryCommand))


def getLatestSadfNetStat():
    return _getLatestStat(_sadfExecCmd(_sadfNetworkCommand))


def getLatestSadfSwapStat():
    return _getLatestStat(_sadfExecCmd(_sadfSwapSpaceCommand))


def showCpuStat(warnLevel, critLevel, stat=None):
    """Print the CPU status line and exit with the matching Nagios code.

    ``stat`` may supply an already collected sample; by default the latest
    sadf sample is read.  Exits 2/1/0 for CRITICAL/WARNING/OK and 3 when no
    sample is available.
    """
    s = getLatestSadfCpuStat() if stat is None else stat
    if not s:
        sys.stdout.write("CPU UNKNOWN\n")
        sys.exit(3)
    cpus = s['cpu-load']['cpu']
    if not isinstance(cpus, list):
        # A single <cpu> element is collapsed to a dict by etree_to_dict.
        cpus = [cpus]
    numCpu = len(cpus) - 1
    perfLines = []
    idleCpu = 0
    for cpu in cpus:
        if cpu['number'] == 'all':
            idleCpu = cpu['idle']
        perfLines.append(
            ("cpu_%s_total=%s%%;%s;%s cpu_%s_system=%s%% "
             "cpu_%s_user=%s%% cpu_%s_idle=%s%%" % (
                 cpu['number'], 100 - float(cpu['idle']),
                 warnLevel, critLevel,
                 cpu['number'], cpu['system'],
                 cpu['number'], cpu['user'],
                 cpu['number'], cpu['idle'])))
        if numCpu == 1:
            # One core: its figures equal the first entry, report it once.
            break
    totalCpuUsage = 100 - float(idleCpu)
    if totalCpuUsage > critLevel:
        label, eStat = "CRITICAL", 2
    elif totalCpuUsage > warnLevel:
        label, eStat = "WARNING", 1
    else:
        label, eStat = "OK", 0
    sys.stdout.write(
        ("CPU Status %s: Total CPU:%s%% Idle CPU:%s%% "
         "| num_of_cpu=%s %s\n" % (label, totalCpuUsage, idleCpu,
                                   numCpu, " ".join(perfLines))))
    # Exit with the state that was printed (this used to be always 0).
    sys.exit(eStat)


def showSwapStat(warning, critical, stat=None):
    """Print the swap status line and exit with the matching Nagios code.

    ``warning`` and ``critical`` are percentages of total swap.  A host
    without swap is reported OK.  ``stat`` may supply a collected sample.
    """
    s = getLatestSadfSwapStat() if stat is None else stat
    if not s:
        sys.stdout.write("SWAP UNKNOWN\n")
        sys.exit(3)
    swapUsed = int(s['memory']['swpused'])
    totalSwap = int(s['memory']['swpfree']) + swapUsed
    crit_value = (totalSwap * critical) // 100
    war_value = (totalSwap * warning) // 100
    # totalSwap == 0 means no swap is configured: 0 >= 0 must not alarm.
    if totalSwap and swapUsed >= crit_value:
        sys.stdout.write("CRITICAL")
        eStat = 2
    elif totalSwap and swapUsed >= war_value:
        sys.stdout.write("WARNING")
        eStat = 1
    else:
        sys.stdout.write("OK")
        eStat = 0
    sys.stdout.write("- %.2f%% used(%skB out of %skB)|Used=%skB;%s;"
                     "%s;0;%s\n" % (float(s['memory']['swpused-percent']),
                                    s['memory']['swpused'],
                                    totalSwap,
                                    s['memory']['swpused'],
                                    war_value,
                                    crit_value,
                                    totalSwap))
    sys.exit(eStat)


def showMemStat(warning, critical, stat=None):
    """Print the memory status line and exit with the matching Nagios code.

    ``warning`` and ``critical`` are percentages of total memory.  ``stat``
    may supply a collected sample.
    """
    s = getLatestSadfMemStat() if stat is None else stat
    if not s:
        sys.stdout.write("MEMORY UNKNOWN\n")
        sys.exit(3)
    memUsed = int(s['memory']['memused'])
    totalMem = int(s['memory']['memfree']) + memUsed
    crit_value = (totalMem * critical) // 100
    war_value = (totalMem * warning) // 100
    if memUsed >= crit_value:
        sys.stdout.write("CRITICAL")
        eStat = 2
    elif memUsed >= war_value:
        sys.stdout.write("WARNING")
        eStat = 1
    else:
        sys.stdout.write("OK")
        eStat = 0
    sys.stdout.write("- %.2f%% used(%skB out of %skB)|Total=%skB;%s;%s;0;%s"
                     " Used=%skB Buffered=%skB"
                     " Cached=%skB\n" % (float(s['memory']['memused-percent']),
                                         s['memory']['memused'],
                                         totalMem,
                                         totalMem,
                                         war_value,
                                         crit_value,
                                         totalMem,
                                         s['memory']['memused'],
                                         s['memory']['buffers'],
                                         s['memory']['cached']))
    sys.exit(eStat)


def showNetStat(iface_list=None, list_type=None, stat=None):
    """Print per-interface traffic perf data and exit 0 (3 without data).

    ``list_type`` "exclude" skips the interfaces in ``iface_list``,
    "include" reports only those.  ``stat`` may supply a collected sample.
    """
    s = getLatestSadfNetStat() if stat is None else stat
    if not s:
        sys.stdout.write("IFACE UNKNOWN\n")
        sys.exit(3)

    devices = s['network']['net-dev']
    if not isinstance(devices, list):
        # A single interface is collapsed to a dict by etree_to_dict.
        devices = [devices]

    devNames = []
    perfLines = []
    for dev in devices:
        if list_type == "exclude":
            if dev['iface'] in iface_list:
                continue
        elif list_type == "include":
            if dev['iface'] not in iface_list:
                continue
        devNames.append(dev['iface'])
        perfLines.append("%s.rxpck=%s %s.txpck=%s %s.rxkB=%s %s.txkB=%s"
                         % (dev['iface'], dev['rxpck'],
                            dev['iface'], dev['txpck'],
                            dev['iface'], dev['rxkB'],
                            dev['iface'], dev['txkB']))

    sys.stdout.write("IFACE OK: %s |%s\n" % (", ".join(devNames),
                                             " ".join(perfLines)))
    sys.exit(0)


def parse_input():
    """Parse and validate the command line; exits 3 on inconsistent options."""
    # The <...> placeholders are reconstructed from the option definitions.
    parser = argparse.ArgumentParser(
        usage='%(prog)s [-h] ('
              '\n-m -w <warning> -c <critical> |'
              '\n-s -w <warning> -c <critical> |'
              '\n-cp -w <warning> -c <critical> |'
              '\n-n [-e <exclude> | -i <include>])')
    group1 = parser.add_mutually_exclusive_group(required=True)
    group1.add_argument('-m', '--memory', action='store_true',
                        help="Gives details related to memory")
    group1.add_argument('-s', '--swap', action='store_true',
                        help="Gives details related to swap")
    group1.add_argument('-cp', '--cpu', action='store_true',
                        help="Gives details related to cpu")
    group1.add_argument('-n', '--network', action='store_true',
                        help="Gives details related to network")
    parser.add_argument("-w", "--warning", action="store", type=int,
                        help="Warning threshold in percentage")
    parser.add_argument("-c", "--critical", action="store", type=int,
                        help="Critical threshold in percentage")
    group2 = parser.add_mutually_exclusive_group()
    group2.add_argument("-e", "--exclude", action="append",
                        help="Parameters to be excluded")
    group2.add_argument("-i", "--include", action="append",
                        help="Parameters to be included")
    args = parser.parse_args()
    if args.memory or args.swap or args.cpu:
        # "is None" so that an explicit threshold of 0 counts as given.
        if args.critical is None or args.warning is None:
            print("UNKNOWN:Missing critical/warning threshold value.")
            sys.exit(3)
        if args.exclude or args.include:
            print("UNKNOWN:Exclude/Include is not valid for the given option.")
            sys.exit(3)
        if args.critical <= args.warning:
            print("UNKNOWN:Critical must be greater than Warning.")
            sys.exit(3)
    else:
        if args.critical is not None or args.warning is not None:
            print("UNKNOWN:Warning/Critical is not valid for the given option.")
            sys.exit(3)
    return args


if __name__ == '__main__':
    args = parse_input()
    # Every show*Stat() call ends the process with sys.exit().
    if args.memory:
        showMemStat(args.warning, args.critical)
    elif args.swap:
        showSwapStat(args.warning, args.critical)
    elif args.cpu:
        showCpuStat(args.warning, args.critical)
    elif args.network:
        if args.exclude:
            showNetStat(args.exclude, "exclude")
        elif args.include:
            showNetStat(args.include, "include")
        else:
            showNetStat()

# -- cgit