summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Timothy Asir <tjeyasin@redhat.com> 2014-05-30 17:05:14 +0530
committer Sahina Bose <sabose@redhat.com> 2014-06-10 02:52:20 -0700
commit 8e977e1fd0a0bed52049344765ab4581d7f3c761 (patch)
tree 4580b6e64385800bc511cf2fd7a76537c2dd09da
parent ab5dd8ea647fc1aa80f7ba6b43520979eb1827cc (diff)
Add status information to show disk usage
This will show the status in the following form:

For disk status "CRITICAL":
  CRITICAL: 4% used (4GB out of 100GB):mounts: (CRITICAL: <critical list>, followed by WARNING if any, followed by OK)
For disk status "WARNING":
  WARNING: 4% used (4GB out of 100GB):mounts: (WARNING if any, followed by OK)
For disk status "OK":
  OK: 4% used (4GB out of 100GB):mounts:(<mounts>)

Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1081495
Change-Id: I9dbda7a5d6ea992ba73acce2174e6d66f2e16066
Signed-off-by: Timothy Asir <tjeyasin@redhat.com>
Reviewed-on: http://review.gluster.org/7936
Tested-by: Timothy Asir <tim.gluster@gmail.com>
Reviewed-by: Sahina Bose <sabose@redhat.com>
-rw-r--r-- config/nrpe.in 1
-rw-r--r-- gluster-nagios-addons.spec.in 4
-rwxr-xr-x plugins/check_disk_and_inode.py 250
-rw-r--r-- tests/test_disk.py 92
4 files changed, 222 insertions, 125 deletions
diff --git a/config/nrpe.in b/config/nrpe.in
index b0fa08c..8b4f6e9 100644
--- a/config/nrpe.in
+++ b/config/nrpe.in
@@ -1,4 +1,5 @@
Cmnd_Alias NRPE_PATHS = @sbindir@/send_nsca, \
+ @libdir@/nagios/plugins/gluster/check_disk_and_inode.py, \
@libdir@/nagios/plugins/gluster/check_vol_utilization.py, \
@libdir@/nagios/plugins/gluster/check_volume_status.py, \
@libdir@/nagios/plugins/gluster/check_gluster_proc_status.py, \
diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in
index 497f085..0ae3bed 100644
--- a/gluster-nagios-addons.spec.in
+++ b/gluster-nagios-addons.spec.in
@@ -141,12 +141,12 @@ fi
cat >> %{_sysconfdir}/nagios/nrpe.cfg <<EOF
%{_start_conf_section}
-command[check_disk_and_inode]=%{_libdir}/nagios/plugins/gluster/check_disk_and_inode.py -w 80 -c 90 -l -i /boot -i /var -i /root -n --inode
+command[check_disk_and_inode]=sudo %{_libdir}/nagios/plugins/gluster/check_disk_and_inode.py -w 80 -c 90 -l -i /boot -i /var -i /root -n --inode
command[check_memory]=%{_libdir}/nagios/plugins/gluster/memory.py -w 80 -c 90 -t 2
command[check_swap_usage]=%{_libdir}/nagios/plugins/gluster/swap.py -w 80 -c 90 -t 2
command[check_cpu_multicore]=%{_libdir}/nagios/plugins/gluster/cpu.py -w 80 -c 90 -t 2
command[check_interfaces]=%{_libdir}/nagios/plugins/gluster/network.py -e lo -e ';vdsmdummy;' -t 2
-command[check_brick_usage]=%{_libdir}/nagios/plugins/gluster/check_disk_and_inode.py -w 80 -c 90 -n -i \$ARG1\$
+command[check_brick_usage]=sudo %{_libdir}/nagios/plugins/gluster/check_disk_and_inode.py -w 80 -c 90 -n -i \$ARG1\$
command[check_vol_utilization]=sudo %{_libdir}/nagios/plugins/gluster/check_vol_utilization.py \$ARG1\$ -w \$ARG2\$ -c \$ARG3\$
command[check_vol_status]=sudo %{_libdir}/nagios/plugins/gluster/check_volume_status.py -v \$ARG1\$ -t \$ARG2\$
command[check_proc_status]=sudo %{_libdir}/nagios/plugins/gluster/check_gluster_proc_status.py -t \$ARG1\$
diff --git a/plugins/check_disk_and_inode.py b/plugins/check_disk_and_inode.py
index d7ee148..30e8035 100755
--- a/plugins/check_disk_and_inode.py
+++ b/plugins/check_disk_and_inode.py
@@ -1,5 +1,4 @@
#!/usr/bin/python
-# sadf.py -- nagios plugin uses sadf output for perf data
# Copyright (C) 2014 Red Hat Inc
#
# This program is free software; you can redistribute it and/or
@@ -18,6 +17,7 @@
#
+import os
import re
import sys
import commands
@@ -26,6 +26,7 @@ from glusternagios import utils
WARNING_LEVEL = 80
CRITICAL_LEVEL = 90
+INVALID_STATUS_CODE = -1
def getVal(val):
@@ -36,18 +37,40 @@ def getVal(val):
return 0
-def getUsageAndFree(command, lvm):
- disk = {'path': None, 'usePercent': None, 'avail': None,
- 'used': None, 'size': None, 'fs': None, 'status': None,
- 'retCode': 0}
+def getUsageAndFree(command, path, crit, warn, lvm):
+ disk = {'path': None, 'usePcent': 0, 'avail': 0,
+ 'used': 0, 'size': 0, 'fs': None,
+ 'status': None, 'msg': None, 'availPcent': 0,
+ 'statusCode': utils.PluginStatusCode.UNKNOWN}
+
+ # Check if device exists and permissions are ok
+ if not os.access(path, os.F_OK):
+ disk['status'] = "Device not found!"
+ disk['msg'] = 'no device'
+ disk['fs'] = path
+ disk['statusCode'] = utils.PluginStatusCode.CRITICAL
+ return disk
+
+ if not os.access(path, os.R_OK):
+ disk['status'] = "Unable to access the device"
+ disk['msg'] = 'no access'
+ disk['fs'] = path
+ disk['statusCode'] = utils.PluginStatusCode.CRITICAL
+ return disk
+
status = commands.getstatusoutput(command)
+ # Sample output
+ # (0, 'Filesystem 1G-blocks Used Available Use% Mounted on\n/dev/sda1
+ # 290G 196G 79G 72% /')
if status[0] != 0:
- disk['retCode'] = status[0]
+ disk['msg'] = 'error:%s' % status[0]
if status[0] == 256:
- disk['status'] = "Brick path not found!"
+ disk['status'] = "Brick/Device path not found!"
else:
disk['status'] = status[1]
+ disk['statusCode'] = utils.PluginStatusCode.CRITICAL
return disk
+
status = status[1].split()
disk['path'] = status[-1]
disk['avail'] = getVal(status[-3])
@@ -55,19 +78,29 @@ def getUsageAndFree(command, lvm):
disk['size'] = getVal(status[-5])
disk['fs'] = status[-6]
disk['usePcent'] = getVal(status[-2])
+ if disk['usePcent'] >= crit:
+ disk['statusCode'] = utils.PluginStatusCode.CRITICAL
+ elif disk['usePcent'] >= warn:
+ disk['statusCode'] = utils.PluginStatusCode.WARNING
+ elif disk['usePcent'] < warn:
+ disk['statusCode'] = utils.PluginStatusCode.OK
disk['availPcent'] = 100 - disk['usePcent']
+
return disk
-def getDisk(path, usage=None, lvm=False):
+def getDisk(path, crit, warn, usage=None, lvm=False):
if usage:
- return getUsageAndFree("df -B%s %s" % (usage, path), lvm)
+ return getUsageAndFree("df -B%s %s" % (usage, path),
+ path, crit, warn, lvm)
else:
- return getUsageAndFree("df -BG %s" % path, lvm)
+ return getUsageAndFree("df -BG %s" % path,
+ path, crit, warn, lvm)
-def getInode(path, lvm=False):
- return getUsageAndFree("df -i %s" % path, lvm)
+def getInode(path, crit, warn, lvm=False):
+ return getUsageAndFree("df -i %s" % path,
+ path, crit, warn, lvm)
def getMounts(searchQuery, excludeList=[]):
@@ -117,40 +150,60 @@ def parse_input():
return parser.parse_args()
+def _getMsg(okList, warnList, critList):
+ msg = ", ".join(critList)
+ if critList and (warnList or okList):
+ msg = "CRITICAL: " + msg
+ if warnList:
+ if msg:
+ msg += "; WARNING: "
+ msg += ", ".join(warnList)
+ if okList:
+ if msg:
+ msg += "; OK: "
+ msg += ", ".join(okList)
+ return msg
+
+
+def _getUnitAndType(val):
+ unit = utils.convertSize(val, "GB", "TB")
+ if unit >= 1:
+ return unit, "TB"
+ else:
+ return val, "GB"
+
+
def showDiskUsage(warn, crit, mountPaths, toListInode, usage=False,
isLvm=False, ignoreError=False):
diskPerf = []
warnList = []
critList = []
- diskList = []
+ okList = []
mounts = []
- level = -1
- msg = ""
+ statusCode = INVALID_STATUS_CODE
+ totalUsed = 0
+ totalSize = 0
+ noOfMounts = len(mountPaths)
+ maxPercentUsed = 0
for path in mountPaths:
- disk = getDisk(path,
- usage,
- isLvm)
-
- inode = getInode(path,
- isLvm)
-
- if disk['retCode'] != 0 or inode['retCode'] != 0:
- return utils.PluginStatusCode.CRITICAL, disk['status'], ""
+ disk = getDisk(path, crit, warn, usage, isLvm)
+ inode = getInode(path, crit, warn, isLvm)
if disk['path'] in mounts:
continue
if not disk['used'] or not inode['used']:
- if ignoreError:
- continue
- else:
+ if not ignoreError:
sys.exit(utils.PluginStatusCode.UNKNOWN)
- mounts.append(disk['path'])
- if usage:
- data = "%s=%.1f;%.1f;%.1f;0;%.1f" % (
+ if disk['path']:
+ mounts.append(disk['path'])
+ data = ""
+ if usage and disk['path']:
+ data = "%s=%.1f%s;%.1f;%.1f;0;%.1f" % (
disk['path'],
disk['used'],
+ usage,
warn * disk['size'] / 100,
crit * disk['size'] / 100,
disk['size'])
@@ -160,9 +213,9 @@ def showDiskUsage(warn, crit, mountPaths, toListInode, usage=False,
inode['used'],
warn * inode['used'] / 100,
crit * inode['used'] / 100,
- inode['used'])
- else:
- data = "%s=%.2f%%;%s;%s;0;%s" % (
+ inode['size'])
+ elif disk['path']:
+ data = "%s=%.2f%%;%s;%s;0;%sGB" % (
disk['path'],
disk['usePcent'],
warn,
@@ -178,41 +231,81 @@ def showDiskUsage(warn, crit, mountPaths, toListInode, usage=False,
inode['size'])
diskPerf.append(data)
- if disk['usePcent'] >= crit or inode['usePcent'] >= crit:
- if disk['usePcent'] >= crit:
- critList.append(
- "disk:%s;%s;%s%%" % (disk['fs'],
- disk['path'],
- disk['usePcent']))
+ totalUsed += disk['used']
+ totalSize += disk['size']
+ if disk['usePcent'] > maxPercentUsed:
+ maxPercentUsed = disk['usePcent']
+
+ # adding into status message if there is any
+ # specfic status found (short msg for list of disks)
+ msg = ""
+ if disk['status'] and disk['msg']:
+ if noOfMounts == 1:
+ msg = "%s=%s(%s)" % (disk['fs'], disk['path'],
+ disk['status'])
else:
- critList.append("inode:%s;%s;%s%%" % (inode['fs'],
- inode['path'],
- inode['usePcent']))
- if not level > utils.PluginStatusCode.WARNING:
- level = utils.PluginStatusCode.CRITICAL
- elif (disk['usePcent'] >= warn and disk['usePcent'] < crit) or (
- inode['usePcent'] >= warn and inode['usePcent'] < crit):
- if disk['usePcent'] >= warn:
- warnList.append("disk:%s;%s;%s%%" % (disk['fs'],
- disk['path'],
- disk['usePcent']))
+ msg = "%s(%s)" % (disk['fs'], disk['msg'])
+ else:
+ if noOfMounts == 1:
+ msg = "%s=%s" % (disk['fs'], disk['path'])
else:
- warnList.append("inode:%s;%s;%s%%" % (inode['fs'],
- inode['path'],
- inode['usePcent']))
- if not level > utils.PluginStatusCode.OK:
- level = utils.PluginStatusCode.WARNING
+ msg = "%s" % (disk['path'])
+
+ if disk['statusCode'] == utils.PluginStatusCode.CRITICAL or \
+ inode['statusCode'] == utils.PluginStatusCode.CRITICAL:
+ statusCode = utils.PluginStatusCode.CRITICAL
+ critList.append(msg)
+ elif (disk['statusCode'] == utils.PluginStatusCode.WARNING or
+ inode['statusCode'] == utils.PluginStatusCode.WARNING):
+ # if any previous disk statusCode is not critical
+ # we should not change the statusCode into warning
+ if statusCode != utils.PluginStatusCode.CRITICAL:
+ statusCode = utils.PluginStatusCode.WARNING
+ # just adding warning values into the list
+ warnList.append(msg)
+ elif disk['statusCode'] == utils.PluginStatusCode.OK:
+ if statusCode == INVALID_STATUS_CODE or \
+ statusCode == utils.PluginStatusCode.OK:
+ statusCode = utils.PluginStatusCode.OK
+ okList.append(msg)
else:
- diskList.append("%s=%s" % (disk['fs'], disk['path']))
-
- if len(critList) > 0:
- msg += "CRITICAL: " + ",".join(critList) + " "
- if len(warnList) > 0:
- msg += "WARNING: " + ",".join(warnList) + " "
- if len(diskList) > 0:
- msg += "OK: disks:mounts:(" + ",".join(diskList) + ")"
+ # added \ to fix E125 pep8 error
+ if statusCode != utils.PluginStatusCode.CRITICAL or \
+ statusCode != utils.PluginStatusCode.WARNING:
+ statusCode = utils.PluginStatusCode.UNKNOWN
+ okList.append(msg)
+
+ msg = _getMsg(okList, warnList, critList)
+
+ if totalUsed == 0 and totalSize == 0:
+ # avoid zero div error
+ return statusCode, "mount: %s" % msg, diskPerf
+ if totalUsed == 0:
+ # avoid zero div error
+ totUsagePercent = 0
+ elif len(mounts) > 1:
+ totUsagePercent = totalUsed / totalSize * 100
+ else:
+ totUsagePercent = maxPercentUsed
+ usageMsg = ""
+ if not usage:
+ totUsedSz, totUsedSzUnit = _getUnitAndType(totalUsed)
+ totSpaceSz, totSpaceSzUnit = _getUnitAndType(totalSize)
+ usageMsg = "%.1f%% used (%s%s out of %s%s)\n" % (totUsagePercent,
+ totUsedSz,
+ totUsedSzUnit,
+ totSpaceSz,
+ totSpaceSzUnit)
+ else:
+ usageMsg = "%.1f%% used (%s%s out of %s%s)\n" % (totUsagePercent,
+ totalUsed,
+ usage,
+ totalSize,
+ usage)
- return level, msg, diskPerf
+ if usageMsg:
+ msg = "%s:mount(s): (%s)" % (usageMsg, msg)
+ return statusCode, msg, diskPerf
if __name__ == '__main__':
@@ -226,25 +319,28 @@ if __name__ == '__main__':
if not options.mountPath or options.lvm or options.all:
options.mountPath += getMounts(searchQuery, options.exclude)
- level, msg, diskPerf = showDiskUsage(options.warn,
- options.crit,
- options.mountPath,
- options.inode,
- options.usage,
- options.lvm,
- options.ignore)
-
- if utils.PluginStatusCode.CRITICAL == level:
- sys.stdout.write("%s | %s\n" % (
+ statusCode, msg, diskPerf = showDiskUsage(options.warn,
+ options.crit,
+ options.mountPath,
+ options.inode,
+ options.usage,
+ options.lvm,
+ options.ignore)
+
+ if utils.PluginStatusCode.CRITICAL == statusCode:
+ sys.stdout.write("%s : %s | %s\n" % (
+ utils.PluginStatus.CRITICAL,
msg,
" ".join(diskPerf)))
sys.exit(utils.PluginStatusCode.CRITICAL)
- elif utils.PluginStatusCode.WARNING == level:
- sys.stdout.write("%s | %s\n" % (
+ elif utils.PluginStatusCode.WARNING == statusCode:
+ sys.stdout.write("%s : %s | %s\n" % (
+ utils.PluginStatus.WARNING,
msg,
" ".join(diskPerf)))
sys.exit(utils.PluginStatusCode.WARNING)
else:
- sys.stdout.write("%s | %s\n" % (
+ sys.stdout.write("%s : %s | %s\n" % (
+ utils.PluginStatus.OK,
msg,
" ".join(diskPerf)))
diff --git a/tests/test_disk.py b/tests/test_disk.py
index a9096c2..b41d3bf 100644
--- a/tests/test_disk.py
+++ b/tests/test_disk.py
@@ -18,12 +18,16 @@
# Refer to the README and COPYING files for full details of the license
#
+import os
import commands
from testrunner import PluginsTestCase as TestCaseBase
from plugins import check_disk_and_inode as checkDisk
class TestDisk(TestCaseBase):
+ def mock_osaccess(self, path=None, osflag=None):
+ return True
+
def mock_getstatusoutput(self, i):
out = [
"Filesystem Size Used Avail Use% Mounted on",
@@ -78,28 +82,29 @@ class TestDisk(TestCaseBase):
def test_getUsageAndFree(self):
commands.getstatusoutput = self.mock_getstatusoutput
- disk = checkDisk.getUsageAndFree(1, True)
+ os.access = self.mock_osaccess
+ disk = checkDisk.getUsageAndFree(1, "", 80, 90, "")
self.assertEqual(disk['usePcent'], 64)
self.assertEqual(disk['availPcent'], 36)
self.assertEqual(disk['used'], 174)
self.assertEqual(disk['avail'], 102)
self.assertEqual(disk['path'], '/')
- disk = checkDisk.getUsageAndFree(2, True)
+ disk = checkDisk.getUsageAndFree(2, "", 80, 90, "")
self.assertEqual(disk['usePcent'], 0)
self.assertEqual(disk['availPcent'], 100)
self.assertEqual(disk['used'], 0)
self.assertEqual(disk['avail'], 3.0)
self.assertEqual(disk['path'], '/var')
- disk = checkDisk.getUsageAndFree(3, True)
+ disk = checkDisk.getUsageAndFree(3, "", 80, 90, "")
self.assertEqual(disk['usePcent'], 40)
self.assertEqual(disk['availPcent'], 60)
self.assertEqual(disk['used'], 200)
self.assertEqual(disk['avail'], 100)
self.assertEqual(disk['path'], '/mnt1')
- disk = checkDisk.getUsageAndFree(4, True)
+ disk = checkDisk.getUsageAndFree(4, "", 80, 90, "")
self.assertEqual(disk['usePcent'], 85)
self.assertEqual(disk['availPcent'], 15)
self.assertEqual(disk['used'], 1774)
@@ -123,65 +128,60 @@ class TestDisk(TestCaseBase):
def test_diskUsage(self):
commands.getstatusoutput = self.mock_getstatusoutput
checkDisk.open = self.mock_open
+ os.access = self.mock_osaccess
mounts = checkDisk.getMounts("/", [])
self.assertEqual(checkDisk.showDiskUsage(80,
90,
[mounts[1]],
True,
- usage='BGB',
- ignoreError=True),
- (-1, ' disks:mounts:(/dev/sda1=/)',
- ['/=174.0;232.0;261.0;0;290.0 '
- '/=174.0;139.2;156.6;0;174.0']))
+ usage='BGB'),
+ (0, '64.0% used (174.0BGB out of 290.0BGB)\n'
+ ':mount(s): (/dev/sda1=/)',
+ ['/=174.0BGB;232.0;261.0;0;290.0 '
+ '/=174.0;139.2;156.6;0;290.0']))
self.assertEqual(checkDisk.showDiskUsage(80,
90,
- [mounts[1]], True,
- ignoreError=True),
- (-1, ' disks:mounts:(/dev/sda1=/)',
- ['/=64.00;80;90;0;100 /=64.00;80;90;0;100']))
+ [mounts[1]],
+ True),
+ (0, '64.0% used (174.0GB out of 290.0GB)\n'
+ ':mount(s): (/dev/sda1=/)',
+ ['/=64.00%;80;90;0;290.0GB '
+ '/=64.00%;80;90;0;290.0']))
self.assertEqual(checkDisk.showDiskUsage(80,
90,
- ["/mnt/vol2"], True,
- ignoreError=True),
- (-1, ' disks:mounts:(10.70.43.190:vol2=/mnt/vol2)',
- ['/mnt/vol2=47.00;80;90;0;100 '
- '/mnt/vol2=47.00;80;90;0;100']))
+ [mounts[1]], True),
+ (0, '64.0% used (174.0GB out of 290.0GB)\n'
+ ':mount(s): (/dev/sda1=/)',
+ ['/=64.00%;80;90;0;290.0GB '
+ '/=64.00%;80;90;0;290.0']))
self.assertEqual(checkDisk.showDiskUsage(80,
90,
- ["/mnt/vol2"], True,
- usage="MB",
- ignoreError=True),
- (-1, ' disks:mounts:(10.70.43.190:vol2=/mnt/vol2)',
- ['/mnt/vol2=23228.0;42276.8;47561.4;0;52846.0 '
- '/mnt/vol2=23228.0;18582.4;20905.2;0;23228.0']))
+ ["/mnt/vol2"], True),
+ (0, '47.0% used (22.68359375TB out of '
+ '51.607421875TB)\n'
+ ':mount(s): (10.70.43.190:vol2=/mnt/vol2)',
+ ['/mnt/vol2=47.00%;80;90;0;52846.0GB '
+ '/mnt/vol2=47.00%;80;90;0;52846.0']))
self.assertEqual(checkDisk.showDiskUsage(10,
20,
- ["/mnt/vol2"], True,
- usage="MB",
- ignoreError=True),
- (2, 'crit:disk:10.70.43.190:vol2;/mnt/vol2;47.0',
- ['/mnt/vol2=23228.0;5284.6;10569.2;0;52846.0 '
- '/mnt/vol2=23228.0;2322.8;4645.6;0;23228.0']))
+ ["/mnt/vol2"], True),
+ (2, '47.0% used (22.68359375TB out of '
+ '51.607421875TB)\n'
+ ':mount(s): (10.70.43.190:vol2=/mnt/vol2)',
+ ['/mnt/vol2=47.00%;10;20;0;52846.0GB '
+ '/mnt/vol2=47.00%;10;20;0;52846.0']))
# negative test
- self.assertEqual(checkDisk.showDiskUsage(-1,
- 200,
- ["/mnt/vol2"], True,
- usage="MB",
- ignoreError=True),
- (1, 'warn:disk:10.70.43.190:vol2;/mnt/vol2;47.0',
- ['/mnt/vol2=23228.0;-528.5;105692.0;0;52846.0 '
- '/mnt/vol2=23228.0;-232.3;46456.0;0;23228.0']))
-
- # testing warning level
- self.assertEqual(checkDisk.showDiskUsage(40, 50, ["/mnt/vol2"], True,
- usage="MB",
- ignoreError=True),
- (1, 'warn:disk:10.70.43.190:vol2;/mnt/vol2;47.0',
- ['/mnt/vol2=23228.0;21138.4;26423.0;0;52846.0 '
- '/mnt/vol2=23228.0;9291.2;11614.0;0;23228.0']))
+ self.assertEqual(checkDisk.showDiskUsage(1,
+ 100,
+ ["/mnt/vol2"], True),
+ (1, '47.0% used (22.68359375TB out of '
+ '51.607421875TB)\n'
+ ':mount(s): (10.70.43.190:vol2=/mnt/vol2)',
+ ['/mnt/vol2=47.00%;1;100;0;52846.0GB '
+ '/mnt/vol2=47.00%;1;100;0;52846.0']))