From 8e977e1fd0a0bed52049344765ab4581d7f3c761 Mon Sep 17 00:00:00 2001 From: Timothy Asir Date: Fri, 30 May 2014 17:05:14 +0530 Subject: Add status information to show disk usage This will show the status like: for critical usage: CRITICAL: 4% used (4GB out of 100GB):mounts: (CRITICAL : followed by WARNING if any followed by OK for disk status "WARNING": WARNING: 4% used (4GB out of 100GB):mounts: (WARNING if any followed by OK for disk status "OK": OK: 4% used (4GB out of 100GB):mounts:() Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1081495 Change-Id: I9dbda7a5d6ea992ba73acce2174e6d66f2e16066 Signed-off-by: Timothy Asir Reviewed-on: http://review.gluster.org/7936 Tested-by: Timothy Asir Reviewed-by: Sahina Bose --- config/nrpe.in | 1 + gluster-nagios-addons.spec.in | 4 +- plugins/check_disk_and_inode.py | 250 +++++++++++++++++++++++++++------------- tests/test_disk.py | 92 +++++++-------- 4 files changed, 222 insertions(+), 125 deletions(-) diff --git a/config/nrpe.in b/config/nrpe.in index b0fa08c..8b4f6e9 100644 --- a/config/nrpe.in +++ b/config/nrpe.in @@ -1,4 +1,5 @@ Cmnd_Alias NRPE_PATHS = @sbindir@/send_nsca, \ + @libdir@/nagios/plugins/gluster/check_disk_and_inode.py, \ @libdir@/nagios/plugins/gluster/check_vol_utilization.py, \ @libdir@/nagios/plugins/gluster/check_volume_status.py, \ @libdir@/nagios/plugins/gluster/check_gluster_proc_status.py, \ diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in index 497f085..0ae3bed 100644 --- a/gluster-nagios-addons.spec.in +++ b/gluster-nagios-addons.spec.in @@ -141,12 +141,12 @@ fi cat >> %{_sysconfdir}/nagios/nrpe.cfg <= crit: + disk['statusCode'] = utils.PluginStatusCode.CRITICAL + elif disk['usePcent'] >= warn: + disk['statusCode'] = utils.PluginStatusCode.WARNING + elif disk['usePcent'] < warn: + disk['statusCode'] = utils.PluginStatusCode.OK disk['availPcent'] = 100 - disk['usePcent'] + return disk -def getDisk(path, usage=None, lvm=False): +def getDisk(path, crit, warn, usage=None, lvm=False): if usage: - return getUsageAndFree("df -B%s %s" % (usage, path), lvm) + return getUsageAndFree("df -B%s %s" % (usage, path), + path, crit, warn, lvm) else: - return getUsageAndFree("df -BG %s" % path, lvm) + return getUsageAndFree("df -BG %s" % path, + path, crit, warn, lvm) -def getInode(path, lvm=False): - return getUsageAndFree("df -i %s" % path, lvm) +def getInode(path, crit, warn, lvm=False): + return getUsageAndFree("df -i %s" % path, + path, crit, warn, lvm) def getMounts(searchQuery, excludeList=[]): @@ -117,40 +150,60 @@ def parse_input(): return parser.parse_args() +def _getMsg(okList, warnList, critList): + msg = ", ".join(critList) + if critList and (warnList or okList): + msg = "CRITICAL: " + msg + if warnList: + if msg: + msg += "; WARNING: " + msg += ", ".join(warnList) + if okList: + if msg: + msg += "; OK: " + msg += ", ".join(okList) + return msg + + +def _getUnitAndType(val): + unit = utils.convertSize(val, "GB", "TB") + if unit >= 1: + return unit, "TB" + else: + return val, "GB" + + def showDiskUsage(warn, crit, mountPaths, toListInode, usage=False, isLvm=False, ignoreError=False): diskPerf = [] warnList = [] critList = [] - diskList = [] + okList = [] mounts = [] - level = -1 - msg = "" + statusCode = INVALID_STATUS_CODE + totalUsed = 0 + totalSize = 0 + noOfMounts = len(mountPaths) + maxPercentUsed = 0 for path in mountPaths: - disk = getDisk(path, - usage, - isLvm) - - inode = getInode(path, - isLvm) - - if disk['retCode'] != 0 or inode['retCode'] != 0: - return utils.PluginStatusCode.CRITICAL, disk['status'], "" + disk = getDisk(path, crit, warn, usage, isLvm) + inode = getInode(path, crit, warn, isLvm) if disk['path'] in mounts: continue if not disk['used'] or not inode['used']: - if ignoreError: - continue - else: + if not ignoreError: sys.exit(utils.PluginStatusCode.UNKNOWN) - mounts.append(disk['path']) - if usage: - data = "%s=%.1f;%.1f;%.1f;0;%.1f" % ( + if disk['path']: + mounts.append(disk['path']) + data = "" + if usage and disk['path']: + data = "%s=%.1f%s;%.1f;%.1f;0;%.1f" % ( disk['path'], disk['used'], + usage, warn * disk['size'] / 100, crit * disk['size'] / 100, disk['size']) @@ -160,9 +213,9 @@ def showDiskUsage(warn, crit, mountPaths, toListInode, usage=False, inode['used'], warn * inode['used'] / 100, crit * inode['used'] / 100, - inode['used']) - else: - data = "%s=%.2f%%;%s;%s;0;%s" % ( + inode['size']) + elif disk['path']: + data = "%s=%.2f%%;%s;%s;0;%sGB" % ( disk['path'], disk['usePcent'], warn, @@ -178,41 +231,81 @@ def showDiskUsage(warn, crit, mountPaths, toListInode, usage=False, inode['size']) diskPerf.append(data) - if disk['usePcent'] >= crit or inode['usePcent'] >= crit: - if disk['usePcent'] >= crit: - critList.append( - "disk:%s;%s;%s%%" % (disk['fs'], - disk['path'], - disk['usePcent'])) + totalUsed += disk['used'] + totalSize += disk['size'] + if disk['usePcent'] > maxPercentUsed: + maxPercentUsed = disk['usePcent'] + + # adding into status message if there is any + # specfic status found (short msg for list of disks) + msg = "" + if disk['status'] and disk['msg']: + if noOfMounts == 1: + msg = "%s=%s(%s)" % (disk['fs'], disk['path'], + disk['status']) else: - critList.append("inode:%s;%s;%s%%" % (inode['fs'], - inode['path'], - inode['usePcent'])) - if not level > utils.PluginStatusCode.WARNING: - level = utils.PluginStatusCode.CRITICAL - elif (disk['usePcent'] >= warn and disk['usePcent'] < crit) or ( - inode['usePcent'] >= warn and inode['usePcent'] < crit): - if disk['usePcent'] >= warn: - warnList.append("disk:%s;%s;%s%%" % (disk['fs'], - disk['path'], - disk['usePcent'])) + msg = "%s(%s)" % (disk['fs'], disk['msg']) + else: + if noOfMounts == 1: + msg = "%s=%s" % (disk['fs'], disk['path']) else: - warnList.append("inode:%s;%s;%s%%" % (inode['fs'], - inode['path'], - inode['usePcent'])) - if not level > utils.PluginStatusCode.OK: - level = utils.PluginStatusCode.WARNING + msg = "%s" % (disk['path']) + + if disk['statusCode'] == utils.PluginStatusCode.CRITICAL or \ + inode['statusCode'] == utils.PluginStatusCode.CRITICAL: + statusCode = utils.PluginStatusCode.CRITICAL + critList.append(msg) + elif (disk['statusCode'] == utils.PluginStatusCode.WARNING or + inode['statusCode'] == utils.PluginStatusCode.WARNING): + # if any previous disk statusCode is not critical + # we should not change the statusCode into warning + if statusCode != utils.PluginStatusCode.CRITICAL: + statusCode = utils.PluginStatusCode.WARNING + # just adding warning values into the list + warnList.append(msg) + elif disk['statusCode'] == utils.PluginStatusCode.OK: + if statusCode == INVALID_STATUS_CODE or \ + statusCode == utils.PluginStatusCode.OK: + statusCode = utils.PluginStatusCode.OK + okList.append(msg) else: - diskList.append("%s=%s" % (disk['fs'], disk['path'])) - - if len(critList) > 0: - msg += "CRITICAL: " + ",".join(critList) + " " - if len(warnList) > 0: - msg += "WARNING: " + ",".join(warnList) + " " - if len(diskList) > 0: - msg += "OK: disks:mounts:(" + ",".join(diskList) + ")" + # added \ to fix E125 pep8 error + if statusCode != utils.PluginStatusCode.CRITICAL or \ + statusCode != utils.PluginStatusCode.WARNING: + statusCode = utils.PluginStatusCode.UNKNOWN + okList.append(msg) + + msg = _getMsg(okList, warnList, critList) + + if totalUsed == 0 and totalSize == 0: + # avoid zero div error + return statusCode, "mount: %s" % msg, diskPerf + if totalUsed == 0: + # avoid zero div error + totUsagePercent = 0 + elif len(mounts) > 1: + totUsagePercent = totalUsed / totalSize * 100 + else: + totUsagePercent = maxPercentUsed + usageMsg = "" + if not usage: + totUsedSz, totUsedSzUnit = _getUnitAndType(totalUsed) + totSpaceSz, totSpaceSzUnit = _getUnitAndType(totalSize) + usageMsg = "%.1f%% used (%s%s out of %s%s)\n" % (totUsagePercent, + totUsedSz, + totUsedSzUnit, + totSpaceSz, + totSpaceSzUnit) + else: + usageMsg = "%.1f%% used (%s%s out of %s%s)\n" % (totUsagePercent, + totalUsed, + usage, + totalSize, + usage) - return level, msg, diskPerf + if usageMsg: + msg = "%s:mount(s): (%s)" % (usageMsg, msg) + return statusCode, msg, diskPerf if __name__ == '__main__': @@ -226,25 +319,28 @@ if __name__ == '__main__': if not options.mountPath or options.lvm or options.all: options.mountPath += getMounts(searchQuery, options.exclude) - level, msg, diskPerf = showDiskUsage(options.warn, - options.crit, - options.mountPath, - options.inode, - options.usage, - options.lvm, - options.ignore) - - if utils.PluginStatusCode.CRITICAL == level: - sys.stdout.write("%s | %s\n" % ( + statusCode, msg, diskPerf = showDiskUsage(options.warn, + options.crit, + options.mountPath, + options.inode, + options.usage, + options.lvm, + options.ignore) + + if utils.PluginStatusCode.CRITICAL == statusCode: + sys.stdout.write("%s : %s | %s\n" % ( + utils.PluginStatus.CRITICAL, msg, " ".join(diskPerf))) sys.exit(utils.PluginStatusCode.CRITICAL) - elif utils.PluginStatusCode.WARNING == level: - sys.stdout.write("%s | %s\n" % ( + elif utils.PluginStatusCode.WARNING == statusCode: + sys.stdout.write("%s : %s | %s\n" % ( + utils.PluginStatus.WARNING, msg, " ".join(diskPerf))) sys.exit(utils.PluginStatusCode.WARNING) else: - sys.stdout.write("%s | %s\n" % ( + sys.stdout.write("%s : %s | %s\n" % ( + utils.PluginStatus.OK, msg, " ".join(diskPerf))) diff --git a/tests/test_disk.py b/tests/test_disk.py index a9096c2..b41d3bf 100644 --- a/tests/test_disk.py +++ b/tests/test_disk.py @@ -18,12 +18,16 @@ # Refer to the README and COPYING files for full details of the license # +import os import commands from testrunner import PluginsTestCase as TestCaseBase from plugins import check_disk_and_inode as checkDisk class TestDisk(TestCaseBase): + def mock_osaccess(self, path=None, osflag=None): + return True + def mock_getstatusoutput(self, i): out = [ "Filesystem Size Used Avail Use% Mounted on", @@ -78,28 +82,29 @@ class TestDisk(TestCaseBase): def test_getUsageAndFree(self): commands.getstatusoutput = self.mock_getstatusoutput - disk = checkDisk.getUsageAndFree(1, True) + os.access = self.mock_osaccess + disk = checkDisk.getUsageAndFree(1, "", 80, 90, "") self.assertEqual(disk['usePcent'], 64) self.assertEqual(disk['availPcent'], 36) self.assertEqual(disk['used'], 174) self.assertEqual(disk['avail'], 102) self.assertEqual(disk['path'], '/') - disk = checkDisk.getUsageAndFree(2, True) + disk = checkDisk.getUsageAndFree(2, "", 80, 90, "") self.assertEqual(disk['usePcent'], 0) self.assertEqual(disk['availPcent'], 100) self.assertEqual(disk['used'], 0) self.assertEqual(disk['avail'], 3.0) self.assertEqual(disk['path'], '/var') - disk = checkDisk.getUsageAndFree(3, True) + disk = checkDisk.getUsageAndFree(3, "", 80, 90, "") self.assertEqual(disk['usePcent'], 40) self.assertEqual(disk['availPcent'], 60) self.assertEqual(disk['used'], 200) self.assertEqual(disk['avail'], 100) self.assertEqual(disk['path'], '/mnt1') - disk = checkDisk.getUsageAndFree(4, True) + disk = checkDisk.getUsageAndFree(4, "", 80, 90, "") self.assertEqual(disk['usePcent'], 85) self.assertEqual(disk['availPcent'], 15) self.assertEqual(disk['used'], 1774) @@ -123,65 +128,60 @@ class TestDisk(TestCaseBase): def test_diskUsage(self): commands.getstatusoutput = self.mock_getstatusoutput checkDisk.open = self.mock_open + os.access = self.mock_osaccess mounts = checkDisk.getMounts("/", []) self.assertEqual(checkDisk.showDiskUsage(80, 90, [mounts[1]], True, - usage='BGB', - ignoreError=True), - (-1, ' disks:mounts:(/dev/sda1=/)', - ['/=174.0;232.0;261.0;0;290.0 ' - '/=174.0;139.2;156.6;0;174.0'])) + usage='BGB'), + (0, '64.0% used (174.0BGB out of 290.0BGB)\n' + ':mount(s): (/dev/sda1=/)', + ['/=174.0BGB;232.0;261.0;0;290.0 ' + '/=174.0;139.2;156.6;0;290.0'])) self.assertEqual(checkDisk.showDiskUsage(80, 90, - [mounts[1]], True, - ignoreError=True), - (-1, ' disks:mounts:(/dev/sda1=/)', - ['/=64.00;80;90;0;100 /=64.00;80;90;0;100'])) + [mounts[1]], + True), + (0, '64.0% used (174.0GB out of 290.0GB)\n' + ':mount(s): (/dev/sda1=/)', + ['/=64.00%;80;90;0;290.0GB ' + '/=64.00%;80;90;0;290.0'])) self.assertEqual(checkDisk.showDiskUsage(80, 90, - ["/mnt/vol2"], True, - ignoreError=True), - (-1, ' disks:mounts:(10.70.43.190:vol2=/mnt/vol2)', - ['/mnt/vol2=47.00;80;90;0;100 ' - '/mnt/vol2=47.00;80;90;0;100'])) + [mounts[1]], True), + (0, '64.0% used (174.0GB out of 290.0GB)\n' + ':mount(s): (/dev/sda1=/)', + ['/=64.00%;80;90;0;290.0GB ' + '/=64.00%;80;90;0;290.0'])) self.assertEqual(checkDisk.showDiskUsage(80, 90, - ["/mnt/vol2"], True, - usage="MB", - ignoreError=True), - (-1, ' disks:mounts:(10.70.43.190:vol2=/mnt/vol2)', - ['/mnt/vol2=23228.0;42276.8;47561.4;0;52846.0 ' - '/mnt/vol2=23228.0;18582.4;20905.2;0;23228.0'])) + ["/mnt/vol2"], True), + (0, '47.0% used (22.68359375TB out of ' + '51.607421875TB)\n' + ':mount(s): (10.70.43.190:vol2=/mnt/vol2)', + ['/mnt/vol2=47.00%;80;90;0;52846.0GB ' + '/mnt/vol2=47.00%;80;90;0;52846.0'])) self.assertEqual(checkDisk.showDiskUsage(10, 20, - ["/mnt/vol2"], True, - usage="MB", - ignoreError=True), - (2, 'crit:disk:10.70.43.190:vol2;/mnt/vol2;47.0', - ['/mnt/vol2=23228.0;5284.6;10569.2;0;52846.0 ' - '/mnt/vol2=23228.0;2322.8;4645.6;0;23228.0'])) + ["/mnt/vol2"], True), + (2, '47.0% used (22.68359375TB out of ' + '51.607421875TB)\n' + ':mount(s): (10.70.43.190:vol2=/mnt/vol2)', + ['/mnt/vol2=47.00%;10;20;0;52846.0GB ' + '/mnt/vol2=47.00%;10;20;0;52846.0'])) # negative test - self.assertEqual(checkDisk.showDiskUsage(-1, - 200, - ["/mnt/vol2"], True, - usage="MB", - ignoreError=True), - (1, 'warn:disk:10.70.43.190:vol2;/mnt/vol2;47.0', - ['/mnt/vol2=23228.0;-528.5;105692.0;0;52846.0 ' - '/mnt/vol2=23228.0;-232.3;46456.0;0;23228.0'])) - - # testing warning level - self.assertEqual(checkDisk.showDiskUsage(40, 50, ["/mnt/vol2"], True, - usage="MB", - ignoreError=True), - (1, 'warn:disk:10.70.43.190:vol2;/mnt/vol2;47.0', - ['/mnt/vol2=23228.0;21138.4;26423.0;0;52846.0 ' - '/mnt/vol2=23228.0;9291.2;11614.0;0;23228.0'])) + self.assertEqual(checkDisk.showDiskUsage(1, + 100, + ["/mnt/vol2"], True), + (1, '47.0% used (22.68359375TB out of ' + '51.607421875TB)\n' + ':mount(s): (10.70.43.190:vol2=/mnt/vol2)', + ['/mnt/vol2=47.00%;1;100;0;52846.0GB ' + '/mnt/vol2=47.00%;1;100;0;52846.0'])) -- cgit