diff options
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | gluster-nagios-addons.spec.in | 3 | ||||
-rw-r--r-- | plugins/Makefile.am | 2 | ||||
-rwxr-xr-x | plugins/check_proc_status.py | 195 | ||||
-rw-r--r-- | plugins/gluster-proc.crontab | 1 | ||||
-rw-r--r-- | tests/Makefile.am | 2 | ||||
-rw-r--r-- | tests/check_proc_test_data.py | 196 | ||||
-rw-r--r-- | tests/test_gluster_proc.py | 42 |
8 files changed, 442 insertions, 0 deletions
diff --git a/configure.ac b/configure.ac index 6d3c6c7..24c04c2 100644 --- a/configure.ac +++ b/configure.ac @@ -99,6 +99,7 @@ AC_CONFIG_FILES([ config/glusternagios.conf plugins/Makefile plugins/nscautils.py + plugins/gluster-proc.crontab plugins/volcap/Makefile tests/Makefile tests/run_tests_local.sh diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in index 8278bb4..da2f558 100644 --- a/gluster-nagios-addons.spec.in +++ b/gluster-nagios-addons.spec.in @@ -65,6 +65,7 @@ Requires(post): iptables Requires: gluster-nagios-common Requires: libselinux-python Requires: nagios-plugins-ide_smart +Requires: nagios-plugins-procs Requires: nrpe Requires: nsca-client Requires: python-argparse @@ -73,6 +74,7 @@ Requires: python-inotify Requires: python-netaddr Requires: python-pthreading Requires: python-cpopen >= 1.3 +Requires: python-psutil Requires: sysstat %description @@ -159,6 +161,7 @@ sed -i '/check_vol_quota_status/d' %{_sysconfdir}/nagios/nrpe.cfg %{_sysconfdir}/cron.d/gluster-sysstat.crontab %{_sysconfdir}/rsyslog.d/glusternagios.conf %{_sysconfdir}/nagios/nagios_server.conf +%{_sysconfdir}/cron.d/gluster-proc.crontab %files tests %defattr(-,root,root,-) diff --git a/plugins/Makefile.am b/plugins/Makefile.am index 9bba2d4..b1592de 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -5,6 +5,7 @@ SUBDIRS = \ cronddir = $(sysconfdir)/cron.d crond_DATA = \ gluster-sysstat.crontab \ + gluster-proc.crontab \ $(NULL) dist_glusternagiosplugins_PYTHON = \ @@ -13,6 +14,7 @@ dist_glusternagiosplugins_PYTHON = \ check_vol_utilization.py \ check_vol_status.py \ check_volume_status.py \ + check_proc_status.py \ cpu.py \ discoverpeers.py \ discoverlogicalcomponents.py \ diff --git a/plugins/check_proc_status.py b/plugins/check_proc_status.py new file mode 100755 index 0000000..80a41c8 --- /dev/null +++ b/plugins/check_proc_status.py @@ -0,0 +1,195 @@ +#!/usr/bin/python +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can 
redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# + +import sys +import errno +import socket +import psutil +import nscautils +import glusternagios + +from glusternagios import utils +from glusternagios import glustercli + + +_checkProc = utils.CommandPath('check_proc', + '/usr/lib64/nagios/plugins/check_procs') + +_glusterVolPath = "/var/lib/glusterd/vols" +_checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"] +_checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", + "glustershd"] +_checkSmbCmd = [_checkProc.cmd, "-C", "smb"] +_checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", + "quotad"] +_checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"] +_checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"] +_nfsService = "Glusterfs NFS Daemon" +_shdService = "Glusterfs Self-Heal Daemon" +_smbService = "CIFS" +_brickService = "Brick Status - " +_glusterdService = "Gluster Management Daemon" +_quotadService = "Gluster Quota Daemon" + + +def sendBrickStatus(hostName, volInfo): + hostUuid = glustercli.hostUUIDGet() + status = None + for volumeName, volumeInfo in volInfo.iteritems(): + if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE: + continue + for brick in volumeInfo['bricksInfo']: + if brick.get('hostUuid') != hostUuid: + continue + brickService = "Brick Status - 
%s" % brick['name'] + pidFile = brick['name'].replace( + ":/", "-").replace("/", "-") + ".pid" + try: + with open("%s/%s/run/%s" % ( + _glusterVolPath, volumeName, pidFile)) as f: + if psutil.pid_exists(int(f.read().strip())): + status = utils.PluginStatusCode.OK + else: + status = utils.PluginStatusCode.CRITICAL + except IOError, e: + if e.errno == errno.ENOENT: + status = utils.PluginStatusCode.CRITICAL + else: + status = utils.PluginStatusCode.UNKNOWN + msg = "UNKNOWN: Brick %s: %s" % (brick['name'], str(e)) + finally: + if status == utils.PluginStatusCode.OK: + msg = "OK: Brick %s" % brick['name'] + elif status != utils.PluginStatusCode.UNKNOWN: + msg = "CRITICAL: Brick %s is down" % brick['name'] + nscautils.send_to_nsca(hostName, brickService, status, msg) + + +def sendNfsStatus(hostName, volInfo): + # if nfs is already running we need not to check further + status, msg, error = utils.execCmd(_checkNfsCmd) + if status == utils.PluginStatusCode.OK: + nscautils.send_to_nsca(hostName, _nfsService, status, msg) + return + + # if nfs is not running and any of the volume uses nfs + # then its required to alert the user + for k, v in volInfo.iteritems(): + nfsStatus = v.get('options', {}).get('nfs.disable', 'off') + if nfsStatus == 'off': + msg = "CRITICAL: Process glusterfs-nfs is not running" + status = utils.PluginStatusCode.CRITICAL + break + else: + msg = "OK: No gluster volume uses nfs" + status = utils.PluginStatusCode.OK + nscautils.send_to_nsca(hostName, _nfsService, status, msg) + + +def sendSmbStatus(hostName, volInfo): + status, msg, error = utils.execCmd(_checkSmbCmd) + if status == utils.PluginStatusCode.OK: + nscautils.send_to_nsca(hostName, _smbService, status, msg) + return + + # if smb is not running and any of the volume uses smb + # then its required to alert the use + for k, v in volInfo.iteritems(): + cifsStatus = v.get('options', {}).get('user.cifs', '') + smbStatus = v.get('options', {}).get('user.smb', '') + if cifsStatus == 'disable' or 
smbStatus == 'disable': + msg = "CRITICAL: Process smb is not running" + status = utils.PluginStatusCode.CRITICAL + break + else: + msg = "OK: No gluster volume uses smb" + status = utils.PluginStatusCode.OK + nscautils.send_to_nsca(hostName, _smbService, status, msg) + + +def sendQuotadStatus(hostName, volInfo): + # if quota is already running we need not to check further + status, msg, error = utils.execCmd(_checkQuotaCmd) + if status == utils.PluginStatusCode.OK: + nscautils.send_to_nsca(hostName, _quotadService, status, msg) + return + + # if quota is not running and any of the volume uses quota + # then the quotad process should be running in the host + for k, v in volInfo.iteritems(): + quotadStatus = v.get('options', {}).get('features.quota', '') + if quotadStatus == 'on': + msg = "CRITICAL: Process quotad is not running" + utils.PluginStatusCode.CRITICAL + break + else: + msg = "OK: Quota not enabled" + status = utils.PluginStatusCode.OK + nscautils.send_to_nsca(hostName, _quotadService, status, msg) + + +def sendShdStatus(hostName, volInfo): + status, msg, error = utils.execCmd(_checkShdCmd) + if status == utils.PluginStatusCode.OK: + nscautils.send_to_nsca(hostName, _shdService, status, msg) + return + + hostUuid = glustercli.hostUUIDGet() + for volumeName, volumeInfo in volInfo.iteritems(): + if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE: + continue + for brick in volumeInfo['bricksInfo']: + if brick['hostUuid'] == hostUuid and \ + int(volumeInfo['replicaCount']) > 1: + status = utils.PluginStatusCode.CRITICAL + msg = "CRITICAL: Gluster Self Heal Daemon not running" + break + else: + msg = "OK: Process Gluster Self Heal Daemon" + status = utils.PluginStatusCode.OK + nscautils.send_to_nsca(hostName, _shdService, status, msg) + + +if __name__ == '__main__': + #Get the volume status + #status = 0 + hostName = socket.getfqdn() + if hostName == "localhost.localdomain" or hostName == "localhost": + sys.stderr.write("failed to find localhost 
fqdn") + + ### service check ### + status, msg, error = utils.execCmd(_checkGlusterdCmd) + nscautils.send_to_nsca(hostName, _glusterdService, status, msg) + + # Get the volume status only if glusterfs is running to avoid + # unusual delay + if status != utils.PluginStatusCode.OK: + sys.exit(status) + + try: + volInfo = glustercli.volumeInfo() + except glusternagios.glustercli.GlusterCmdFailedException as e: + sys.exit(utils.PluginStatusCode.UNKNOWN) + + sendNfsStatus(hostName, volInfo) + sendSmbStatus(hostName, volInfo) + sendShdStatus(hostName, volInfo) + sendQuotadStatus(hostName, volInfo) + sendBrickStatus(hostName, volInfo) + + sys.exit(utils.PluginStatusCode.OK) diff --git a/plugins/gluster-proc.crontab b/plugins/gluster-proc.crontab new file mode 100644 index 0000000..7344ea2 --- /dev/null +++ b/plugins/gluster-proc.crontab @@ -0,0 +1 @@ +*/1 * * * * root /usr/lib64/nagios/gluster/plugins/check_proc_status.py diff --git a/tests/Makefile.am b/tests/Makefile.am index a540f11..0712feb 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -21,8 +21,10 @@ test_modules = \ test_check_gluster_syslog.py \ test_check_volume_status.py \ + check_proc_test_data.py \ test_cpu.py \ test_cpu_dataFile.py \ + test_gluster_proc.py \ test_memory.py \ test_memory_dataFile.py \ test_network.py \ diff --git a/tests/check_proc_test_data.py b/tests/check_proc_test_data.py new file mode 100644 index 0000000..b4d4ac4 --- /dev/null +++ b/tests/check_proc_test_data.py @@ -0,0 +1,196 @@ +nfsEnabled1 = {'rep1': {'brickCount': '2', + 'bricks': ['10.70.43.33:/bricks/rep1_1', + '10.70.43.33:/bricks/rep1_2'], + 'bricksInfo': [], + 'distCount': '2', + 'options': {}, + 'replicaCount': '2', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5', + 'volumeName': 'rep1', + 'volumeStatus': 'ONLINE', + 'volumeType': 'REPLICATE'}, + 'vol1': {'brickCount': '3', + 'bricks': ['10.70.43.33:/bricks/b1', + '10.70.43.33:/bricks/my-vol1-add-brck-b2.new', + 
'10.70.43.33:/bricks/vol1/my-new-brick_b3'], + 'bricksInfo': [], + 'distCount': '1', + 'options': {'features.quota': 'on', 'nfs.disable': 'on'}, + 'replicaCount': '1', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99', + 'volumeName': 'vol1', + 'volumeStatus': 'ONLINE', + 'volumeType': 'DISTRIBUTE'}, + 'vol2': {'brickCount': '1', + 'bricks': ['10.70.43.190:/exports/vol2'], + 'bricksInfo': [], + 'distCount': '1', + 'options': {'features.quota': 'on'}, + 'replicaCount': '1', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': '12817858-77cd-450d-8a70-f8d617db838e', + 'volumeName': 'vol2', + 'volumeStatus': 'ONLINE', + 'volumeType': 'DISTRIBUTE'}, + 'vol3': {'brickCount': '1', + 'bricks': ['10.70.43.190:/exports/vol3'], + 'bricksInfo': [], + 'distCount': '1', + 'options': {}, + 'replicaCount': '1', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': 'a3d0f537-761f-46d3-addd-58c5eeb66b58', + 'volumeName': 'vol3', + 'volumeStatus': 'OFFLINE', + 'volumeType': 'DISTRIBUTE'}, + 'vol4': {'brickCount': '2', + 'bricks': ['10.70.43.190:/bricks/vol3_a', + '10.70.43.190:/bricks/vol3_b'], + 'bricksInfo': [], + 'distCount': '2', + 'options': {}, + 'replicaCount': '2', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': '81201e32-8f14-4fa0-b4ef-23852f2f9a1d', + 'volumeName': 'vol4', + 'volumeStatus': 'OFFLINE', + 'volumeType': 'REPLICATE'}, + 'vol5': {'brickCount': '3', + 'bricks': ['10.70.43.190:/bricks/vol5_a', + '10.70.42.228:/bricks/vol5_b', + '10.70.43.33:/bricks/vol5_c'], + 'bricksInfo': [], + 'distCount': '1', + 'options': {}, + 'replicaCount': '1', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': '69e039dd-e0b1-4069-ad65-99cb26c2d157', + 'volumeName': 'vol5', + 'volumeStatus': 'ONLINE', + 'volumeType': 'DISTRIBUTE'}} + +# nfs enabled by default +nfsEnabled2 = {'rep1': {'brickCount': '2', + 'bricks': ['10.70.43.133:/bricks/rep1_1', + '10.70.43.133:/bricks/rep1_2'], + 'bricksInfo': [], + 
'distCount': '2', + 'options': {}, + 'replicaCount': '2', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5', + 'volumeName': 'rep1', + 'volumeStatus': 'ONLINE', + 'volumeType': 'REPLICATE'}} + +#NFS enabled using set option +nfsEnabled3 = {'vol1': {'brickCount': '3', + 'bricks': ['10.70.43.33:/bricks/b1', + '10.70.43.33:/bricks/my-vol1-add-brck-b2.new', + '10.70.43.33:/bricks/vol1/my-new-brick_b3'], + 'bricksInfo': [], + 'distCount': '1', + 'options': {'features.quota': 'on', 'nfs.disable': 'off'}, + 'replicaCount': '1', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99', + 'volumeName': 'vol1', + 'volumeStatus': 'ONLINE', + 'volumeType': 'DISTRIBUTE'}} + + +nfsDisabled1 = {'vol1': {'brickCount': '3', + 'bricks': ['10.70.43.33:/bricks/b1', + '10.70.43.33:/bricks/my-vol1-add-brck-b2.new', + '10.70.43.33:/bricks/vol1/my-new-brick_b3'], + 'bricksInfo': [], + 'distCount': '1', + 'options': {'features.quota': 'on', 'nfs.disable': 'on'}, + 'replicaCount': '1', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99', + 'volumeName': 'vol1', + 'volumeStatus': 'ONLINE', + 'volumeType': 'DISTRIBUTE'}, + 'rep1': {'brickCount': '2', + 'bricks': ['10.70.43.133:/bricks/rep1_1', + '10.70.43.133:/bricks/rep1_2'], + 'bricksInfo': [], + 'distCount': '2', + 'options': {'features.quota': 'on', 'nfs.disable': 'on'}, + 'replicaCount': '2', + 'stripeCount': '1', + 'transportType': ['TCP'], + 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5', + 'volumeName': 'rep1', + 'volumeStatus': 'ONLINE', + 'volumeType': 'REPLICATE'}} + + +brickTst1 = {'vol1': {'brickCount': '3', + 'bricks': ['10.70.43.33:/bricks/b1', + '10.70.43.433:/bricks/my-vol1-add-brck-b2.new', + '10.70.43.33:/bricks/vol1/my-new-brick_b3'], + 'bricksInfo': [], + 'distCount': '1', + 'options': {'features.quota': 'on', 'nfs.disable': 'on'}, + 'replicaCount': '1', + 'stripeCount': '1', + 
#
# Copyright 2014 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Refer to the README and COPYING files for full details of the license
#

from testrunner import PluginsTestCase as TestCaseBase
from plugins import check_proc_status as gProc
from plugins import nscautils
from glusternagios import utils
import check_proc_test_data as gData


class TestProc(TestCaseBase):
    """Unit tests for the check_proc_status passive-check plugin.

    External command execution (utils.execCmd) and NSCA delivery
    (nscautils.send_to_nsca) are replaced with the stubs below so no
    processes are spawned and nothing is sent over the network.
    """

    # stub for utils.execCmd: pretend check_procs found the process
    def _maskGetoutputSuccess(self, val):
        return 0, "PROCS OK:", ""

    # stub for utils.execCmd: pretend check_procs reported a failure
    def _maskGetoutputCritical(self, val):
        return 0, "PROCS CRITICAL:", ""

    # stub for nscautils.send_to_nsca: echo the arguments instead of sending
    def _maskSendToNsca(self, hostName, service, status, msg):
        return service, status, msg

    def test_Nfs(self):
        nscautils.send_to_nsca = self._maskSendToNsca
        utils.execCmd = self._maskGetoutputCritical

        # sendNfsStatus reports via send_to_nsca and returns nothing;
        # use the unittest API instead of a bare `assert(... == None)`
        self.assertEqual(
            gProc.sendNfsStatus("10.70.43.33", gData.nfsEnabled1), None)