summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--configure.ac1
-rw-r--r--gluster-nagios-addons.spec.in3
-rw-r--r--plugins/Makefile.am2
-rwxr-xr-xplugins/check_proc_status.py195
-rw-r--r--plugins/gluster-proc.crontab1
-rw-r--r--tests/Makefile.am2
-rw-r--r--tests/check_proc_test_data.py196
-rw-r--r--tests/test_gluster_proc.py42
8 files changed, 442 insertions, 0 deletions
diff --git a/configure.ac b/configure.ac
index 6d3c6c7..24c04c2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -99,6 +99,7 @@ AC_CONFIG_FILES([
config/glusternagios.conf
plugins/Makefile
plugins/nscautils.py
+ plugins/gluster-proc.crontab
plugins/volcap/Makefile
tests/Makefile
tests/run_tests_local.sh
diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in
index 8278bb4..da2f558 100644
--- a/gluster-nagios-addons.spec.in
+++ b/gluster-nagios-addons.spec.in
@@ -65,6 +65,7 @@ Requires(post): iptables
Requires: gluster-nagios-common
Requires: libselinux-python
Requires: nagios-plugins-ide_smart
+Requires: nagios-plugins-procs
Requires: nrpe
Requires: nsca-client
Requires: python-argparse
@@ -73,6 +74,7 @@ Requires: python-inotify
Requires: python-netaddr
Requires: python-pthreading
Requires: python-cpopen >= 1.3
+Requires: python-psutil
Requires: sysstat
%description
@@ -159,6 +161,7 @@ sed -i '/check_vol_quota_status/d' %{_sysconfdir}/nagios/nrpe.cfg
%{_sysconfdir}/cron.d/gluster-sysstat.crontab
%{_sysconfdir}/rsyslog.d/glusternagios.conf
%{_sysconfdir}/nagios/nagios_server.conf
+%{_sysconfdir}/cron.d/gluster-proc.crontab
%files tests
%defattr(-,root,root,-)
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index 9bba2d4..b1592de 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -5,6 +5,7 @@ SUBDIRS = \
cronddir = $(sysconfdir)/cron.d
crond_DATA = \
gluster-sysstat.crontab \
+ gluster-proc.crontab \
$(NULL)
dist_glusternagiosplugins_PYTHON = \
@@ -13,6 +14,7 @@ dist_glusternagiosplugins_PYTHON = \
check_vol_utilization.py \
check_vol_status.py \
check_volume_status.py \
+ check_proc_status.py \
cpu.py \
discoverpeers.py \
discoverlogicalcomponents.py \
diff --git a/plugins/check_proc_status.py b/plugins/check_proc_status.py
new file mode 100755
index 0000000..80a41c8
--- /dev/null
+++ b/plugins/check_proc_status.py
@@ -0,0 +1,195 @@
+#!/usr/bin/python
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import sys
+import errno
+import socket
+import psutil
+import nscautils
+import glusternagios
+
+from glusternagios import utils
+from glusternagios import glustercli
+
+
+_checkProc = utils.CommandPath('check_proc',
+ '/usr/lib64/nagios/plugins/check_procs')
+
+_glusterVolPath = "/var/lib/glusterd/vols"
+_checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"]
+_checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+ "glustershd"]
+_checkSmbCmd = [_checkProc.cmd, "-C", "smb"]
+_checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+ "quotad"]
+_checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"]
+_checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"]
+_nfsService = "Glusterfs NFS Daemon"
+_shdService = "Glusterfs Self-Heal Daemon"
+_smbService = "CIFS"
+_brickService = "Brick Status - "
+_glusterdService = "Gluster Management Daemon"
+_quotadService = "Gluster Quota Daemon"
+
+
+def sendBrickStatus(hostName, volInfo):
+ hostUuid = glustercli.hostUUIDGet()
+ status = None
+ for volumeName, volumeInfo in volInfo.iteritems():
+ if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+ continue
+ for brick in volumeInfo['bricksInfo']:
+ if brick.get('hostUuid') != hostUuid:
+ continue
+ brickService = "Brick Status - %s" % brick['name']
+ pidFile = brick['name'].replace(
+ ":/", "-").replace("/", "-") + ".pid"
+ try:
+ with open("%s/%s/run/%s" % (
+ _glusterVolPath, volumeName, pidFile)) as f:
+ if psutil.pid_exists(int(f.read().strip())):
+ status = utils.PluginStatusCode.OK
+ else:
+ status = utils.PluginStatusCode.CRITICAL
+ except IOError, e:
+ if e.errno == errno.ENOENT:
+ status = utils.PluginStatusCode.CRITICAL
+ else:
+ status = utils.PluginStatusCode.UNKNOWN
+ msg = "UNKNOWN: Brick %s: %s" % (brick['name'], str(e))
+ finally:
+ if status == utils.PluginStatusCode.OK:
+ msg = "OK: Brick %s" % brick['name']
+ elif status != utils.PluginStatusCode.UNKNOWN:
+ msg = "CRITICAL: Brick %s is down" % brick['name']
+ nscautils.send_to_nsca(hostName, brickService, status, msg)
+
+
+def sendNfsStatus(hostName, volInfo):
+ # if nfs is already running we need not check further
+ status, msg, error = utils.execCmd(_checkNfsCmd)
+ if status == utils.PluginStatusCode.OK:
+ nscautils.send_to_nsca(hostName, _nfsService, status, msg)
+ return
+
+ # if nfs is not running and any of the volume uses nfs
+ # then it's required to alert the user
+ for k, v in volInfo.iteritems():
+ nfsStatus = v.get('options', {}).get('nfs.disable', 'off')
+ if nfsStatus == 'off':
+ msg = "CRITICAL: Process glusterfs-nfs is not running"
+ status = utils.PluginStatusCode.CRITICAL
+ break
+ else:
+ msg = "OK: No gluster volume uses nfs"
+ status = utils.PluginStatusCode.OK
+ nscautils.send_to_nsca(hostName, _nfsService, status, msg)
+
+
+def sendSmbStatus(hostName, volInfo):
+ status, msg, error = utils.execCmd(_checkSmbCmd)
+ if status == utils.PluginStatusCode.OK:
+ nscautils.send_to_nsca(hostName, _smbService, status, msg)
+ return
+
+ # if smb is not running and any of the volume uses smb
+ # then it's required to alert the user
+ for k, v in volInfo.iteritems():
+ cifsStatus = v.get('options', {}).get('user.cifs', '')
+ smbStatus = v.get('options', {}).get('user.smb', '')
+ if cifsStatus == 'disable' or smbStatus == 'disable':
+ msg = "CRITICAL: Process smb is not running"
+ status = utils.PluginStatusCode.CRITICAL
+ break
+ else:
+ msg = "OK: No gluster volume uses smb"
+ status = utils.PluginStatusCode.OK
+ nscautils.send_to_nsca(hostName, _smbService, status, msg)
+
+
+def sendQuotadStatus(hostName, volInfo):
+    # if quota is already running we need not check further
+    status, msg, error = utils.execCmd(_checkQuotaCmd)
+    if status == utils.PluginStatusCode.OK:
+        nscautils.send_to_nsca(hostName, _quotadService, status, msg)
+        return
+
+    # if quota is not running and any of the volume uses quota
+    # then the quotad process should be running in the host
+    for k, v in volInfo.iteritems():
+        quotadStatus = v.get('options', {}).get('features.quota', '')
+        if quotadStatus == 'on':
+            msg = "CRITICAL: Process quotad is not running"
+            status = utils.PluginStatusCode.CRITICAL
+            break
+    else:
+        msg = "OK: Quota not enabled"
+        status = utils.PluginStatusCode.OK
+    nscautils.send_to_nsca(hostName, _quotadService, status, msg)
+
+
+def sendShdStatus(hostName, volInfo):
+ status, msg, error = utils.execCmd(_checkShdCmd)
+ if status == utils.PluginStatusCode.OK:
+ nscautils.send_to_nsca(hostName, _shdService, status, msg)
+ return
+
+ hostUuid = glustercli.hostUUIDGet()
+ for volumeName, volumeInfo in volInfo.iteritems():
+ if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+ continue
+ for brick in volumeInfo['bricksInfo']:
+ if brick['hostUuid'] == hostUuid and \
+ int(volumeInfo['replicaCount']) > 1:
+ status = utils.PluginStatusCode.CRITICAL
+ msg = "CRITICAL: Gluster Self Heal Daemon not running"
+ break
+ else:
+ msg = "OK: Process Gluster Self Heal Daemon"
+ status = utils.PluginStatusCode.OK
+ nscautils.send_to_nsca(hostName, _shdService, status, msg)
+
+
+if __name__ == '__main__':
+ #Get the volume status
+ #status = 0
+ hostName = socket.getfqdn()
+ if hostName == "localhost.localdomain" or hostName == "localhost":
+ sys.stderr.write("failed to find localhost fqdn")
+
+ ### service check ###
+ status, msg, error = utils.execCmd(_checkGlusterdCmd)
+ nscautils.send_to_nsca(hostName, _glusterdService, status, msg)
+
+ # Get the volume status only if glusterfs is running to avoid
+ # unusual delay
+ if status != utils.PluginStatusCode.OK:
+ sys.exit(status)
+
+ try:
+ volInfo = glustercli.volumeInfo()
+ except glusternagios.glustercli.GlusterCmdFailedException as e:
+ sys.exit(utils.PluginStatusCode.UNKNOWN)
+
+ sendNfsStatus(hostName, volInfo)
+ sendSmbStatus(hostName, volInfo)
+ sendShdStatus(hostName, volInfo)
+ sendQuotadStatus(hostName, volInfo)
+ sendBrickStatus(hostName, volInfo)
+
+ sys.exit(utils.PluginStatusCode.OK)
diff --git a/plugins/gluster-proc.crontab b/plugins/gluster-proc.crontab
new file mode 100644
index 0000000..7344ea2
--- /dev/null
+++ b/plugins/gluster-proc.crontab
@@ -0,0 +1 @@
+*/1 * * * * root /usr/lib64/nagios/gluster/plugins/check_proc_status.py
diff --git a/tests/Makefile.am b/tests/Makefile.am
index a540f11..0712feb 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -21,8 +21,10 @@
test_modules = \
test_check_gluster_syslog.py \
test_check_volume_status.py \
+ check_proc_test_data.py \
test_cpu.py \
test_cpu_dataFile.py \
+ test_gluster_proc.py \
test_memory.py \
test_memory_dataFile.py \
test_network.py \
diff --git a/tests/check_proc_test_data.py b/tests/check_proc_test_data.py
new file mode 100644
index 0000000..b4d4ac4
--- /dev/null
+++ b/tests/check_proc_test_data.py
@@ -0,0 +1,196 @@
+nfsEnabled1 = {'rep1': {'brickCount': '2',
+ 'bricks': ['10.70.43.33:/bricks/rep1_1',
+ '10.70.43.33:/bricks/rep1_2'],
+ 'bricksInfo': [],
+ 'distCount': '2',
+ 'options': {},
+ 'replicaCount': '2',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5',
+ 'volumeName': 'rep1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'REPLICATE'},
+ 'vol1': {'brickCount': '3',
+ 'bricks': ['10.70.43.33:/bricks/b1',
+ '10.70.43.33:/bricks/my-vol1-add-brck-b2.new',
+ '10.70.43.33:/bricks/vol1/my-new-brick_b3'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'on'},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99',
+ 'volumeName': 'vol1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'DISTRIBUTE'},
+ 'vol2': {'brickCount': '1',
+ 'bricks': ['10.70.43.190:/exports/vol2'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {'features.quota': 'on'},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '12817858-77cd-450d-8a70-f8d617db838e',
+ 'volumeName': 'vol2',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'DISTRIBUTE'},
+ 'vol3': {'brickCount': '1',
+ 'bricks': ['10.70.43.190:/exports/vol3'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': 'a3d0f537-761f-46d3-addd-58c5eeb66b58',
+ 'volumeName': 'vol3',
+ 'volumeStatus': 'OFFLINE',
+ 'volumeType': 'DISTRIBUTE'},
+ 'vol4': {'brickCount': '2',
+ 'bricks': ['10.70.43.190:/bricks/vol3_a',
+ '10.70.43.190:/bricks/vol3_b'],
+ 'bricksInfo': [],
+ 'distCount': '2',
+ 'options': {},
+ 'replicaCount': '2',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '81201e32-8f14-4fa0-b4ef-23852f2f9a1d',
+ 'volumeName': 'vol4',
+ 'volumeStatus': 'OFFLINE',
+ 'volumeType': 'REPLICATE'},
+ 'vol5': {'brickCount': '3',
+ 'bricks': ['10.70.43.190:/bricks/vol5_a',
+ '10.70.42.228:/bricks/vol5_b',
+ '10.70.43.33:/bricks/vol5_c'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '69e039dd-e0b1-4069-ad65-99cb26c2d157',
+ 'volumeName': 'vol5',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'DISTRIBUTE'}}
+
+# nfs enabled by default
+nfsEnabled2 = {'rep1': {'brickCount': '2',
+ 'bricks': ['10.70.43.133:/bricks/rep1_1',
+ '10.70.43.133:/bricks/rep1_2'],
+ 'bricksInfo': [],
+ 'distCount': '2',
+ 'options': {},
+ 'replicaCount': '2',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5',
+ 'volumeName': 'rep1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'REPLICATE'}}
+
+#NFS enabled using set option
+nfsEnabled3 = {'vol1': {'brickCount': '3',
+ 'bricks': ['10.70.43.33:/bricks/b1',
+ '10.70.43.33:/bricks/my-vol1-add-brck-b2.new',
+ '10.70.43.33:/bricks/vol1/my-new-brick_b3'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'off'},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99',
+ 'volumeName': 'vol1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'DISTRIBUTE'}}
+
+
+nfsDisabled1 = {'vol1': {'brickCount': '3',
+ 'bricks': ['10.70.43.33:/bricks/b1',
+ '10.70.43.33:/bricks/my-vol1-add-brck-b2.new',
+ '10.70.43.33:/bricks/vol1/my-new-brick_b3'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'on'},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99',
+ 'volumeName': 'vol1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'DISTRIBUTE'},
+ 'rep1': {'brickCount': '2',
+ 'bricks': ['10.70.43.133:/bricks/rep1_1',
+ '10.70.43.133:/bricks/rep1_2'],
+ 'bricksInfo': [],
+ 'distCount': '2',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'on'},
+ 'replicaCount': '2',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5',
+ 'volumeName': 'rep1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'REPLICATE'}}
+
+
+brickTst1 = {'vol1': {'brickCount': '3',
+ 'bricks': ['10.70.43.33:/bricks/b1',
+ '10.70.43.433:/bricks/my-vol1-add-brck-b2.new',
+ '10.70.43.33:/bricks/vol1/my-new-brick_b3'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'on'},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99',
+ 'volumeName': 'vol1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'DISTRIBUTE'},
+ 'rep1': {'brickCount': '2',
+ 'bricks': ['10.70.43.133:/bricks/rep1_1',
+ '10.70.43.133:/bricks/rep1_2'],
+ 'bricksInfo': [],
+ 'distCount': '2',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'on'},
+ 'replicaCount': '2',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5',
+ 'volumeName': 'rep1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'REPLICATE'}}
+
+
+brickTst2 = {'vol1': {'brickCount': '3',
+ 'bricks': ['10.70.43.133:/bricks/b1',
+ '10.70.43.233:/bricks/my-vol1-add-brck-b2.new',
+ '10.70.43.333:/bricks/vol1/my-new-brick_b3'],
+ 'bricksInfo': [],
+ 'distCount': '1',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'on'},
+ 'replicaCount': '1',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': 'f96856f1-b2c0-4e24-aef0-7f2a01f37b99',
+ 'volumeName': 'vol1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'DISTRIBUTE'},
+ 'rep1': {'brickCount': '2',
+ 'bricks': ['10.70.43.133:/bricks/rep1_1',
+ '10.70.43.133:/bricks/rep1_2'],
+ 'bricksInfo': [],
+ 'distCount': '2',
+ 'options': {'features.quota': 'on', 'nfs.disable': 'on'},
+ 'replicaCount': '2',
+ 'stripeCount': '1',
+ 'transportType': ['TCP'],
+ 'uuid': '53496a0c-94eb-46a2-9312-eeb700e9fba5',
+ 'volumeName': 'rep1',
+ 'volumeStatus': 'ONLINE',
+ 'volumeType': 'REPLICATE'}}
diff --git a/tests/test_gluster_proc.py b/tests/test_gluster_proc.py
new file mode 100644
index 0000000..36a60fd
--- /dev/null
+++ b/tests/test_gluster_proc.py
@@ -0,0 +1,42 @@
+#
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Refer to the README and COPYING files for full details of the license
+#
+
+from testrunner import PluginsTestCase as TestCaseBase
+from plugins import check_proc_status as gProc
+from plugins import nscautils
+from glusternagios import utils
+import check_proc_test_data as gData
+
+
+class TestProc(TestCaseBase):
+    def _maskGetoutputSuccess(self, val):
+        return 0, "PROCS OK:", ""
+
+    def _maskGetoutputCritical(self, val):
+        return 0, "PROCS CRITICAL:", ""
+
+    def _maskSendToNsca(self, hostName, service, status, msg):
+        return service, status, msg
+
+    def test_Nfs(self):
+        nscautils.send_to_nsca = self._maskSendToNsca
+        utils.execCmd = self._maskGetoutputCritical
+
+        assert(gProc.sendNfsStatus("10.70.43.33", gData.nfsEnabled1) is None)