summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSahina Bose <sabose@redhat.com>2014-04-01 17:08:18 +0530
committerBala.FA <barumuga@redhat.com>2014-04-29 10:14:33 +0530
commited8efbc2adb78ab756081b1f57dd23328123a1d5 (patch)
treec42214c1778c1fea724a4479e95f2f9d506ff53d
parent86a404d7ce805a25762cd66c310b1ad9e3a2a779 (diff)
plugins: Plugin to act on syslog message
This plugin integrates with syslog messages and sends the output to the Nagios server using NSCA. Refactored code to separate out common NSCA-related code into nscautils. Change-Id: I6a874da7f2296310060742acb6b1cd6562f5d0f0 Signed-off-by: Sahina Bose <sabose@redhat.com>
-rw-r--r--Makefile.am1
-rw-r--r--config/Makefile.am14
-rw-r--r--config/glusternagios.conf.in17
-rw-r--r--config/nagios_server.conf13
-rw-r--r--configure.ac4
-rw-r--r--gluster-nagios-addons.spec.in3
-rw-r--r--plugins/Makefile.am2
-rwxr-xr-xplugins/check_gluster_syslog.py117
-rwxr-xr-xplugins/check_vol_status.py44
-rw-r--r--plugins/nscautils.py.in53
-rw-r--r--tests/Makefile.am1
-rw-r--r--tests/test_check_gluster_syslog.py46
12 files changed, 280 insertions, 35 deletions
diff --git a/Makefile.am b/Makefile.am
index 5ffc3af..fd78c86 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,6 +20,7 @@
# keep sorted
SUBDIRS = \
+ config \
plugins \
$(NULL)
diff --git a/config/Makefile.am b/config/Makefile.am
new file mode 100644
index 0000000..428ac43
--- /dev/null
+++ b/config/Makefile.am
@@ -0,0 +1,14 @@
+rsyslogconfdir = $(sysconfdir)/rsyslog.d
+rsyslogconf_DATA = \
+ glusternagios.conf \
+ $(NULL)
+
+nagiosconfdir = $(sysconfdir)/nagios
+nagiosconf_DATA = \
+ nagios_server.conf \
+ $(NULL)
+
+EXTRA_DIST = \
+ $(nagiosconf_DATA) \
+ $(rsyslogconf_DATA) \
+ $(NULL)
diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in
new file mode 100644
index 0000000..004b375
--- /dev/null
+++ b/config/glusternagios.conf.in
@@ -0,0 +1,17 @@
+##### glusternagios.conf #####
+
+$ModLoad omprog
+$actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py
+
+#
+## Pass logs to omprog if app-name contains 'glusterfsd'
+#
+$template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+
+if $app-name contains 'glusterfsd' then :omprog:;GLFS_NAG_Template
+
+
+#
+## discard logs where app-name is 'gluster' as we processed already
+#
+#if $app-name contains 'gluster' then ~
diff --git a/config/nagios_server.conf b/config/nagios_server.conf
new file mode 100644
index 0000000..56c8995
--- /dev/null
+++ b/config/nagios_server.conf
@@ -0,0 +1,13 @@
+# NAGIOS SERVER
+# The nagios server IP address or FQDN to which the NSCA command
+# needs to be sent
+[NAGIOS-SERVER]
+nagios_server=
+
+
+# CLUSTER NAME
+# The host name of the logical cluster configured in Nagios under which
+# the gluster volume services reside
+[NAGIOS-DEFINTIONS]
+cluster_name=
+
diff --git a/configure.ac b/configure.ac
index db4147e..dffb3ea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,6 +53,7 @@ AC_SUBST([nagiospluginsdir], ['${libdir}/nagios/plugins'])
AC_SUBST([glusternagiospluginsdir], ['${nagiospluginsdir}/gluster'])
AC_SUBST([glusternagioscommonpylibdir], ['${pyexecdir}/glusternagios'])
AC_SUBST([glusternagiosaddonstestsdir], ['${datarootdir}/${PACKAGE_NAME}/tests'])
+AC_SUBST([nagiosconf], ['/etc/nagios'])
# Checking for pyflakes
AC_PATH_PROG([PYFLAKES], [pyflakes])
@@ -94,7 +95,10 @@ AX_PYTHON_MODULE([selinux], [fatal])
AC_CONFIG_FILES([
Makefile
gluster-nagios-addons.spec
+ config/Makefile
+ config/glusternagios.conf
plugins/Makefile
+ plugins/nscautils.py
plugins/volcap/Makefile
tests/Makefile
tests/run_tests_local.sh
diff --git a/gluster-nagios-addons.spec.in b/gluster-nagios-addons.spec.in
index 390d6e5..969bded 100644
--- a/gluster-nagios-addons.spec.in
+++ b/gluster-nagios-addons.spec.in
@@ -139,6 +139,7 @@ command[discoverlogicalcomponents]=/usr/lib64/nagios/plugins/gluster/discoverlog
EOF
%_init_enable nrpe
%_init_restart crond
+%_init_restart rsyslog
%preun
@@ -155,6 +156,8 @@ sed -i '/check_vol_quota_status/d' %{_sysconfdir}/nagios/nrpe.cfg
%defattr(-,root,root,-)
%attr(0755, -, -) %{_libdir}/nagios/plugins/gluster/*
%{_sysconfdir}/cron.d/gluster-sysstat.crontab
+%{_sysconfdir}/rsyslog.d/glusternagios.conf
+%{_sysconfdir}/nagios/nagios_server.conf
%files tests
%defattr(-,root,root,-)
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index c74cc3e..9bba2d4 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -9,6 +9,7 @@ crond_DATA = \
dist_glusternagiosplugins_PYTHON = \
check_disk_and_inode.py \
+ check_gluster_syslog.py \
check_vol_utilization.py \
check_vol_status.py \
check_volume_status.py \
@@ -19,6 +20,7 @@ dist_glusternagiosplugins_PYTHON = \
__init__.py \
memory.py \
network.py \
+ nscautils.py \
sadf.py \
swap.py \
$(NULL)
diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py
new file mode 100755
index 0000000..a52667c
--- /dev/null
+++ b/plugins/check_gluster_syslog.py
@@ -0,0 +1,117 @@
+#! /usr/bin/python
+# check_gluster_syslog.py
+# Script to act on syslog messages related to gluster
+# and send output to Nagios via nsca
+#
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+import re
+import sys
+import select
+
+import nscautils
+from glusternagios import utils
+
+# skeleton config parameters
+__pollPeriod = 0.75 # the number of seconds between polling for new messages
+__maxAtOnce = 1024 # max no of messages that are processed within one batch
+
+
+def findVolName(pattern):
+    """Extract the volume name from a translator identifier.
+
+    pattern is of the form <graphid>-<volume name>-<translator name>;
+    the text between the first and the last '-' is returned.
+    """
+    start = pattern.find('-') + 1
+    end = pattern.rfind('-')
+    return pattern[start:end]
+
+
+def getStatusCode(alertlevel):
+    """Map a syslog severity label to a plugin status code.
+
+    'ALERT' maps to CRITICAL; every other level maps to WARNING.
+    """
+    if alertlevel != 'ALERT':
+        return utils.PluginStatusCode.WARNING
+    return utils.PluginStatusCode.CRITICAL
+
+
+def processQuotaMsg(msg, alertlevel):
+    """Forward a quota-related log message to Nagios through NSCA.
+
+    Looks for a <graphid>-<volume name>-quota token in msg. When one is
+    found, the text following it is sent as a "QUOTA: ..." result for the
+    volume's Quota service, with the status derived from alertlevel.
+    Messages without such a token are ignored.
+    """
+    found = re.compile(r'\b\d*-[a-zA-Z0-9_-]*-quota\b').search(msg)
+    if not found:
+        return
+    token = found.group()
+    # The actual message starts one character past the last occurrence
+    # of the token in the line.
+    textStart = msg.rfind(token) + len(token) + 1
+    alertMsg = "QUOTA: " + msg[textStart:]
+    serviceName = nscautils.vol_service_name(findVolName(token), "Quota")
+    nscautils.send_to_nsca(nscautils.getNagiosClusterName(),
+                           serviceName,
+                           getStatusCode(alertlevel),
+                           alertMsg)
+
+
+def processMsg(msg):
+    """Dispatch a single rsyslog line if it comes from the gluster app.
+
+    msg is expected to start with a header of the form
+    <msgid>/<facility>/<severity>/<app-name> followed by a space and the
+    log text. Lines whose app-name is not 'GLUSTERFSD' are ignored, as
+    are lines whose header does not have at least four '/'-separated
+    fields. For gluster lines, the last bracketed section of the message
+    is inspected and quota messages are handed to processQuotaMsg.
+    """
+    custom_logvars = msg.split(' ', 1)[0]
+    fields = custom_logvars.split('/')
+    if len(fields) < 4:
+        # A malformed header must not raise IndexError: this function is
+        # called for every line read from stdin, and an uncaught
+        # exception would terminate the script.
+        return
+    level = fields[2]
+    appname = fields[3]
+    if appname != 'GLUSTERFSD':
+        return
+    # For gluster messages, need to check the source of message
+    logsource = msg[msg.rfind('['):msg.rfind(']')]
+    if 'quota' in logsource:
+        processQuotaMsg(msg, level)
+
+
+def onReceive(msgs):
+    """Handle one batch of messages pulled from rsyslog.
+
+    msgs is a list holding every message currently available; its length
+    varies from call to call. Each entry is handed to processMsg in
+    order. No additional buffering should be done here, because there is
+    no way to know when the next message will arrive: it may be a
+    nanosecond from now, or three hours.
+    """
+    for line in msgs:
+        processMsg(line)
+
+
+"""
+-------------------------------------------------------
+This is plumbing that DOES NOT need to be CHANGED
+-------------------------------------------------------
+Implementor's note: Python seems to very aggressively
+buffer stdout. The end result was that rsyslog does not
+receive the script's messages in a timely manner (sometimes
+even never, probably due to races). To prevent this, we
+flush stdout after we have done processing. This is especially
+important once we get to the point where the plugin does
+two-way conversations with rsyslog. Do NOT change this!
+See also: https://github.com/rsyslog/rsyslog/issues/22
+"""
+# Script entry point: read log lines from stdin in batches and hand each
+# non-empty batch to onReceive until stdin is closed, then exit with 0.
+if __name__ == '__main__':
+ keepRunning = 1
+ while keepRunning == 1:
+ # Wait up to __pollPeriod seconds for stdin to become readable.
+ while keepRunning and sys.stdin in \
+ select.select([sys.stdin], [], [], __pollPeriod)[0]:
+ msgs = []
+ # Zero timeout: keep reading only while input is immediately available.
+ while keepRunning and sys.stdin in \
+ select.select([sys.stdin], [], [], 0)[0]:
+ line = sys.stdin.readline()
+ if line:
+ msgs.append(line)
+ else: # an empty line means stdin has been closed
+ keepRunning = 0
+ # Stop collecting once the batch holds __maxAtOnce messages.
+ if len(msgs) >= __maxAtOnce:
+ break
+ if len(msgs) > 0:
+ onReceive(msgs)
+ sys.stdout.flush() # important,Python buffers far too much
+ sys.exit(0)
diff --git a/plugins/check_vol_status.py b/plugins/check_vol_status.py
index 9e526da..33e26ee 100755
--- a/plugins/check_vol_status.py
+++ b/plugins/check_vol_status.py
@@ -1,9 +1,11 @@
#!/usr/bin/python
+
import re
-import commands
import argparse
+import commands
import xml.etree.ElementTree as ET
from glusternagios import utils
+import nscautils
def parseXml(xmldoc, searchStr):
@@ -25,37 +27,6 @@ def getVolumeStatus(vol_status_out):
return vol_status
-def getNagiosServerIP():
- nagiosIP = ""
- nscaConfig = open("/etc/nagios/nagios_server.cfg", "r+")
- for line in nscaConfig.readlines():
- if "nagios_server" in line:
- #print line.rstrip()
- line = line.rstrip()
- nagiosIP = line.rpartition('=')[2]
- #print nagiosIP
- return nagiosIP
-
-
-def send_to_nsca(hostName, serviceName, exitStatus, resultString):
- #print hostName
- #print serviceName
- #print exitStatus
- #print resultString
- f = open('out.txt', 'w')
- print >> f, '%s\t%s\t%s\t%s' % (hostName,
- serviceName,
- exitStatus,
- resultString)
- f.close()
- nagiosIP = getNagiosServerIP()
- command_send_nsca = "send_nsca -H " + nagiosIP + \
- " -c /etc/nagios/send_nsca.cfg < out.txt"
- #print command_send_nsca
- commands.getoutput(command_send_nsca)
- #print nsca_stat
-
-
def showBrickStatus(vol_status_out):
ipPat = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
xmlElemList = []
@@ -83,7 +54,10 @@ def showBrickStatus(vol_status_out):
else:
exitStatus = utils.PluginStatusCode.CRITICAL
resultString = "Brick Status: CRITICAL"
- send_to_nsca(brickIP, brickName, exitStatus, resultString)
+ nscautils.send_to_nsca(brickIP,
+ brickName,
+ exitStatus,
+ resultString)
def showVolumeStatus(vol_status_out, volName, clusterName):
@@ -94,7 +68,7 @@ def showVolumeStatus(vol_status_out, volName, clusterName):
#brick_list = []
resultString = ""
exitStatus = utils.PluginStatusCode.OK
- serviceName = "Volume-%s-Status" % volName
+ serviceName = nscautils.vol_service_name(volName)
ipPat = re.compile("^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
vol_status = getVolumeStatus(vol_status_out)
if vol_status == "Started":
@@ -132,7 +106,7 @@ def showVolumeStatus(vol_status_out, volName, clusterName):
brick_online)
exitStatus = utils.PluginStatusCode.OK
- send_to_nsca(clusterName, serviceName, exitStatus, resultString)
+ nscautils.send_to_nsca(clusterName, serviceName, exitStatus, resultString)
def parse_input():
diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in
new file mode 100644
index 0000000..289d0d1
--- /dev/null
+++ b/plugins/nscautils.py.in
@@ -0,0 +1,53 @@
+# nscautils.py --utility methods to interact with Nagios NSCA
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import ConfigParser
+
+from glusternagios import utils
+
+__NAGIOSSERVER_CONF = "@nagiosconf@/nagios_server.conf"
+__NSCA_CONF_PATH = "@nagiosconf@/send_nsca.cfg"
+__NSCA_CMD_PATH = utils.CommandPath("nsca", "/usr/sbin/send_nsca")
+
+
+def getNagiosServerIP():
+    """Return the 'nagios_server' option from the [NAGIOS-SERVER] section
+    of the Nagios server configuration file."""
+    parser = ConfigParser.ConfigParser()
+    parser.read(__NAGIOSSERVER_CONF)
+    serverAddress = parser.get('NAGIOS-SERVER', 'nagios_server')
+    return serverAddress
+
+
+def getNagiosClusterName():
+    """Return the 'cluster_name' option from the [NAGIOS-DEFINTIONS]
+    section of the Nagios server configuration file."""
+    parser = ConfigParser.ConfigParser()
+    parser.read(__NAGIOSSERVER_CONF)
+    clusterName = parser.get('NAGIOS-DEFINTIONS', 'cluster_name')
+    return clusterName
+
+
+def send_to_nsca(hostName, serviceName, exitStatus, resultString):
+    """Send a check result to the Nagios server using send_nsca.
+
+    The four arguments are joined with tabs and passed as input data to
+    the send_nsca command, which is given the configured Nagios server
+    (-H) and the NSCA client configuration file (-c).
+
+    Returns the first value reported by utils.execCmd (presumably the
+    command's return code -- confirm against glusternagios.utils).
+    """
+    fields = (hostName, serviceName, exitStatus, resultString)
+    payload = '%s\t%s\t%s\t%s' % fields
+    argv = [__NSCA_CMD_PATH,
+            '-H', getNagiosServerIP(),
+            '-c', __NSCA_CONF_PATH]
+    rc, _out, _err = utils.execCmd(argv, data=payload)
+    return rc
+
+
+def vol_service_name(volName, statusType=None):
+    """Build the Nagios service name for a volume check.
+
+    volName    -- name of the gluster volume
+    statusType -- optional qualifier such as "Quota"
+
+    Returns "Volume Status <statusType> - <volName>", or
+    "Volume Status - <volName>" when statusType is None.
+    """
+    if statusType is None:
+        # Callers that pass only the volume name must not end up with the
+        # literal text "None" inside the service name.
+        return "Volume Status - %s" % volName
+    return "Volume Status %s - %s" % (statusType, volName)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index e8ab026..a540f11 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -19,6 +19,7 @@
#
test_modules = \
+ test_check_gluster_syslog.py \
test_check_volume_status.py \
test_cpu.py \
test_cpu_dataFile.py \
diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py
new file mode 100644
index 0000000..a6dce45
--- /dev/null
+++ b/tests/test_check_gluster_syslog.py
@@ -0,0 +1,46 @@
+#
+# Copyright 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+# Refer to the README and COPYING files for full details of the license
+#
+
+import mock
+
+from testrunner import PluginsTestCase as TestCaseBase
+from plugins import check_gluster_syslog
+from glusternagios import utils
+
+
+class TestGlusterSyslog(TestCaseBase):
+
+ # Verify that a quota log line with a non-ALERT level is sent to NSCA as a WARNING
+ @mock.patch('plugins.nscautils.getNagiosClusterName')
+ @mock.patch('plugins.nscautils.send_to_nsca')
+ def test_checkProcessMsg(self, mock_send_to_nsca,
+ mock_getNagiosClusterName):
+ mock_getNagiosClusterName.return_value = "test-cluster"
+ message = ("-/USER/CRIT/GLUSTERFSD [2014-04-06T21:45:33.378443+05:30] "
+ "glusterfsd: [2014-04-06 15:46:59.390038] "
+ "A [quota.c:3670:quota_log_usage] 0-test-vol-quota:"
+ "Usage is above soft limit: 300.0KB used by /test/")
+ check_gluster_syslog.processMsg(message)
+ mock_send_to_nsca.assert_called_with("test-cluster",
+ "Volume Status Quota - test-vol",
+ utils.PluginStatusCode.WARNING,
+ "QUOTA: Usage is "
+ "above soft limit: "
+ "300.0KB used by /test/")