summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- config/glusternagios.conf.in 2
-rwxr-xr-x plugins/check_gluster_syslog.py 35
-rw-r--r-- plugins/nscautils.py.in 20
-rw-r--r-- tests/test_check_gluster_syslog.py 34
4 files changed, 78 insertions, 13 deletions
diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in
index 1327c31..e3a8bc0 100644
--- a/config/glusternagios.conf.in
+++ b/config/glusternagios.conf.in
@@ -8,7 +8,7 @@ $actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py
#
$template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
-if ($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee') then :omprog:;GLFS_NAG_Template
+if (($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee')) or ($msg contains 'quorum') then :omprog:;GLFS_NAG_Template
#
diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py
index 62464dd..e71647a 100755
--- a/plugins/check_gluster_syslog.py
+++ b/plugins/check_gluster_syslog.py
@@ -22,6 +22,7 @@
import re
import sys
import select
+import logging
import nscautils
from glusternagios import utils
@@ -58,14 +59,44 @@ def processQuotaMsg(msg, alertlevel):
alertMsg)
+def processQuorumMsg(msgid, msg, level):
+    """Report a glusterd server-quorum log line to Nagios via NSCA.
+
+    msgid -- message id parsed by the caller; accepted for interface
+             compatibility but not used, the "[MSGID: ...]" tag inside
+             msg is matched instead.
+    msg   -- the complete log line.
+    level -- severity parsed by the caller; not used here.
+
+    MSGID 106001 and 106002 are sent as CRITICAL and MSGID 106003 as OK
+    to the "Cluster - Quorum" service.  Any other line sends nothing.
+    Returns None.
+    """
+    logger = logging.getLogger("processQuorumMsg")
+    pluginstatus = None
+    if "[MSGID: 106002]" in msg or "[MSGID: 106001]" in msg:
+        # [MSGID: 106002] Server quorum lost for volume dist.
+        # Stopping local bricks.
+        # [MSGID: 106001] Server quorum not met. Rejecting operation.
+        pluginstatus = utils.PluginStatusCode.CRITICAL
+    elif "[MSGID: 106003]" in msg:
+        # [MSGID: 106003] Server quorum regained for volume dist.
+        # Starting local bricks.
+        pluginstatus = utils.PluginStatusCode.OK
+
+    # Test for None explicitly: ordering None against an int only works by
+    # accident on Python 2 and raises TypeError on Python 3.
+    # NOTE(review): assumes the OK/CRITICAL codes are non-negative, as the
+    # previous ">= 0" check implied -- confirm against PluginStatusCode.
+    if pluginstatus is None:
+        return
+
+    serviceName = "Cluster - Quorum"
+    # Keep only the text after the last ':' of the log line.
+    alertMsg = "QUORUM:" + msg[msg.rfind(':') + 1:]
+    ret = nscautils.send_to_nsca_subproc(nscautils.getNagiosClusterName(),
+                                         serviceName,
+                                         pluginstatus,
+                                         alertMsg)
+    logger.debug(" nsca ret code for alertMsg %s - %s", alertMsg, ret)
+
+
def processMsg(msg):
'Check if msg is indeed from gluster app'
custom_logvars = msg[:msg.find(' ')]
level = custom_logvars.split('/')[2]
+ msgid = custom_logvars.split('/')[0]
+ # The msgid field is not populated correctly, so for now match the
+ # "[MSGID: 10600" tag in the message text instead of comparing msgid
+ # against 106001/106002/106003.
+ if "[MSGID: 10600" in msg:
+ return processQuorumMsg(msgid, msg, level)
# For gluster messages, need to check the source of message
logsource = msg[msg.rfind('['):msg.rfind(']')]
if logsource.find('quota') > -1:
- processQuotaMsg(msg, level)
+ return processQuotaMsg(msg, level)
def onReceive(msgs):
@@ -94,6 +125,8 @@ two-way conversations with rsyslog. Do NOT change this!
See also: https://github.com/rsyslog/rsyslog/issues/22
"""
if __name__ == '__main__':
+ logging.basicConfig()
+ logger = logging.getLogger(__name__)
keepRunning = 1
while keepRunning == 1:
while keepRunning and sys.stdin in \
diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in
index cf5f58b..703e13f 100644
--- a/plugins/nscautils.py.in
+++ b/plugins/nscautils.py.in
@@ -69,15 +69,15 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):
resultString)
nagiosIP = getNagiosServerIP()
p = subprocess.Popen(
- args=(
- __NSCA_CMD_PATH.cmd,
- '-c', __NSCA_CONF_PATH,
- '-H', nagiosIP,
- ),
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=True,
+ args=(
+ __NSCA_CMD_PATH.cmd,
+ '-c', __NSCA_CONF_PATH,
+ '-H', nagiosIP,
+ ),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ close_fds=True,
)
stdout, stderr = p.communicate(input=cmddata)
@@ -86,4 +86,4 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):
def vol_service_name(volName, statusType=None):
+ # Builds the service name "Volume <statusType> - <volName>"; statusType is
+ # formatted with %s, so the default None is rendered as the text "None".
- return "Volume Status %s - %s" % (statusType, volName)
+ return "Volume %s - %s" % (statusType, volName)
diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py
index c8c5d6a..bfa6ec1 100644
--- a/tests/test_check_gluster_syslog.py
+++ b/tests/test_check_gluster_syslog.py
@@ -39,8 +39,40 @@ class TestGlusterSyslog(TestCaseBase):
"Usage is above soft limit: 300.0KB used by /test/")
check_gluster_syslog.processMsg(message)
mock_send_to_nsca.assert_called_with("test-cluster",
- "Volume Status Quota - test-vol",
+ "Volume Quota - test-vol",
utils.PluginStatusCode.WARNING,
"QUOTA: Usage is "
"above soft limit: "
"300.0KB used by /test/")
+
+    @mock.patch('plugins.nscautils.getNagiosClusterName')
+    @mock.patch('plugins.nscautils.send_to_nsca_subproc')
+    def test_checkProcessMsgForQuorum(self, mock_send_to_nsca,
+                                      mock_getNagiosClusterName):
+        """A MSGID 106002 "quorum lost" line is sent to NSCA as CRITICAL."""
+        mock_getNagiosClusterName.return_value = "test-cluster"
+        # Text following the last ':' of the log line; the plugin is
+        # expected to forward exactly this, prefixed with "QUORUM:".
+        quorumText = (" Server quorum lost for volume dist. "
+                      "Stopping local bricks. ")
+        logLine = "".join([
+            "-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL ",
+            "[2014-05-02T12:40:14.562509+00:00] ",
+            "[2014-05-02 12:40:14.559662] C [MSGID: 106002] ",
+            "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] ",
+            "0-management:" + quorumText,
+        ])
+        check_gluster_syslog.processMsg(logLine)
+        mock_send_to_nsca.assert_called_with(
+            "test-cluster",
+            "Cluster - Quorum",
+            utils.PluginStatusCode.CRITICAL,
+            "QUORUM:" + quorumText)
+
+    @mock.patch('plugins.nscautils.getNagiosClusterName')
+    @mock.patch('plugins.nscautils.send_to_nsca_subproc')
+    def test_checkProcessInvalidMsgForQuorum(self, mock_send_to_nsca,
+                                             mock_getNagiosClusterName):
+        """A line that mentions quorum but has no MSGID tag sends nothing."""
+        mock_getNagiosClusterName.return_value = "test-cluster"
+        logLine = "".join([
+            "-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL ",
+            "[2014-05-02T12:40:14.562509+00:00] ",
+            "[2014-05-02 12:40:14.559662] C ",
+            "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] ",
+            "0-management: Random quorum message ",
+        ])
+        check_gluster_syslog.processMsg(logLine)
+        nscaWasCalled = mock_send_to_nsca.called
+        assert not nscaWasCalled, "send nsca should not be called"