diff options
-rw-r--r--  config/glusternagios.conf.in        |  2
-rwxr-xr-x  plugins/check_gluster_syslog.py     | 35
-rw-r--r--  plugins/nscautils.py.in             | 20
-rw-r--r--  tests/test_check_gluster_syslog.py  | 34
4 files changed, 78 insertions, 13 deletions
diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in
index 1327c31..e3a8bc0 100644
--- a/config/glusternagios.conf.in
+++ b/config/glusternagios.conf.in
@@ -8,7 +8,7 @@ $actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py
 #
 $template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
 
-if ($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee') then :omprog:;GLFS_NAG_Template
+if (($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee')) or ($msg contains 'quorum') then :omprog:;GLFS_NAG_Template
 #
 
 
diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py
index 62464dd..e71647a 100755
--- a/plugins/check_gluster_syslog.py
+++ b/plugins/check_gluster_syslog.py
@@ -22,6 +22,7 @@
 import re
 import sys
 import select
+import logging
 
 import nscautils
 from glusternagios import utils
@@ -58,14 +59,44 @@ def processQuotaMsg(msg, alertlevel):
                                    alertMsg)
 
 
+def processQuorumMsg(msgid, msg, level):
+    logger = logging.getLogger("processQuorumMsg")
+    pluginstatus = None
+    #if msgid == 106002:
+    if "[MSGID: 106002]" in msg or "[MSGID: 106001]" in msg:
+        # [MSGID: 106002] Server quorum lost for volume dist.
+        # Stopping local bricks.
+        # [MSGID: 106001] Server quorum not met. Rejecting operation.
+        pluginstatus = utils.PluginStatusCode.CRITICAL
+    #elif msgid == 106003:
+    elif "[MSGID: 106003]" in msg:
+        # [MSGID: 106003] Server quorum regained for volume dist.
+        # Starting local bricks.
+        pluginstatus = utils.PluginStatusCode.OK
+
+    if pluginstatus >= 0:
+        serviceName = "Cluster - Quorum"
+        alertMsg = "QUORUM:" + msg[msg.rfind(':') + 1:]
+        ret = nscautils.send_to_nsca_subproc(nscautils.getNagiosClusterName(),
+                                             serviceName,
+                                             pluginstatus,
+                                             alertMsg)
+        logger.debug(" nsca ret code for alertMsg %s - %s" % (alertMsg, ret))
+
+
 def processMsg(msg):
     'Check if msg is indeed from gluster app'
     custom_logvars = msg[:msg.find(' ')]
     level = custom_logvars.split('/')[2]
+    msgid = custom_logvars.split('/')[0]
+    # if msgid in ([106001,106002,106003]):
+    # msgid is not populated correctly, so for now use below
+    if "[MSGID: 10600" in msg:
+        return processQuorumMsg(msgid, msg, level)
     # For gluster messages, need to check the source of message
     logsource = msg[msg.rfind('['):msg.rfind(']')]
     if logsource.find('quota') > -1:
-        processQuotaMsg(msg, level)
+        return processQuotaMsg(msg, level)
 
 
 def onReceive(msgs):
@@ -94,6 +125,8 @@ two-way conversations with rsyslog. Do NOT change this!
 See also: https://github.com/rsyslog/rsyslog/issues/22
 """
 if __name__ == '__main__':
+    logging.basicConfig()
+    logger = logging.getLogger(__name__)
     keepRunning = 1
     while keepRunning == 1:
         while keepRunning and sys.stdin in \
diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in
index cf5f58b..703e13f 100644
--- a/plugins/nscautils.py.in
+++ b/plugins/nscautils.py.in
@@ -69,15 +69,15 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):
                                     resultString)
     nagiosIP = getNagiosServerIP()
     p = subprocess.Popen(
-            args=(
-                __NSCA_CMD_PATH.cmd,
-                '-c', __NSCA_CONF_PATH,
-                '-H', nagiosIP,
-            ),
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            close_fds=True,
+        args=(
+            __NSCA_CMD_PATH.cmd,
+            '-c', __NSCA_CONF_PATH,
+            '-H', nagiosIP,
+        ),
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        close_fds=True,
     )
     stdout, stderr = p.communicate(input=cmddata)
 
@@ -86,4 +86,4 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):
 
 
 def vol_service_name(volName, statusType=None):
-    return "Volume Status %s - %s" % (statusType, volName)
+    return "Volume %s - %s" % (statusType, volName)
diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py
index c8c5d6a..bfa6ec1 100644
--- a/tests/test_check_gluster_syslog.py
+++ b/tests/test_check_gluster_syslog.py
@@ -39,8 +39,40 @@ class TestGlusterSyslog(TestCaseBase):
                    "Usage is above soft limit: 300.0KB used by /test/")
         check_gluster_syslog.processMsg(message)
         mock_send_to_nsca.assert_called_with("test-cluster",
-                                             "Volume Status Quota - test-vol",
+                                             "Volume Quota - test-vol",
                                              utils.PluginStatusCode.WARNING,
                                              "QUOTA: Usage is "
                                              "above soft limit: "
                                              "300.0KB used by /test/")
+
+    @mock.patch('plugins.nscautils.getNagiosClusterName')
+    @mock.patch('plugins.nscautils.send_to_nsca_subproc')
+    def test_checkProcessMsgForQuorum(self, mock_send_to_nsca,
+                                      mock_getNagiosClusterName):
+        mock_getNagiosClusterName.return_value = "test-cluster"
+        message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL "
+                   "[2014-05-02T12:40:14.562509+00:00] "
+                   "[2014-05-02 12:40:14.559662] C [MSGID: 106002] "
+                   "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] "
+                   "0-management: Server quorum lost for volume dist. "
+                   "Stopping local bricks. ")
+        check_gluster_syslog.processMsg(message)
+        mock_send_to_nsca.assert_called_with("test-cluster",
+                                             "Cluster - Quorum",
+                                             utils.PluginStatusCode.CRITICAL,
+                                             "QUORUM: Server quorum lost "
+                                             "for volume dist. "
+                                             "Stopping local bricks. ")
+
+    @mock.patch('plugins.nscautils.getNagiosClusterName')
+    @mock.patch('plugins.nscautils.send_to_nsca_subproc')
+    def test_checkProcessInvalidMsgForQuorum(self, mock_send_to_nsca,
+                                             mock_getNagiosClusterName):
+        mock_getNagiosClusterName.return_value = "test-cluster"
+        message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL "
+                   "[2014-05-02T12:40:14.562509+00:00] "
+                   "[2014-05-02 12:40:14.559662] C "
+                   "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] "
+                   "0-management: Random quorum message ")
+        check_gluster_syslog.processMsg(message)
+        assert not mock_send_to_nsca.called, "send nsca should not be called"