From 4d66729bfae280e3765f11b34c34617e499ab25e Mon Sep 17 00:00:00 2001 From: Sahina Bose Date: Fri, 2 May 2014 16:49:16 +0530 Subject: plugins: Plugin to handle server quorum Plugin to handle log messages related to server quorum Message is parsed for msgid as the msgid is not correctly populated now Fixed pep8 errors in nscautils and added tests Also changed the service name to remove "Status" -it now reads Volume Quota, for instance Change-Id: I339ed16b3806f853ae3555a998974c47157c95ad Signed-off-by: Sahina Bose Reviewed-on: http://review.gluster.org/7648 Reviewed-by: Shubhendu Tripathi Reviewed-by: Ramesh N Reviewed-by: Kanagaraj M --- config/glusternagios.conf.in | 2 +- plugins/check_gluster_syslog.py | 35 ++++++++++++++++++++++++++++++++++- plugins/nscautils.py.in | 20 ++++++++++---------- tests/test_check_gluster_syslog.py | 34 +++++++++++++++++++++++++++++++++- 4 files changed, 78 insertions(+), 13 deletions(-) diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in index 1327c31..e3a8bc0 100644 --- a/config/glusternagios.conf.in +++ b/config/glusternagios.conf.in @@ -8,7 +8,7 @@ $actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py # $template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n" -if ($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee') then :omprog:;GLFS_NAG_Template +if (($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee')) or ($msg contains 'quorum') then :omprog:;GLFS_NAG_Template # diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py index 62464dd..e71647a 100755 --- a/plugins/check_gluster_syslog.py +++ b/plugins/check_gluster_syslog.py @@ -22,6 +22,7 @@ import re import sys import select +import logging import nscautils from glusternagios import 
utils @@ -58,14 +59,44 @@ def processQuotaMsg(msg, alertlevel): alertMsg) +def processQuorumMsg(msgid, msg, level): + logger = logging.getLogger("processQuorumMsg") + pluginstatus = None + #if msgid == 106002: + if "[MSGID: 106002]" in msg or "[MSGID: 106001]" in msg: + # [MSGID: 106002] Server quorum lost for volume dist. + # Stopping local bricks. + # [MSGID: 106001] Server quorum not met. Rejecting operation. + pluginstatus = utils.PluginStatusCode.CRITICAL + #elif msgid == 106003: + elif "[MSGID: 106003]" in msg: + # [MSGID: 106003] Server quorum regained for volume dist. + # Starting local bricks. + pluginstatus = utils.PluginStatusCode.OK + + if pluginstatus is not None:  # None: no known quorum MSGID matched + serviceName = "Cluster - Quorum" + alertMsg = "QUORUM:" + msg[msg.rfind(':') + 1:] + ret = nscautils.send_to_nsca_subproc(nscautils.getNagiosClusterName(), + serviceName, + pluginstatus, + alertMsg) + logger.debug(" nsca ret code for alertMsg %s - %s" % (alertMsg, ret)) + + def processMsg(msg): 'Check if msg is indeed from gluster app' custom_logvars = msg[:msg.find(' ')] level = custom_logvars.split('/')[2] + msgid = custom_logvars.split('/')[0] + # if msgid in ([106001,106002,106003]): + # msgid is not populated correctly, so for now use below + if "[MSGID: 10600" in msg: + return processQuorumMsg(msgid, msg, level) # For gluster messages, need to check the source of message logsource = msg[msg.rfind('['):msg.rfind(']')] if logsource.find('quota') > -1: - processQuotaMsg(msg, level) + return processQuotaMsg(msg, level) def onReceive(msgs): @@ -94,6 +125,8 @@ two-way conversations with rsyslog. Do NOT change this! 
See also: https://github.com/rsyslog/rsyslog/issues/22 """ if __name__ == '__main__': + logging.basicConfig() + logger = logging.getLogger(__name__) keepRunning = 1 while keepRunning == 1: while keepRunning and sys.stdin in \ diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in index cf5f58b..703e13f 100644 --- a/plugins/nscautils.py.in +++ b/plugins/nscautils.py.in @@ -69,15 +69,15 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString): resultString) nagiosIP = getNagiosServerIP() p = subprocess.Popen( - args=( - __NSCA_CMD_PATH.cmd, - '-c', __NSCA_CONF_PATH, - '-H', nagiosIP, - ), - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - close_fds=True, + args=( + __NSCA_CMD_PATH.cmd, + '-c', __NSCA_CONF_PATH, + '-H', nagiosIP, + ), + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + close_fds=True, ) stdout, stderr = p.communicate(input=cmddata) @@ -86,4 +86,4 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString): def vol_service_name(volName, statusType=None): - return "Volume Status %s - %s" % (statusType, volName) + return "Volume %s - %s" % (statusType, volName) diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py index c8c5d6a..bfa6ec1 100644 --- a/tests/test_check_gluster_syslog.py +++ b/tests/test_check_gluster_syslog.py @@ -39,8 +39,40 @@ class TestGlusterSyslog(TestCaseBase): "Usage is above soft limit: 300.0KB used by /test/") check_gluster_syslog.processMsg(message) mock_send_to_nsca.assert_called_with("test-cluster", - "Volume Status Quota - test-vol", + "Volume Quota - test-vol", utils.PluginStatusCode.WARNING, "QUOTA: Usage is " "above soft limit: " "300.0KB used by /test/") + + @mock.patch('plugins.nscautils.getNagiosClusterName') + @mock.patch('plugins.nscautils.send_to_nsca_subproc') + def test_checkProcessMsgForQuorum(self, mock_send_to_nsca, + mock_getNagiosClusterName): + mock_getNagiosClusterName.return_value 
= "test-cluster" + message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL " + "[2014-05-02T12:40:14.562509+00:00] " + "[2014-05-02 12:40:14.559662] C [MSGID: 106002] " + "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] " + "0-management: Server quorum lost for volume dist. " + "Stopping local bricks. ") + check_gluster_syslog.processMsg(message) + mock_send_to_nsca.assert_called_with("test-cluster", + "Cluster - Quorum", + utils.PluginStatusCode.CRITICAL, + "QUORUM: Server quorum lost " + "for volume dist. " + "Stopping local bricks. ") + + @mock.patch('plugins.nscautils.getNagiosClusterName') + @mock.patch('plugins.nscautils.send_to_nsca_subproc') + def test_checkProcessInvalidMsgForQuorum(self, mock_send_to_nsca, + mock_getNagiosClusterName): + mock_getNagiosClusterName.return_value = "test-cluster" + message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL " + "[2014-05-02T12:40:14.562509+00:00] " + "[2014-05-02 12:40:14.559662] C " + "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] " + "0-management: Random quorum message ") + check_gluster_syslog.processMsg(message) + assert not mock_send_to_nsca.called, "send nsca should not be called" -- cgit