diff options
| -rw-r--r-- | config/glusternagios.conf.in | 2 |
| -rwxr-xr-x | plugins/check_gluster_syslog.py | 35 |
| -rw-r--r-- | plugins/nscautils.py.in | 20 |
| -rw-r--r-- | tests/test_check_gluster_syslog.py | 34 |
4 files changed, 78 insertions, 13 deletions
diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in index 1327c31..e3a8bc0 100644 --- a/config/glusternagios.conf.in +++ b/config/glusternagios.conf.in @@ -8,7 +8,7 @@ $actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py  #  $template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n" -if ($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee') then :omprog:;GLFS_NAG_Template +if (($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee')) or ($msg contains 'quorum') then :omprog:;GLFS_NAG_Template  # diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py index 62464dd..e71647a 100755 --- a/plugins/check_gluster_syslog.py +++ b/plugins/check_gluster_syslog.py @@ -22,6 +22,7 @@  import re  import sys  import select +import logging  import nscautils  from glusternagios import utils @@ -58,14 +59,44 @@ def processQuotaMsg(msg, alertlevel):                                         alertMsg) +def processQuorumMsg(msgid, msg, level): +    logger = logging.getLogger("processQuorumMsg") +    pluginstatus = None +    #if msgid == 106002: +    if "[MSGID: 106002]" in msg or "[MSGID: 106001]" in msg: +        # [MSGID: 106002] Server quorum lost for volume dist. +        #  Stopping local bricks. +        # [MSGID: 106001] Server quorum not met. Rejecting operation. +        pluginstatus = utils.PluginStatusCode.CRITICAL +    #elif msgid == 106003: +    elif "[MSGID: 106003]" in msg: +        # [MSGID: 106003] Server quorum regained for volume dist. +        #  Starting local bricks. 
+        pluginstatus = utils.PluginStatusCode.OK + +    if pluginstatus >= 0: +        serviceName = "Cluster - Quorum" +        alertMsg = "QUORUM:" + msg[msg.rfind(':') + 1:] +        ret = nscautils.send_to_nsca_subproc(nscautils.getNagiosClusterName(), +                                             serviceName, +                                             pluginstatus, +                                             alertMsg) +        logger.debug(" nsca ret code for alertMsg %s - %s" % (alertMsg, ret)) + +  def processMsg(msg):      'Check if msg is indeed from gluster app'      custom_logvars = msg[:msg.find(' ')]      level = custom_logvars.split('/')[2] +    msgid = custom_logvars.split('/')[0] +    # if msgid in ([106001,106002,106003]): +    # msgid is not populated correctly, so for now use below +    if "[MSGID: 10600" in msg: +        return processQuorumMsg(msgid, msg, level)      # For gluster messages, need to check the source of message      logsource = msg[msg.rfind('['):msg.rfind(']')]      if logsource.find('quota') > -1: -        processQuotaMsg(msg, level) +        return processQuotaMsg(msg, level)  def onReceive(msgs): @@ -94,6 +125,8 @@ two-way conversations with rsyslog. Do NOT change this!  
See also: https://github.com/rsyslog/rsyslog/issues/22  """  if __name__ == '__main__': +    logging.basicConfig() +    logger = logging.getLogger(__name__)      keepRunning = 1      while keepRunning == 1:          while keepRunning and sys.stdin in \ diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in index cf5f58b..703e13f 100644 --- a/plugins/nscautils.py.in +++ b/plugins/nscautils.py.in @@ -69,15 +69,15 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):                                      resultString)      nagiosIP = getNagiosServerIP()      p = subprocess.Popen( -            args=( -                  __NSCA_CMD_PATH.cmd, -                  '-c',  __NSCA_CONF_PATH, -                  '-H', nagiosIP, -            ), -            stdin=subprocess.PIPE, -            stdout=subprocess.PIPE, -            stderr=subprocess.PIPE, -            close_fds=True, +        args=( +            __NSCA_CMD_PATH.cmd, +            '-c',  __NSCA_CONF_PATH, +            '-H', nagiosIP, +        ), +        stdin=subprocess.PIPE, +        stdout=subprocess.PIPE, +        stderr=subprocess.PIPE, +        close_fds=True,          )      stdout, stderr = p.communicate(input=cmddata) @@ -86,4 +86,4 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):  def vol_service_name(volName, statusType=None): -    return "Volume Status %s - %s" % (statusType, volName) +    return "Volume %s - %s" % (statusType, volName) diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py index c8c5d6a..bfa6ec1 100644 --- a/tests/test_check_gluster_syslog.py +++ b/tests/test_check_gluster_syslog.py @@ -39,8 +39,40 @@ class TestGlusterSyslog(TestCaseBase):                     "Usage is above soft limit: 300.0KB used by /test/")          check_gluster_syslog.processMsg(message)          mock_send_to_nsca.assert_called_with("test-cluster", -                                             "Volume Status Quota - test-vol", + 
                                            "Volume Quota - test-vol",                                               utils.PluginStatusCode.WARNING,                                               "QUOTA: Usage is "                                               "above soft limit: "                                               "300.0KB used by /test/") + +    @mock.patch('plugins.nscautils.getNagiosClusterName') +    @mock.patch('plugins.nscautils.send_to_nsca_subproc') +    def test_checkProcessMsgForQuorum(self, mock_send_to_nsca, +                                      mock_getNagiosClusterName): +        mock_getNagiosClusterName.return_value = "test-cluster" +        message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL " +                   "[2014-05-02T12:40:14.562509+00:00]  " +                   "[2014-05-02 12:40:14.559662] C [MSGID: 106002] " +                   "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] " +                   "0-management: Server quorum lost for volume dist. " +                   "Stopping local bricks. ") +        check_gluster_syslog.processMsg(message) +        mock_send_to_nsca.assert_called_with("test-cluster", +                                             "Cluster - Quorum", +                                             utils.PluginStatusCode.CRITICAL, +                                             "QUORUM: Server quorum lost " +                                             "for volume dist. " +                                             "Stopping local bricks. 
") + +    @mock.patch('plugins.nscautils.getNagiosClusterName') +    @mock.patch('plugins.nscautils.send_to_nsca_subproc') +    def test_checkProcessInvalidMsgForQuorum(self, mock_send_to_nsca, +                                             mock_getNagiosClusterName): +        mock_getNagiosClusterName.return_value = "test-cluster" +        message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL " +                   "[2014-05-02T12:40:14.562509+00:00]  " +                   "[2014-05-02 12:40:14.559662] C " +                   "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] " +                   "0-management: Random quorum message ") +        check_gluster_syslog.processMsg(message) +        assert not mock_send_to_nsca.called, "send nsca should not be called"  | 
