summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sahina Bose <sabose@redhat.com> 2014-05-02 16:49:16 +0530
committer: Sahina Bose <sabose@redhat.com> 2014-05-05 02:38:50 -0700
commit: 4d66729bfae280e3765f11b34c34617e499ab25e (patch)
tree: 93160454593090200c04533357ab7d6a333f7175
parent: ec287fcc84f8bebb52ccaad63f894689fb79862d (diff)
plugins: Plugin to handle server quorum
Plugin to handle log messages related to server quorum. The message is parsed for the msgid, as the msgid is not correctly populated now. Fixed pep8 errors in nscautils and added tests. Also changed the service name to remove "Status" - it now reads "Volume Quota", for instance. Change-Id: I339ed16b3806f853ae3555a998974c47157c95ad Signed-off-by: Sahina Bose <sabose@redhat.com> Reviewed-on: http://review.gluster.org/7648 Reviewed-by: Shubhendu Tripathi <shtripat@redhat.com> Reviewed-by: Ramesh N <rnachimu@redhat.com> Reviewed-by: Kanagaraj M <kmayilsa@redhat.com>
-rw-r--r-- config/glusternagios.conf.in 2
-rwxr-xr-x plugins/check_gluster_syslog.py 35
-rw-r--r-- plugins/nscautils.py.in 20
-rw-r--r-- tests/test_check_gluster_syslog.py 34
4 files changed, 78 insertions, 13 deletions
diff --git a/config/glusternagios.conf.in b/config/glusternagios.conf.in
index 1327c31..e3a8bc0 100644
--- a/config/glusternagios.conf.in
+++ b/config/glusternagios.conf.in
@@ -8,7 +8,7 @@ $actionomprogbinary @glusternagiospluginsdir@/check_gluster_syslog.py
#
$template GLFS_NAG_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase%/%app-name:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
-if ($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee') then :omprog:;GLFS_NAG_Template
+if (($app-name contains 'glusterfsd' or $programname startswith 'bricks') and not ($msg contains '@cee')) or ($msg contains 'quorum') then :omprog:;GLFS_NAG_Template
#
diff --git a/plugins/check_gluster_syslog.py b/plugins/check_gluster_syslog.py
index 62464dd..e71647a 100755
--- a/plugins/check_gluster_syslog.py
+++ b/plugins/check_gluster_syslog.py
@@ -22,6 +22,7 @@
import re
import sys
import select
+import logging
import nscautils
from glusternagios import utils
@@ -58,14 +59,44 @@ def processQuotaMsg(msg, alertlevel):
alertMsg)
+def processQuorumMsg(msgid, msg, level):
+ logger = logging.getLogger("processQuorumMsg")
+ pluginstatus = None
+ #if msgid == 106002:
+ if "[MSGID: 106002]" in msg or "[MSGID: 106001]" in msg:
+ # [MSGID: 106002] Server quorum lost for volume dist.
+ # Stopping local bricks.
+ # [MSGID: 106001] Server quorum not met. Rejecting operation.
+ pluginstatus = utils.PluginStatusCode.CRITICAL
+ #elif msgid == 106003:
+ elif "[MSGID: 106003]" in msg:
+ # [MSGID: 106003] Server quorum regained for volume dist.
+ # Starting local bricks.
+ pluginstatus = utils.PluginStatusCode.OK
+
+ if pluginstatus >= 0:
+ serviceName = "Cluster - Quorum"
+ alertMsg = "QUORUM:" + msg[msg.rfind(':') + 1:]
+ ret = nscautils.send_to_nsca_subproc(nscautils.getNagiosClusterName(),
+ serviceName,
+ pluginstatus,
+ alertMsg)
+ logger.debug(" nsca ret code for alertMsg %s - %s" % (alertMsg, ret))
+
+
def processMsg(msg):
'Check if msg is indeed from gluster app'
custom_logvars = msg[:msg.find(' ')]
level = custom_logvars.split('/')[2]
+ msgid = custom_logvars.split('/')[0]
+ # if msgid in ([106001,106002,106003]):
+ # msgid is not populated correctly, so for now use below
+ if "[MSGID: 10600" in msg:
+ return processQuorumMsg(msgid, msg, level)
# For gluster messages, need to check the source of message
logsource = msg[msg.rfind('['):msg.rfind(']')]
if logsource.find('quota') > -1:
- processQuotaMsg(msg, level)
+ return processQuotaMsg(msg, level)
def onReceive(msgs):
@@ -94,6 +125,8 @@ two-way conversations with rsyslog. Do NOT change this!
See also: https://github.com/rsyslog/rsyslog/issues/22
"""
if __name__ == '__main__':
+ logging.basicConfig()
+ logger = logging.getLogger(__name__)
keepRunning = 1
while keepRunning == 1:
while keepRunning and sys.stdin in \
diff --git a/plugins/nscautils.py.in b/plugins/nscautils.py.in
index cf5f58b..703e13f 100644
--- a/plugins/nscautils.py.in
+++ b/plugins/nscautils.py.in
@@ -69,15 +69,15 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):
resultString)
nagiosIP = getNagiosServerIP()
p = subprocess.Popen(
- args=(
- __NSCA_CMD_PATH.cmd,
- '-c', __NSCA_CONF_PATH,
- '-H', nagiosIP,
- ),
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- close_fds=True,
+ args=(
+ __NSCA_CMD_PATH.cmd,
+ '-c', __NSCA_CONF_PATH,
+ '-H', nagiosIP,
+ ),
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ close_fds=True,
)
stdout, stderr = p.communicate(input=cmddata)
@@ -86,4 +86,4 @@ def send_to_nsca_subproc(hostName, serviceName, exitStatus, resultString):
def vol_service_name(volName, statusType=None):
- return "Volume Status %s - %s" % (statusType, volName)
+ return "Volume %s - %s" % (statusType, volName)
diff --git a/tests/test_check_gluster_syslog.py b/tests/test_check_gluster_syslog.py
index c8c5d6a..bfa6ec1 100644
--- a/tests/test_check_gluster_syslog.py
+++ b/tests/test_check_gluster_syslog.py
@@ -39,8 +39,40 @@ class TestGlusterSyslog(TestCaseBase):
"Usage is above soft limit: 300.0KB used by /test/")
check_gluster_syslog.processMsg(message)
mock_send_to_nsca.assert_called_with("test-cluster",
- "Volume Status Quota - test-vol",
+ "Volume Quota - test-vol",
utils.PluginStatusCode.WARNING,
"QUOTA: Usage is "
"above soft limit: "
"300.0KB used by /test/")
+
+ @mock.patch('plugins.nscautils.getNagiosClusterName')
+ @mock.patch('plugins.nscautils.send_to_nsca_subproc')
+ def test_checkProcessMsgForQuorum(self, mock_send_to_nsca,
+ mock_getNagiosClusterName):
+ mock_getNagiosClusterName.return_value = "test-cluster"
+ message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL "
+ "[2014-05-02T12:40:14.562509+00:00] "
+ "[2014-05-02 12:40:14.559662] C [MSGID: 106002] "
+ "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] "
+ "0-management: Server quorum lost for volume dist. "
+ "Stopping local bricks. ")
+ check_gluster_syslog.processMsg(message)
+ mock_send_to_nsca.assert_called_with("test-cluster",
+ "Cluster - Quorum",
+ utils.PluginStatusCode.CRITICAL,
+ "QUORUM: Server quorum lost "
+ "for volume dist. "
+ "Stopping local bricks. ")
+
+ @mock.patch('plugins.nscautils.getNagiosClusterName')
+ @mock.patch('plugins.nscautils.send_to_nsca_subproc')
+ def test_checkProcessInvalidMsgForQuorum(self, mock_send_to_nsca,
+ mock_getNagiosClusterName):
+ mock_getNagiosClusterName.return_value = "test-cluster"
+ message = ("-/DAEMON/CRIT/ETC-GLUSTERFS-GLUSTERD.VOL "
+ "[2014-05-02T12:40:14.562509+00:00] "
+ "[2014-05-02 12:40:14.559662] C "
+ "[glusterd-utils.c:3376:glusterd_do_volume_quorum_action] "
+ "0-management: Random quorum message ")
+ check_gluster_syslog.processMsg(message)
+ assert not mock_send_to_nsca.called, "send nsca should not be called"