diff options
Diffstat (limited to 'plugins/gluster_host_service_handler.py')
-rwxr-xr-x | plugins/gluster_host_service_handler.py | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/plugins/gluster_host_service_handler.py b/plugins/gluster_host_service_handler.py new file mode 100755 index 0000000..283ac69 --- /dev/null +++ b/plugins/gluster_host_service_handler.py @@ -0,0 +1,154 @@ +#!/usr/bin/python +# +# gluster_host_service_handler.py -- Event handler which checks the +# status of defined services and accordingly changes the host status +# +# Copyright (C) 2014 Red Hat Inc +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,USA +# + +import os +import sys +import datetime +import socket +import getopt + +STATUS_OK = "OK" +STATUS_WARNING = "WARNING" +STATUS_CRITICAL = "CRITICAL" +STATUS_UNKNOWN = "UNKNOWN" +SRVC_STATE_TYPE_SOFT = "SOFT" +SRVC_STATE_TYPE_HARD = "HARD" +statusCodes = {STATUS_OK: 0, STATUS_WARNING: 1, STATUS_CRITICAL: 2, + STATUS_UNKNOWN: 3} +NAGIOS_COMMAND_FILE = "/var/spool/nagios/cmd/nagios.cmd" +SRVC_LIST = ['Disk Utilization', 'Cpu Utilization', 'Memory Utilization', + 'Swap Utilization', 'Network Utilization'] +_socketPath = '/var/spool/nagios/cmd/live' + + +# Shows the usage of the script +def showUsage(): + usage = "Usage: %s -s <Service State (OK/WARNING/CRITICAL/UNKNOWN)> " + "-t <Service State Type (SOFT/HARD)> -a <No of Service attempts> " + "-l <Host Address> -n <Service Name>\n" % os.path.basename(sys.argv[0]) + sys.stderr.write(usage) + + +# Method to change the host status +def update_host_state(hostAddr, srvcName, statusCode): + now = datetime.datetime.now() + if statusCode == statusCodes[STATUS_WARNING]: + cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status WARNING - " + "Service(s) ['%s'] in CRITICAL state\n" % (now, hostAddr, statusCode, + srvcName) + else: + cmdStr = "[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;Host Status OK - " + "Services in good health\n" % (now, hostAddr, statusCode) + + f = open(NAGIOS_COMMAND_FILE, "w") + f.write(cmdStr) + f.close() + + +# Method to execute livestatus +def checkLiveStatus(hostAddr, srvc): + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.connect(_socketPath) + + # Write command to socket + cmd = "GET services\nColumns: state\nFilter: " + "description = %s\nFilter: host_address = %s\n" % (srvc, hostAddr) + s.send(cmd) + + # Close socket + s.shutdown(socket.SHUT_WR) + + # Read the answer + answer = s.recv(1000) + + # Parse the answer into a table + table = [line.split(';') for line in answer.split('\n')[:-1]] + + if len(table) > 0 and len(table[0]) > 0: + return int(table[0][0]) + else: + return statusCodes[STATUS_UNKNOWN] + + +# Method to change the host state to UP based on other service type status +def check_and_update_host_state_to_up(hostAddr, srvcName): + finalState = 0 + for item in SRVC_LIST: + if item != srvcName: + finalState = finalState | checkLiveStatus(hostAddr, item) + + if finalState == statusCodes[STATUS_OK]: + update_host_state(hostAddr, srvcName, statusCodes[STATUS_OK]) + + +# Main method +if __name__ == "__main__": + try: + opts, args = getopt.getopt(sys.argv[1:], "hs:t:a:l:n:", + ["help", "state=", "type=", + "attempts=", "location=", "name="]) + except getopt.GetoptError as e: + print (str(e)) + showUsage() + sys.exit(STATUS_CRITICAL) + + srvcState = '' + srvcStateType = '' + attempts = '' + hostAddr = '' + srvcName = '' + if len(opts) == 0: + showUsage() + else: + for opt, arg in opts: + if opt in ('-h', '--help'): + showUsage() + sys.exit() + elif opt in ('-s', '--state'): + srvcState = arg + elif opt in ('-t', '--type'): + srvcStateType = arg + elif opt in ('-a', '--attempts'): + attempts = arg + elif opt in ('-l', '--location'): + hostAddr = arg + elif opt in ('-n', '--name'): + srvcName = arg + else: + showUsage() + sys.exit() + + # Swicth over the service state values and do the needful + if srvcState == STATUS_CRITICAL: + if srvcStateType == SRVC_STATE_TYPE_SOFT: + if int(attempts) == 3: + print "Updating the host status to warning " + "(3rd SOFT critical state)..." + update_host_state(hostAddr, srvcName, + statusCodes[STATUS_WARNING]) + elif srvcStateType == SRVC_STATE_TYPE_HARD: + print "Updating the host status to warning..." + update_host_state(hostAddr, srvcName, statusCodes[STATUS_WARNING]) + elif srvcState == STATUS_OK: + check_and_update_host_state_to_up(hostAddr, srvcName) + + sys.exit(0) |