summaryrefslogtreecommitdiffstats
path: root/plugins/check_vol_status.py
blob: 9e526dac0e664069169a86dddbafb14d929391b0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/python
import argparse
import commands
import re
import subprocess
import xml.etree.ElementTree as ET

from glusternagios import utils


def parseXml(xmldoc, searchStr):
    """Parse an XML string and return every element matching a path.

    xmldoc    -- XML document as a string.
    searchStr -- ElementTree path expression, evaluated relative to the
                 document's root element.

    Returns the list produced by ``findall``; it is empty when nothing
    matches.
    """
    return ET.fromstring(xmldoc).findall(searchStr)


def getVolumeStatus(vol_status_out):
    """Classify the volume as "Started" or "Stopped" from its status XML.

    vol_status_out -- XML text; the first ``opRet`` child of the root
                      element is inspected.

    Returns "Started" when that element's text is exactly "0". Any other
    value, or a document with no ``opRet`` element at all, yields "Stopped"
    rather than raising IndexError.

    Input that is not well-formed XML still raises the parser's error.
    """
    opRet = ET.fromstring(vol_status_out).find("./opRet")
    if opRet is not None and opRet.text == "0":
        return "Started"
    return "Stopped"


def getNagiosServerIP(configPath="/etc/nagios/nagios_server.cfg"):
    """Return the Nagios server address read from a config file.

    configPath -- file to scan; defaults to the path this plugin has always
                  used.

    Every line containing "nagios_server" is considered, and the text after
    the last '=' on the last such line wins. Surrounding whitespace is
    removed from the value. Returns "" when no line matches.

    Raises IOError/OSError if the file cannot be opened for reading.
    """
    nagiosIP = ""
    # Read-only mode is sufficient, and the with-block guarantees the handle
    # is closed even if iteration fails.
    with open(configPath, "r") as nscaConfig:
        for line in nscaConfig:
            if "nagios_server" in line:
                nagiosIP = line.rpartition('=')[2].strip()
    return nagiosIP


def send_to_nsca(hostName, serviceName, exitStatus, resultString):
    """Submit one passive check result to the Nagios server via send_nsca.

    hostName     -- host the result is reported for.
    serviceName  -- service description the result is reported for.
    exitStatus   -- plugin status code for the result.
    resultString -- plugin output text.

    The four fields are joined with tabs into a single newline-terminated
    line and fed to ``send_nsca`` on its standard input. The server address
    comes from getNagiosServerIP().

    The line is piped directly instead of going through a fixed ``out.txt``
    in the current directory, so the plugin no longer needs a writable
    working directory and concurrent runs cannot overwrite each other's
    data. The command is passed as an argument vector, so the address is
    never interpreted by a shell.

    Submission is best effort: send_nsca's output is discarded, and a
    failure to launch it is ignored. Returns None.
    """
    checkResult = '%s\t%s\t%s\t%s\n' % (hostName,
                                        serviceName,
                                        exitStatus,
                                        resultString)
    nagiosIP = getNagiosServerIP()
    try:
        sender = subprocess.Popen(
            ["send_nsca", "-H", nagiosIP, "-c", "/etc/nagios/send_nsca.cfg"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True)
    except OSError:
        # send_nsca could not be started (e.g. not installed). Launch
        # failures were silently tolerated before, so stay non-fatal.
        return
    sender.communicate(checkResult)


def showBrickStatus(vol_status_out):
    """Report the state of each brick of a started volume to Nagios.

    vol_status_out -- XML text of the volume status.

    Nothing is sent unless getVolumeStatus() reports "Started". Otherwise
    each ``./volStatus/volumes/volume/node`` element whose ``hostname`` text
    looks like a dotted-quad address is reported through send_to_nsca() as
    service "Brick-<hostname>:<path>-Status" on host <hostname>: OK when the
    node's ``status`` text is "1", CRITICAL otherwise. Nodes with any other
    kind of hostname are skipped.
    """
    if getVolumeStatus(vol_status_out) != "Started":
        return

    dottedQuad = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
    for node in parseXml(vol_status_out, "./volStatus/volumes/volume/node"):
        host = node.find('hostname').text
        if not dottedQuad.match(host):
            continue
        service = "".join(["Brick-", host, ":",
                           node.find('path').text, "-Status"])
        if node.find('status').text == "1":
            code = utils.PluginStatusCode.OK
            message = "Brick Status: OK"
        else:
            code = utils.PluginStatusCode.CRITICAL
            message = "Brick Status: CRITICAL"
        send_to_nsca(host, service, code, message)


def showVolumeStatus(vol_status_out, volName, clusterName):
    """Summarise a volume's health and report it to Nagios.

    vol_status_out -- XML text of the volume status.
    volName        -- volume name; the service is "Volume-<volName>-Status".
    clusterName    -- name passed to send_to_nsca() as the host.

    When getVolumeStatus() reports "Started", the
    ``./volStatus/volumes/volume/node`` elements whose ``hostname`` text
    looks like a dotted-quad address are counted as bricks; a brick is
    online when its ``status`` text is "1".

    Result sent through send_to_nsca():
      * CRITICAL -- volume not started, or offline count equals the total
                    (this includes a total of zero);
      * WARNING  -- some but not all bricks online;
      * OK       -- every brick online.
    The message carries the total and online counts.
    """
    serviceName = "Volume-%s-Status" % volName
    total = online = offline = 0

    volumeStarted = getVolumeStatus(vol_status_out) == "Started"
    if volumeStarted:
        dottedQuad = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
        nodes = parseXml(vol_status_out, "./volStatus/volumes/volume/node")
        for node in nodes:
            if not dottedQuad.match(node.find('hostname').text):
                continue
            total += 1
            if node.find('status').text == "1":
                online += 1
            else:
                offline += 1

    # Pick the severity and message template, then format once at the end.
    if not volumeStarted:
        exitStatus = utils.PluginStatusCode.CRITICAL
        template = ("Volume Status CRITICAL: Volume Stopped "
                    "Total Bricks: %s|Bricks Online=%s")
    elif offline == total:
        exitStatus = utils.PluginStatusCode.CRITICAL
        template = ("Volume Status CRITICAL: All Bricks are Down "
                    "Total Bricks: %s|Bricks Online=%s")
    elif online != total:
        exitStatus = utils.PluginStatusCode.WARNING
        template = ("Volume Status WARNING: Some Bricks are Down "
                    "Total Bricks: %s|Bricks Online=%s")
    else:
        exitStatus = utils.PluginStatusCode.OK
        template = ("Volume Status OK: "
                    "Total Bricks: %s|Bricks Online=%s")

    send_to_nsca(clusterName, serviceName, exitStatus,
                 template % (total, online))


def parse_input():
    """Parse the command line and return the resulting namespace.

    Two positional arguments are required: ``volume`` and ``cluster``.
    The returned object exposes them as ``.volume`` and ``.cluster``.
    """
    positionals = (
        ("volume", "Name of the volume to get the Status"),
        ("cluster", "Name of the cluster, volume belongs to"),
    )
    cliParser = argparse.ArgumentParser(
        usage='%(prog)s [-h] <volume> <cluster>')
    for argName, helpText in positionals:
        cliParser.add_argument(argName, help=helpText)
    return cliParser.parse_args()


if __name__ == '__main__':
    args = parse_input()
    # Get the volume status as XML. The command is passed as an argument
    # vector (no shell), so a volume name containing shell metacharacters
    # cannot inject extra commands into this sudo invocation.
    command_vol_status = ["sudo", "gluster", "volume", "status",
                          args.volume, "--xml"]
    statusProc = subprocess.Popen(command_vol_status,
                                  stdout=subprocess.PIPE,
                                  # Capture stderr in the same stream.
                                  stderr=subprocess.STDOUT,
                                  universal_newlines=True)
    vol_status_out = statusProc.communicate()[0]
    showVolumeStatus(vol_status_out, args.volume, args.cluster)
    showBrickStatus(vol_status_out)