summaryrefslogtreecommitdiffstats
path: root/plugins/discovery.py
blob: 60e8b267076c83782fa48e796c2d96272707af00 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
#!/usr/bin/python
# discovery.py Nagios plugin to discover Gluster entities using NRPE
# Copyright (C) 2014 Red Hat Inc
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
import argparse
import json

import os
import shutil
import sys

from glusternagios import utils
from config_generator import GlusterNagiosConfManager
import server_utils
from constants import DEFAULT_AUTO_CONFIG_DIR
from constants import NRPE_PATH


from config_generator import CHANGE_MODE_ADD
from config_generator import CHANGE_MODE_REMOVE
from config_generator import CHANGE_MODE_UPDATE


nrpeCmdPath = utils.CommandPath("nrpe", NRPE_PATH, )


def excecNRPECommand(host, command):
    (returncode, outputStr, err) = utils.execCmd([nrpeCmdPath.cmd,
                                                  "-H", host, "-c", command])
    #convert to dictionary
    try:
        output = json.loads(outputStr[0])
    except Exception as e:
        e.args += (outputStr[0])
        raise
    return output


def discoverCluster(hostip, cluster):
    """
    This method helps to discover the nodes, volumes and bricks in the given
    gluster. It uses NRPE commands to contact the gluster nodes.

    Assumptions:
    First node returned by the "discoverpeers" NRPE command should be the same
    node where "discoverpeers" is executed.

    Parameters
    ----------
    hostip: Address of a node in the gluster cluster.
    cluster: Cluster Name

    Returns
    ---------
     Returns cluster details in the following dictinary format
    {
      'name': cluster-name,
     'volumes': [list-volumes],
    'host': [list-hosts]
    }
    each host in the list will a have list of bricks from the host.
    """

    clusterdata = {}
    #Discover the logical components
    componentlist = excecNRPECommand(hostip, "discoverlogicalcomponents")
    #Discover the peers
    hostlist = excecNRPECommand(hostip, "discoverpeers")
    #Add the ip address of the root node given by the user to the peer list
    hostlist[0]['hostip'] = hostip
    for host in hostlist:
        #Get host names
        hostDetails = excecNRPECommand(host['hostip'], "discoverhostparams")
        host.update(hostDetails)
        #Get the list of bricks for this host and add to dictionary
        host['bricks'] = []
        for volume in componentlist['volumes']:
            for brick in volume['bricks']:
                if brick['hostUuid'] == host['uuid']:
                    brick['volumeName'] = volume['name']
                    host['bricks'].append(brick)
    clusterdata['hosts'] = hostlist
    clusterdata['volumes'] = componentlist['volumes']
    clusterdata['name'] = cluster
    #Host names returned by "discoverhostparams" supposed to be unique. So host
    #name can be used to configure the host_name in nagios host.
    #But if host names are not unique then we have to use IP address to
    #configure host_name in nagios.
    if not isHostsNamesUnique(clusterdata):
        setHostNameWithIP(clusterdata)
    return clusterdata


# Set host address as the hostname
def setHostNameWithIP(clusterdata):
    for host in clusterdata['hosts']:
        host['hostname'] = host['hostip']


# Check host names are unique
def isHostsNamesUnique(clusterdata):
    hostnames = {}
    for host in clusterdata['hosts']:
        if hostnames.get(host['hostname']) is None:
            hostnames[host['hostname']] = host['hostip']
        else:
            return False
    return True


def getConfigManager(args):
    configDir = DEFAULT_AUTO_CONFIG_DIR
    if args.configDir is not None:
        configDir = args.configDir
    clusterConfigDir = configDir + "/" + args.cluster
    configManager = GlusterNagiosConfManager(clusterConfigDir)
    return configManager


#Find the given service in service list.
def findServiceInList(serviceList, serviceDescription):
    for service in serviceList:
        if service['service_description'] == serviceDescription:
            return service
    return None


#Find the given host in host list
def findHostInList(hostList, hostName):
    for host in hostList:
        if host['host_name'] == hostName:
            return host
    return None


#Find all deleted services in the host.
def findDeletedServices(host):
    deletedService = []
    serviceConfigs = server_utils.getServiceConfigByHost(host['host_name'])
    for serviceConfig in serviceConfigs:
        service = findServiceInList(host['host_services'],
                                    serviceConfig['service_description'])
        if service is None:
            deletedService.append(
                {'service_description': serviceConfig['service_description'],
                 'changeMode': CHANGE_MODE_REMOVE})
    return deletedService


#Check if auto config is changed. IP address in the check command will change
#when user runs the auto config using different host.
def findChangeInAutoConfig(newService, oldService):
    if newService['check_command'] != oldService['check_command']:
        changes = {}
        changes['check_command'] = newService['check_command']
        changes['service_description'] = newService['service_description']
        changes['host_name'] = newService['host_name']
        changes['changeMode'] = CHANGE_MODE_UPDATE
        return changes
    return None


#Find all Added/Deleted services in the given host.
#Note: 'Cluster Auto Config' is a special service. When user runs the
#auto-config using different host instead what is used previously then we
#have to update the host ip in existing auto-config service.
def findServiceDelta(host):
    serviceDelta = []
    for service in host['host_services']:
        serviceConfig = server_utils.getServiceConfig(
            service['service_description'], service['host_name'])
        if serviceConfig is None:
            service['changeMode'] = CHANGE_MODE_ADD
            serviceDelta.append(service)
        elif serviceConfig['service_description'] == "Cluster Auto Config":
            changes = findChangeInAutoConfig(service, serviceConfig)
            if changes:
                serviceDelta.append(changes)
    serviceDelta.extend(findDeletedServices(host))
    return serviceDelta


#Find newly added hosts and newly added services to the existing hosts
def findAddUpdateHosts(hosts):
    delta = []
    for host in hosts:
        hostConfing = server_utils.getHostConfigByName(host['host_name'])
        if hostConfing is None:
            host['changeMode'] = CHANGE_MODE_ADD
            delta.append(host)
        else:
            serviceDelta = findServiceDelta(host)
            if serviceDelta:
                host['changeMode'] = CHANGE_MODE_UPDATE
                host['host_services'] = serviceDelta
                delta.append(host)
    return delta


#Find deleted hosts in the given cluster.
def findDeletedHosts(hostgroup, hosts):
    deletedHosts = []
    hostConfigs = server_utils.getHostConfigsForCluster(hostgroup)
    for hostConfig in hostConfigs:
        host = findHostInList(hosts, hostConfig['host_name'])
        if host is None:
            deletedHosts.append({'host_name': hostConfig['host_name'],
                                 'changeMode': CHANGE_MODE_REMOVE})
    return deletedHosts


#Find Added/Deleted/Updated hosts in cluster
def findHostDelta(clusterConfig):
    hostDelta = []
    updated = findAddUpdateHosts(clusterConfig['_hosts'])
    hostDelta.extend(updated)
    hostDelta.extend(findDeletedHosts(clusterConfig['hostgroup_name'],
                                      clusterConfig['_hosts']))
    return hostDelta


#Find changes to the cluster
def findDelta(clusterConfig):
    delta = {}
    delta['hostgroup_name'] = clusterConfig['hostgroup_name']
    delta['alias'] = clusterConfig['alias']

    hostgroup = server_utils.getHostGroup(clusterConfig['hostgroup_name'])
    if hostgroup is None:
        delta['changeMode'] = CHANGE_MODE_ADD
        delta['_hosts'] = clusterConfig['_hosts']
        return delta

    hostDelta = findHostDelta(clusterConfig)
    delta['_hosts'] = hostDelta
    if hostDelta:
        delta['changeMode'] = CHANGE_MODE_UPDATE
    return delta


def parse_input():
    parser = argparse.ArgumentParser(description="Gluster Auto Discover Tool")
    parser.add_argument('-c', '--cluster', action='store', dest='cluster',
                        type=str, required=True, help='Cluster name')
    parser.add_argument('-H', '--hostip', action='store', dest='hostip',
                        type=str, required=True, help='Host IP')
    parser.add_argument('-m', '--mode', action='store', dest='mode',
                        choices=['auto', 'manual'], required=False,
                        default='manual', help='Mode')
    parser.add_argument('-d', '--configdir', action='store', dest='configDir',
                        type=str, required=False,
                        default=DEFAULT_AUTO_CONFIG_DIR,
                        help='Configuration directory where'
                             ' output files will be written')
    parser.add_argument('-f', '--force', action='store_true', dest='force',
                        help="Force sync the Cluster configuration")
    args = parser.parse_args()
    return args


#Clean the config directory
def cleanConfigDir(dir):
    if os.path.exists(dir):
        # Deleting the config dir to write new configs
        shutil.rmtree(dir)
    os.mkdir(dir)


#Preview the changes to cluster config
def previewChanges(clusterDelta):
    print "Changes :"
    clusterChangeMode = clusterDelta['changeMode']
    print "Hostgroup %s - %s" % (clusterDelta['hostgroup_name'],
                                 clusterChangeMode)
    for host in clusterDelta['_hosts']:
        if host.get('changeMode'):
            changeMode = host.get('changeMode')
        else:
            changeMode = clusterChangeMode
        print "Host %s - %s" % (host['host_name'], changeMode)
        for service in host.get('host_services'):
            if service.get('changeMode'):
                changeMode = service.get('changeMode')
            print "\t Service - %s -%s " % (service['service_description'],
                                            changeMode)


#Write the cluster configurations. If force mode is used then it will clean
#the config directory before writing the changes.
def writeDelta(clusterDelta, configManager, force):
    if force:
        cleanConfigDir(configManager.configDir)
    configManager.generateConfigFiles(clusterDelta)


def getConfirmation(message, default):
    while True:
        ans = raw_input("%s (Yes, No) [%s]: " % (message, default))
        if not ans:
            ans = default
        ans = ans.upper()
        if ans not in ['YES', 'NO']:
            print 'please enter Yes or No'
        if ans == 'YES':
            return True
        if ans == 'NO':
            return False

if __name__ == '__main__':
    args = parse_input()
    clusterdata = discoverCluster(args.hostip, args.cluster)
    configManager = getConfigManager(args)
    clusterDelta = configManager.generateNagiosConfig(clusterdata)
    if args.force:
        clusterDelta['changeMode'] = CHANGE_MODE_ADD
    else:
        clusterDelta = findDelta(clusterDelta)

    if clusterDelta.get('changeMode') is None:
        print "Cluster configurations are in sync"
        sys.exit(utils.PluginStatusCode.OK)
    #When auto config is run in manual mode, we will ask confirmation
    #before writing the config file and before restarting the Nagios
    if args.mode == "manual":
        print "Cluster configurations changed"
        previewChanges(clusterDelta)
        confirmation = getConfirmation(
            "Are you sure, you want to commit the changes?", "Yes")
        if confirmation:
            writeDelta(clusterDelta, configManager, args.force)
            print "Cluster configurations synced successfully from host %s" % \
                  (args.hostip)
            #If Nagios is running then try to restart. Otherwise don't do
            #anything.
            if server_utils.isNagiosRunning():
                confirmation = getConfirmation(
                    "Do you want to restart Nagios to start monitoring newly "
                    "discovered entities?", "Yes")
                if confirmation:
                    server_utils.restartNagios()
                    print "Nagios re-started successfully"
            else:
                print "Start the Nagios service to monitor"
    #auto mode means write the configurations without asking confirmation
    elif args.mode == "auto":
        writeDelta(clusterDelta, configManager, args.force)
        print "Cluster configurations synced successfully from host %s" % \
              (args.hostip)
        if server_utils.isNagiosRunning():
            server_utils.restartNagios()
    sys.exit(utils.PluginStatusCode.OK)