From 4d6eb013493642736b9168c047f793888ded864c Mon Sep 17 00:00:00 2001 From: Nishanth Thomas Date: Fri, 13 Jun 2014 16:39:37 +0530 Subject: nagios-server-addons: Fixed issues with executeRandomHost When a node in the cluster is down, the nrpe request sent to that node times out, resulting in a failure in executeRandomHost. This is fixed by selecting only those nodes which are UP. Change-Id: I5b22dcb30c0e2a006a549dc642b16db278c9c0f1 Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1109025 Signed-off-by: Nishanth Thomas Reviewed-on: http://review.gluster.org/8061 Reviewed-by: Ramesh N Reviewed-by: Kanagaraj M Tested-by: Nishanth Thomas Reviewed-by: Sahina Bose --- plugins/check_vol_server.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/plugins/check_vol_server.py b/plugins/check_vol_server.py index faaa50b..05b3e2d 100755 --- a/plugins/check_vol_server.py +++ b/plugins/check_vol_server.py @@ -10,12 +10,14 @@ import server_utils def _getListHosts(hostgroup): - table = livestatus.readLiveStatus("GET hostgroups\nColumns: members\n" - "Filter: name = " - + hostgroup + "\n") - tab1 = table[0] - list_hosts = tab1[0].split(",") - #First take a random host from the group and send the request + list_hosts = [] + table = json.loads(livestatus.readLiveStatusAsJSON( + "GET hostgroups\nColumns: members_with_state\n" + "Filter: name = " + hostgroup + "\n"))[0][0] + #Get the only those nodes which are UP + for row in table: + if row[1] == utils.HostStatusCode.UP: + list_hosts.append(row[0]) return list_hosts @@ -190,6 +192,10 @@ def execNRPECommand(command): def _executeRandomHost(hostgroup, command): list_hosts = _getListHosts(hostgroup) + if not list_hosts: + status = utils.PluginStatusCode.UNKNOWN + output = " UNKNOWN: No hosts(with state UP) found in the cluster" + return status, output host = random.choice(list_hosts) #Get the address of the host host_address = _getHostAddress(host) -- cgit