From 446c43a73be31baa3a2adaef0a198fc50c18a9f9 Mon Sep 17 00:00:00 2001 From: Sahina Bose Date: Fri, 21 Aug 2015 16:23:33 +0530 Subject: gluster-nagios: geo rep status plugin Changed the georep status plugin to reflect the changes in georep status output Change-Id: I7f8f7c07d3da552283758a221c819744c616777b Bug-Url: https://bugzilla.redhat.com/1240959 Signed-off-by: Sahina Bose Reviewed-on: http://review.gluster.org/11983 Reviewed-by: darshan n --- glusternagios/glustercli.py | 121 ++++++++++++------------- tests/test_glustercli.py | 216 +++++++++++++++++++++++++++++--------------- 2 files changed, 203 insertions(+), 134 deletions(-) diff --git a/glusternagios/glustercli.py b/glusternagios/glustercli.py index 4c2f564..fec1927 100755 --- a/glusternagios/glustercli.py +++ b/glusternagios/glustercli.py @@ -515,80 +515,75 @@ def _parseVolumeSelfHealInfo(out): return value -def _parseVolumeGeoRepStatus(volumeName, out): - # https://bugzilla.redhat.com/show_bug.cgi?id=1090910 - opened for xml - # output. For now parsing below string output format - # MASTER NODE MASTER VOL MASTER BRICK - # SLAVE STATUS CHECKPOINT STATUS CRAWL STATUS +def _parseVolumeGeoRepStatus(volumeName, tree): slaves = {} - other_status = ['ACTIVE', 'INITIALIZING...'] - for line in out[3:]: - tempstatus = None - nodeline = line.split() - node = nodeline[0] - brick = nodeline[2] - slave = nodeline[3][nodeline[3].find('::') + 2:] - if slaves.get(slave) is None: - slaves[slave] = {'nodecount': 0, - 'faulty': 0, - 'notstarted': 0, - 'stopped': 0, - 'passive': 0, - 'detail': '', - 'status': GeoRepStatus.OK, - 'name': nodeline[3] - } - slaves[slave]['nodecount'] += 1 - if GeoRepStatus.FAULTY in line.upper(): - slaves[slave]['faulty'] += 1 - tempstatus = GeoRepStatus.FAULTY - elif "NOT STARTED" in line.upper(): - slaves[slave]['notstarted'] += 1 - tempstatus = GeoRepStatus.NOT_STARTED - elif "PASSIVE" in line.upper(): - slaves[slave]['passive'] += 1 - tempstatus = "PASSIVE" - elif GeoRepStatus.STOPPED in line.upper(): - slaves[slave]['stopped'] += 1 - tempstatus = GeoRepStatus.STOPPED - elif not any(gstatus in line.upper() for gstatus in other_status): - tempstatus = nodeline[4] - - if tempstatus: - slaves[slave]['detail'] += ("%s:%s - %s;" % (node, - brick, - tempstatus)) volumes = volumeInfo(volumeName) - brickCount = int(volumes[volumeName]["brickCount"]) if "REPLICATE" in volumes[volumeName]["volumeType"]: replicaCount = int(volumes[volumeName]["replicaCount"]) else: replicaCount = 1 - - for slave, count_dict in slaves.iteritems(): - if count_dict['nodecount'] > brickCount: - # There are multiple slave volumes with same name, the output - # may be wrong - slaves[slave]['detail'] += "NOTE:Multiple slave session aggregated" - if count_dict['faulty'] > 0: + other_status = ['ACTIVE', 'INITIALIZING'] + volume = tree.find('geoRep/volume') + for session in volume.findall('sessions/session'): + sessionKey = session.find('session_slave').text + slave = sessionKey.split("::")[-1] + slaves[slave] = {'nodecount': 0, + 'faulty': 0, + 'notstarted': 0, + 'stopped': 0, + 'passive': 0, + 'detail': '', + 'status': GeoRepStatus.OK, + 'name': sessionKey.split(":", 1)[1] + } + for pair in session.findall('pair'): + tempstatus = None + slaves[slave]['nodecount'] += 1 + status = pair.find('status').text.upper() + if "FAULTY" in status: + slaves[slave]['faulty'] += 1 + tempstatus = GeoRepStatus.FAULTY + elif "CREATED" in status: + slaves[slave]['notstarted'] += 1 + tempstatus = GeoRepStatus.NOT_STARTED + elif "PASSIVE" in status: + slaves[slave]['passive'] += 1 + tempstatus = "PASSIVE" + elif GeoRepStatus.STOPPED in status: + slaves[slave]['stopped'] += 1 + tempstatus = GeoRepStatus.STOPPED + # elif not any(gstatus in status for gstatus in other_status): + elif status not in other_status: + tempstatus = status + if tempstatus: + slaves[slave]['detail'] += \ + ("%s:%s - %s;" % + (pair.find('master_node').text, + pair.find('master_brick').text, + tempstatus)) + if slaves[slave]['faulty'] > 0: if replicaCount > 1: - # georep cli status does not give the node name in the same - # way as gluster volume info - there's no way to compare and - # get the subvolume. So if fault+passive > than num of primary - # bricks, moving to faulty - if (count_dict['faulty'] + count_dict['passive'] - > count_dict['nodecount']/replicaCount): + # georep cli status does not give the + # node name in the same way as + # gluster volume info - there's no way + # to compare and get the + # subvolume. So if fault+passive > + # than num of primary bricks, + # moving to faulty + if (slaves[slave]['faulty'] + slaves[slave]['passive'] + > slaves[slave]['nodecount']/replicaCount): slaves[slave]['status'] = GeoRepStatus.FAULTY else: slaves[slave]['status'] = GeoRepStatus.PARTIAL_FAULTY else: slaves[slave]['status'] = GeoRepStatus.FAULTY - elif (count_dict['notstarted'] > 0 and + elif (slaves[slave]['notstarted'] > 0 and slaves[slave]['status'] == GeoRepStatus.OK): slaves[slave]['status'] = GeoRepStatus.NOT_STARTED - elif (count_dict['stopped'] > 0 and + elif (slaves[slave]['stopped'] > 0 and slaves[slave]['status'] == GeoRepStatus.OK): slaves[slave]['status'] = GeoRepStatus.STOPPED + return {volumeName: {'slaves': slaves}} @@ -611,12 +606,12 @@ def volumeGeoRepStatus(volumeName, remoteServer=None): command = _getGlusterVolCmd() + ["geo-replication", volumeName, "status"] if remoteServer: command += ['--remote-host=%s' % remoteServer] + xmltree = _execGlusterXml(command) - rc, out, err = _execGluster(command) - - if rc == 0: - return _parseVolumeGeoRepStatus(volumeName, out) - raise GlusterCmdFailedException(rc=rc, out=out, err=err) + try: + return _parseVolumeGeoRepStatus(volumeName, xmltree) + except _etreeExceptions: + raise GlusterCmdFailedException(err=[etree.tostring(xmltree)]) def volumeHealStatus(volumeName, remoteServer=None): diff --git a/tests/test_glustercli.py b/tests/test_glustercli.py index 2f3f8cb..59504b0 100644 --- a/tests/test_glustercli.py +++ b/tests/test_glustercli.py @@ -1141,17 +1141,12 @@ class GlusterCliTests(TestCaseBase): print(status) self.assertEquals(status, expectedOut) - @mock.patch('glusternagios.utils.execCmd') @mock.patch('glusternagios.glustercli._getGlusterVolCmd') @mock.patch('glusternagios.glustercli.volumeInfo') def test_getVolumeGeoRepStatus(self, mock_volumeInfo, - mock_glusterVolCmd, - mock_execCmd,): + mock_glusterVolCmd,): mock_glusterVolCmd.return_value = ["gluster", "volume"] - mock_execCmd.return_value = (0, - self.__getGlusterGeoRepStatusResult(), - None) mock_volumeInfo.return_value = {'test-vol': {'volumeType': 'REPLICATE', 'replicaCount': 2, @@ -1170,23 +1165,19 @@ class GlusterCliTests(TestCaseBase): 'rhs3-2.novalocal:' '/bricks/b3 - FAULTY;', 'status': gcli.GeoRepStatus.FAULTY, - 'name': '10.70.43.68::slave-vol'} + 'name': 'ssh://10.70.47.165::slave-vol'} }}} - status = gcli.volumeGeoRepStatus("test-vol") + tree = etree.fromstring(self.__getGlusterGeoRepStatusResult()) + status = gcli._parseVolumeGeoRepStatus("test-vol", tree) print(status) self.assertEquals(status, expectedOut) - @mock.patch('glusternagios.utils.execCmd') @mock.patch('glusternagios.glustercli._getGlusterVolCmd') @mock.patch('glusternagios.glustercli.volumeInfo') def test_getVolumeGeoRepStatusMultiSlave(self, mock_volumeInfo, - mock_glusterVolCmd, - mock_execCmd,): + mock_glusterVolCmd,): mock_glusterVolCmd.return_value = ["gluster", "volume"] - mock_execCmd.return_value = (0, - self.__getGlusterGeoRepStatusResult2(), - None) mock_volumeInfo.return_value = {'test-vol': {'volumeType': 'REPLICATE', 'replicaCount': 2, @@ -1201,31 +1192,33 @@ class GlusterCliTests(TestCaseBase): 'notstarted': 0, 'stopped': 0, 'passive': 1, - 'detail': 'rhs3.novalocal:/bricks/b3 ' + 'detail': 'casino-vm3.lab.eng.blr.redhat.com:' + '/bricks/b5 ' '- PASSIVE;' - 'rhs3-2.novalocal:/bricks/b3 ' + 'casino-vm4.lab.eng.blr.redhat.com:' + '/bricks/b5 ' '- FAULTY;', 'status': gcli.GeoRepStatus.FAULTY, - 'name': '10.70.43.68::slave-vol'}, + 'name': 'ssh://10.70.43.68::slave-vol'}, 'slave-vol2': {'faulty': 0, 'nodecount': 2, 'notstarted': 2, 'stopped': 0, 'passive': 0, - 'detail': 'rhs3.novalocal:/bricks/b3 ' + 'detail': 'casino-vm3.lab.eng.blr.redhat.com:' + '/bricks/b5 ' '- NOT_STARTED;' - 'rhs3-2.novalocal:/bricks/b3 ' + 'casino-vm4.lab.eng.blr.redhat.com:' + '/bricks/b5 ' '- NOT_STARTED;', 'status': gcli.GeoRepStatus.NOT_STARTED, - 'name': '10.70.43.68::slave-vol2'} + 'name': 'ssh://10.70.43.68::slave-vol2'} }}} - status = gcli.volumeGeoRepStatus("test-vol") + tree = etree.fromstring(self.__getGlusterGeoRepStatusResult2()) + status = gcli._parseVolumeGeoRepStatus("test-vol", tree) print(status) self.assertEquals(status, expectedOut) - mock_execCmd.return_value = (0, - self.__getGlusterGeoRepStatusResult3(), - None) expectedOut = {'test-vol': {'slaves': {'slave-vol': @@ -1240,9 +1233,10 @@ class GlusterCliTests(TestCaseBase): '/bricks/b3 ' '- NOT_STARTED;', 'status': gcli.GeoRepStatus.NOT_STARTED, - 'name': '10.70.43.68::slave-vol' + 'name': 'ssh://10.70.47.165::slave-vol' }}}} - status = gcli.volumeGeoRepStatus("test-vol") + tree = etree.fromstring(self.__getGlusterGeoRepStatusResult3()) + status = gcli._parseVolumeGeoRepStatus("test-vol", tree) print(status) self.assertEquals(status, expectedOut) @@ -1279,52 +1273,132 @@ class GlusterCliTests(TestCaseBase): "/dir.7/file.4"] def __getGlusterGeoRepStatusResult(self): - return [" ", - "MASTER NODE MASTER VOL MASTER BRICK " - "SLAVE STATUS CHECKPOINT STATUS " - "CRAWL STATUS", - "--------------------------------------------------------" - "--------------------------------------------------------" - "----------------", - "rhs3.novalocal rep /bricks/b3 " - "10.70.43.68::slave-vol faulty " - "N/A N/A", - "rhs3-2.novalocal rep /bricks/b3 " - "10.70.43.68::slave-vol faulty " - "N/A N/A "] + return """ + + 0 + 0 + + + + rep + + + ce7387db-bff5-4719-bfdd-89736f5c15d2:ssh://10.70.47.165::slave-vol + + rhs3.novalocal + /bricks/b3 + root + ssh://10.70.47.165::slave-vol + N/A + faulty + N/A + + + rhs3-2.novalocal + /bricks/b3 + root + ssh://10.70.47.165::slave-vol + N/A + Faulty + N/A + + + + + + +""" def __getGlusterGeoRepStatusResult2(self): - return [" ", - "MASTER NODE MASTER VOL MASTER BRICK " - "SLAVE STATUS CHECKPOINT STATUS " - "CRAWL STATUS", - "--------------------------------------------------------" - "--------------------------------------------------------" - "----------------", - "rhs3.novalocal rep /bricks/b3 " - "10.70.43.68::slave-vol Passive " - "N/A N/A", - "rhs3-2.novalocal rep /bricks/b3 " - "10.70.43.69::slave-vol faulty " - "N/A N/A ", - "rhs3.novalocal rep /bricks/b3 " - "10.70.43.68::slave-vol2 Not Started " - "N/A N/A", - "rhs3-2.novalocal rep /bricks/b3 " - "10.70.43.69::slave-vol2 Not Started " - "N/A N/A "] + return """ + + 0 + 0 + + + + geo-rep-test + + + ce7387db-bff5-4719-bfdd-89736f5c15d2:ssh://10.70.43.68::slave-vol + + casino-vm3.lab.eng.blr.redhat.com + /bricks/b5 + root + ssh://10.70.43.68::slave-vol + N/A + PASSIVE + N/A + + + casino-vm4.lab.eng.blr.redhat.com + /bricks/b5 + root + ssh://10.70.43.68::slave-vol + N/A + FAULTY + N/A + + + + ce7387db-bff5-4719-bfdd-89736f5c15d2:ssh://10.70.43.68::slave-vol2 + + casino-vm3.lab.eng.blr.redhat.com + /bricks/b5 + root + ssh://10.70.43.68::slave-vol2 + N/A + CREATED + N/A + + + casino-vm4.lab.eng.blr.redhat.com + /bricks/b5 + root + ssh://10.70.43.68::slave-vol2 + N/A + CREATED + N/A + + + + + +""" def __getGlusterGeoRepStatusResult3(self): - return [" ", - "MASTER NODE MASTER VOL MASTER BRICK " - "SLAVE STATUS CHECKPOINT STATUS " - "CRAWL STATUS", - "--------------------------------------------------------" - "--------------------------------------------------------" - "----------------", - "rhs3.novalocal rep /bricks/b3 " - "10.70.43.68::slave-vol Passive " - "N/A N/A", - "rhs3-2.novalocal rep /bricks/b3 " - "10.70.43.68::slave-vol Not Started " - "N/A N/A "] + return """ + + 0 + 0 + + + + rep + + + ce7387db-bff5-4719-bfdd-89736f5c15d2:ssh://10.70.47.165::slave-vol + + rhs3.novalocal + /bricks/b3 + root + ssh://10.70.47.165::slave-vol + N/A + Passive + N/A + + + rhs3-2.novalocal + /bricks/b3 + root + ssh://10.70.47.165::slave-vol + N/A + Created + N/A + + + + + + +""" -- cgit