author     Ramesh Nachimuthu <rnachimu@redhat.com>  2014-05-07 15:23:55 +0530
committer  Sahina Bose <sabose@redhat.com>          2014-05-07 06:40:00 -0700
commit     4c63ceaf48a3641adb4e087125d6698fa857ad37 (patch)
tree       78a9a13f971d7e02811115e36756a3c2759200dd /plugins/check_proc_util.py
parent     a4f97c15206c4930e6336c7a1fba67a231615486 (diff)
nagios-addons: Add NRPE command for gluster processes
Adding NRPE commands for all process-related services. We already have
passive checks through NSCA for all these services and bricks, but passive
checks are submitted only when there is a state change, and Nagios may miss
those results if the services are not yet configured when the passive checks
are submitted. So we should run active checks at least once to determine the
initial status of each service.

Command 'check_proc_status' checks the status of all gluster-related
processes. Command 'check_brick_status' checks the status of the bricks on
the host.

Change-Id: I1f442c0c1e54d606576bc0814044f2f149bca220
Signed-off-by: Ramesh Nachimuthu <rnachimu@redhat.com>
Reviewed-on: http://review.gluster.org/7694
Reviewed-by: Kanagaraj M <kmayilsa@redhat.com>
Reviewed-by: Sahina Bose <sabose@redhat.com>
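Neither wrapper command is part of this diff; only the shared utility module
below is added. For orientation, a minimal sketch of how a check_proc_status
style wrapper might drive the module (the script layout, argument handling,
and the glustercli.volumeInfo() helper are assumptions, not code from this
change):

    #!/usr/bin/python
    # Hypothetical wrapper sketch (not part of this change): map a process
    # name given on the command line to the matching check in check_proc_util
    # and exit with the Nagios status code it returns.
    import sys

    import check_proc_util
    from glusternagios import glustercli
    from glusternagios import utils


    def main():
        proc = sys.argv[1] if len(sys.argv) > 1 else "glusterd"
        if proc == "glusterd":
            status, msg = check_proc_util.getGlusterdStatus()
        else:
            # volumeInfo() is assumed to return the parsed 'gluster volume
            # info' dictionary that the get*Status() helpers expect.
            volInfo = glustercli.volumeInfo()
            checks = {"nfs": check_proc_util.getNfsStatus,
                      "shd": check_proc_util.getShdStatus,
                      "quotad": check_proc_util.getQuotadStatus,
                      "smb": check_proc_util.getSmbStatus}
            func = checks.get(proc)
            if func is None:
                print "UNKNOWN: unsupported process name '%s'" % proc
                sys.exit(utils.PluginStatusCode.UNKNOWN)
            status, msg = func(volInfo)
        print msg
        sys.exit(status)


    if __name__ == "__main__":
        main()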
Diffstat (limited to 'plugins/check_proc_util.py')
-rwxr-xr-x  plugins/check_proc_util.py  196
1 file changed, 196 insertions(+), 0 deletions(-)
diff --git a/plugins/check_proc_util.py b/plugins/check_proc_util.py
new file mode 100755
index 0000000..20f57eb
--- /dev/null
+++ b/plugins/check_proc_util.py
@@ -0,0 +1,196 @@
+#!/usr/bin/python
+# Copyright (C) 2014 Red Hat Inc
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+
+import errno
+import psutil
+
+
+from glusternagios import utils
+from glusternagios import glustercli
+from glusternagios import storage
+
+
+_checkProc = utils.CommandPath('check_proc',
+                               '/usr/lib64/nagios/plugins/check_procs')
+
+_glusterVolPath = "/var/lib/glusterd/vols"
+_checkNfsCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a", "nfs"]
+_checkShdCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+                "glustershd"]
+_checkSmbCmd = [_checkProc.cmd, "-c", "1:", "-C", "smbd"]
+_checkQuotaCmd = [_checkProc.cmd, "-c", "1:", "-C", "glusterfs", "-a",
+                  "quotad"]
+_checkBrickCmd = [_checkProc.cmd, "-C", "glusterfsd"]
+_checkGlusterdCmd = [_checkProc.cmd, "-c", "1:", "-w", "1:1", "-C", "glusterd"]
+_checkCtdbCmd = [_checkProc.cmd, "-c", "1:", "-C", "ctdbd"]
+checkIdeSmartCmdPath = utils.CommandPath(
+    'check_ide_smart', '/usr/lib64/nagios/plugins/check_ide_smart')
+
+
+def getBrickStatus(volumeName, brickName):
+    status = None
+    brickPath = brickName.split(':')[1]
+    pidFile = brickName.replace(":/", "-").replace("/", "-") + ".pid"
+    try:
+        with open("%s/%s/run/%s" % (
+                _glusterVolPath, volumeName, pidFile)) as f:
+            if psutil.pid_exists(int(f.read().strip())):
+                status = utils.PluginStatusCode.OK
+                brickDevice = storage.getBrickDeviceName(brickPath)
+                disk = storage.getDisksForBrick(brickDevice)
+                cmd = [checkIdeSmartCmdPath.cmd, "-d", disk, "-n"]
+                rc, out, err = utils.execCmd(cmd)
+                if rc == utils.PluginStatusCode.CRITICAL and \
+                        "tests failed" in out[0]:
+                    status = utils.PluginStatusCode.WARNING
+                    msg = "WARNING: Brick %s: %s" % (brickPath, out[0])
+            else:
+                status = utils.PluginStatusCode.CRITICAL
+    except IOError as e:
+        if e.errno == errno.ENOENT:
+            status = utils.PluginStatusCode.CRITICAL
+        else:
+            status = utils.PluginStatusCode.UNKNOWN
+            msg = "UNKNOWN: Brick %s: %s" % (brickPath, str(e))
+    finally:
+        if status == utils.PluginStatusCode.OK:
+            msg = "OK: Brick %s" % brickPath
+        elif status == utils.PluginStatusCode.CRITICAL:
+            msg = "CRITICAL: Brick %s is down" % brickPath
+    return status, msg
+
+
+def getNfsStatus(volInfo):
+    # if nfs is already running, there is no need to check further
+    status, msg, error = utils.execCmd(_checkNfsCmd)
+    if status == utils.PluginStatusCode.OK:
+        return status, msg[0] if len(msg) > 0 else ""
+
+    # if nfs is not running and any of the volumes uses nfs,
+    # the user must be alerted
+    for volume, volumeInfo in volInfo.iteritems():
+        if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+            continue
+        nfsStatus = volumeInfo.get('options', {}).get('nfs.disable', 'off')
+        if nfsStatus == 'off':
+            msg = "CRITICAL: Process glusterfs-nfs is not running"
+            status = utils.PluginStatusCode.CRITICAL
+            break
+    else:
+        msg = "OK: No gluster volume uses nfs"
+        status = utils.PluginStatusCode.OK
+    return status, msg
+
+
+def getCtdbStatus(smbStatus, nfsStatus):
+    if smbStatus != utils.PluginStatusCode.OK and \
+            nfsStatus != utils.PluginStatusCode.OK:
+        return (utils.PluginStatusCode.OK,
+                "CTDB ignored as SMB and NFS are not running")
+
+    status, msg, error = utils.execCmd(_checkCtdbCmd)
+    if status != utils.PluginStatusCode.OK:
+        return utils.PluginStatusCode.UNKNOWN, "CTDB not configured"
+
+    # CTDB, SMB/NFS are running
+    status, msg, error = utils.execCmd(['ctdb', 'nodestatus'])
+    if status == utils.PluginStatusCode.OK:
+        if len(msg) > 0:
+            message = msg[0].split()
+            if len(message) > 2:
+                msg = "Node status: %s" % message[2]
+                if message[2] == 'UNHEALTHY':
+                    status = utils.PluginStatusCode.WARNING
+                elif message[2] in ['DISCONNECTED', 'BANNED', 'INACTIVE']:
+                    status = utils.PluginStatusCode.CRITICAL
+    else:
+        status = utils.PluginStatusCode.UNKNOWN
+    return status, msg
+
+
+def getSmbStatus(volInfo):
+    status, msg, error = utils.execCmd(_checkSmbCmd)
+    if status == utils.PluginStatusCode.OK:
+        return status, msg[0] if len(msg) > 0 else ""
+
+    # if smb is not running and any of the volumes uses smb,
+    # the user must be alerted
+    for k, v in volInfo.iteritems():
+        cifsStatus = v.get('options', {}).get('user.cifs', 'enable')
+        smbStatus = v.get('options', {}).get('user.smb', 'enable')
+        if cifsStatus == 'enable' and smbStatus == 'enable':
+            msg = "CRITICAL: Process smb is not running"
+            status = utils.PluginStatusCode.CRITICAL
+            break
+    else:
+        msg = "OK: No gluster volume uses smb"
+        status = utils.PluginStatusCode.OK
+    return status, msg
+
+
+def getQuotadStatus(volInfo):
+    # if quotad is already running, there is no need to check further
+    status, msg, error = utils.execCmd(_checkQuotaCmd)
+    if status == utils.PluginStatusCode.OK:
+        return status, msg[0] if len(msg) > 0 else ""
+
+    # if quotad is not running and any of the volumes has quota enabled,
+    # then the quotad process should be running on the host
+    for k, v in volInfo.iteritems():
+        quotadStatus = v.get('options', {}).get('features.quota', '')
+        if quotadStatus == 'on':
+            msg = "CRITICAL: Process quotad is not running"
+            status = utils.PluginStatusCode.CRITICAL
+            break
+    else:
+        msg = "OK: Quota not enabled"
+        status = utils.PluginStatusCode.OK
+    return status, msg
+
+
+def getShdStatus(volInfo):
+    status, msg, error = utils.execCmd(_checkShdCmd)
+    if status == utils.PluginStatusCode.OK:
+        return status, msg[0] if len(msg) > 0 else ""
+
+    hostUuid = glustercli.hostUUIDGet()
+    for volumeName, volumeInfo in volInfo.iteritems():
+        if volumeInfo['volumeStatus'] == glustercli.VolumeStatus.OFFLINE:
+            continue
+        if hasBricks(hostUuid, volumeInfo['bricksInfo']) and \
+                int(volumeInfo['replicaCount']) > 1:
+            status = utils.PluginStatusCode.CRITICAL
+            msg = "CRITICAL: Gluster Self Heal Daemon not running"
+            break
+    else:
+        msg = "OK: Process Gluster Self Heal Daemon"
+        status = utils.PluginStatusCode.OK
+    return status, msg
+
+
+def getGlusterdStatus():
+    status, msg, error = utils.execCmd(_checkGlusterdCmd)
+    msg = msg[0] if len(msg) > 0 else ""
+    return status, msg
+
+
+def hasBricks(hostUuid, bricks):
+    for brick in bricks:
+        if brick['hostUuid'] == hostUuid:
+            return True
+    return False
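
The check_brick_status command mentioned in the commit message would, in the
same spirit, loop over the bricks hosted on this node and call
getBrickStatus() for each one. A rough sketch under the same assumptions
(glustercli.volumeInfo(), plus the 'name' and 'hostUuid' keys in bricksInfo;
only 'hostUuid' is actually read by the module above):

    #!/usr/bin/python
    # Hypothetical sketch (not part of this change): report the worst
    # getBrickStatus() result over all bricks that live on this host.
    import sys

    import check_proc_util
    from glusternagios import glustercli
    from glusternagios import utils


    def main():
        hostUuid = glustercli.hostUUIDGet()
        worst = utils.PluginStatusCode.OK
        messages = []
        for volumeName, volumeInfo in glustercli.volumeInfo().iteritems():
            for brick in volumeInfo['bricksInfo']:
                if brick.get('hostUuid') != hostUuid:
                    continue
                # brick['name'] is assumed to be the "host:/path" form that
                # getBrickStatus() splits on ':'
                status, msg = check_proc_util.getBrickStatus(volumeName,
                                                             brick['name'])
                messages.append(msg)
                worst = max(worst, status)
        print "; ".join(messages) if messages else "OK: No bricks on this host"
        sys.exit(worst)


    if __name__ == "__main__":
        main()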