From bc31d1f4f278dea2f0c0892a412734302c6bf308 Mon Sep 17 00:00:00 2001 From: Sahina Bose Date: Tue, 25 Nov 2014 12:18:45 +0530 Subject: plugins: Handle volume locked errors When the plugin returns an unknown status due to a transaction in progress, the plugin should try again after some time to avoid propagating this error to Nagios Change-Id: I83fa7c3ec7382d1263e36995662df83b5ae39aa6 Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1109752 Signed-off-by: Sahina Bose Reviewed-on: http://review.gluster.org/9192 Reviewed-by: Ramesh N Reviewed-by: Kanagaraj M Reviewed-by: Bala FA --- plugins/check_vol_server.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/plugins/check_vol_server.py b/plugins/check_vol_server.py index f6c11d3..4e42025 100755 --- a/plugins/check_vol_server.py +++ b/plugins/check_vol_server.py @@ -4,6 +4,7 @@ import json import random import argparse import livestatus +import time from glusternagios import utils import server_utils @@ -224,9 +225,14 @@ def _executeRandomHost(hostgroup, command): #in the host group and send the command until #the command is successful - #No need to send it to host which we already sent - list_hosts.remove(host) + #No need to send it to host which we already sent unless volume locked + if not output.contains("UNKNOWN: temporary error"): + # if volume locked,we can try on same host + list_hosts.remove(host) for host in list_hosts: + if output.contains("UNKNOWN: temporary error"): + # volume locked, so wait before trying again + time.sleep(2) # sleep for 2 seconds host_address = _getHostAddress(host) status, output = execNRPECommand(server_utils.getNRPEBaseCommand( host_address, -- cgit