summaryrefslogtreecommitdiffstats
path: root/glustolibs-io
diff options
context:
space:
mode:
authorkshithijiyer <kshithij.ki@gmail.com>2020-10-06 09:05:44 +0530
committerArthy Loganathan <aloganat@redhat.com>2020-10-21 05:21:42 +0000
commit08faae06ab07b56b815aec5bfbfcf72d653e8055 (patch)
treef8998f6e8304e786f2d96eefc6a82e8f1cfe67b9 /glustolibs-io
parentcd7bf42beaf1590baaace8abe7dac55e7fc3388c (diff)
[Test] Add 2 memory leak tests and fix library issues
Scenarios added: ---------------- Test case: 1. Create a volume, start it and mount it. 2. Start I/O from mount point. 3. Check if there are any memory leaks and OOM killers. Test case: 1. Create a volume, start it and mount it. 2. Set features.cache-invalidation to ON. 3. Start I/O from mount point. 4. Run gluster volume heal command in a loop 5. Check if there are any memory leaks and OOM killers on servers. Design change: -------------- - self.id() is moved into test class as it was hitting bound errors in the original logic. - Logic changed for checking leaks fuse. - Fixed breakage in methods where ever needed. Change-Id: Icb600d833d0c08636b6002abb489342ea1f946d7 Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
Diffstat (limited to 'glustolibs-io')
-rw-r--r--glustolibs-io/glustolibs/io/memory_and_cpu_utils.py79
1 files changed, 52 insertions, 27 deletions
diff --git a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
index 3d105bf5e..4e1dadbd7 100644
--- a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
+++ b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
@@ -363,7 +363,7 @@ def compute_data_usage_stats_on_servers(nodes, test_name):
# Generate a dataframe from the csv file
dataframe = create_dataframe_from_csv(node, process, test_name)
- if not dataframe:
+ if dataframe.empty:
return {}
data_dict[node][process] = {}
@@ -424,7 +424,7 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
for node in nodes:
data_dict[node] = {}
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return {}
data_dict[node]['glusterfs'] = {}
@@ -436,7 +436,8 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
volume_status=None,
- volume=None):
+ volume=None,
+ vol_name=None):
"""Perform three point check
Args:
@@ -448,14 +449,16 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
kwargs:
volume_status(dict): Volume status output on the give name
volumne(str):Name of volume for which 3 point check has to be done
+ vol_name(str): Name of volume process according to volume status
Returns:
bool: True if memory leak instances are observed else False
"""
# Filter dataframe to be process wise if it's volume specific process
if process in ('glusterfs', 'glusterfsd'):
- pid = int(volume_status[volume][node][process]['pid'])
- dataframe = dataframe[dataframe['Process ID'] == pid]
+ if process == 'glusterfs' and vol_name:
+ pid = int(volume_status[volume][node][vol_name]['pid'])
+ dataframe = dataframe[dataframe['Process ID'] == pid]
# Compute usage gain throught the data frame
memory_increments = list(dataframe['Memory Usage'].diff().dropna())
@@ -476,12 +479,12 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
try:
# Check if memory gain had decrease in the consecutive
# entries, after 2 entry and betwen current and last entry
- if all(memory_increments[instance+1] >
+ if all([memory_increments[instance+1] >
memory_increments[instance],
memory_increments[instance+2] >
memory_increments[instance],
(memory_increments[len(memory_increments)-1] >
- memory_increments[instance])):
+ memory_increments[instance])]):
return True
except IndexError:
@@ -490,7 +493,7 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
g.log.info('Instance at last log entry.')
if process in ('glusterfs', 'glusterfsd'):
cmd = ("ps u -p %s | awk 'NR>1 && $11~/%s$/{print "
- "$6/1024}'" % (pid, process))
+ " $6/1024}'" % (pid, process))
else:
cmd = ("ps u -p `pgrep glusterd` | awk 'NR>1 && $11~/"
"glusterd$/{print $6/1024}'")
@@ -526,7 +529,7 @@ def check_for_memory_leaks_in_glusterd(nodes, test_name, gain=30.0):
is_there_a_leak = []
for node in nodes:
dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call 3 point check function
@@ -562,7 +565,7 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -573,7 +576,8 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
# Call 3 point check function
three_point_check = _perform_three_point_check_for_memory_leak(
- dataframe, node, 'glusterfs', gain, volume_status, volume)
+ dataframe, node, 'glusterfs', gain, volume_status, volume,
+ 'Self-heal Daemon')
if three_point_check:
g.log.error("Memory leak observed on node %s in shd "
"on volume %s", node, volume)
@@ -604,7 +608,7 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -615,7 +619,8 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
# Call 3 point check function
three_point_check = _perform_three_point_check_for_memory_leak(
- dataframe, node, 'glusterfsd', gain, volume_status, volume)
+ dataframe, node, 'glusterfsd', gain, volume_status, volume,
+ process)
if three_point_check:
g.log.error("Memory leak observed on node %s in brick "
" process for brick %s on volume %s", node,
@@ -637,7 +642,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
(Default:30)
Returns:
- bool: True if memory leak was obsevred else False
+ bool: True if memory leak was observed else False
NOTE:
This function should be executed when the volume is still mounted.
@@ -646,7 +651,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
for node in nodes:
# Get the volume status on the node
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call 3 point check function
@@ -655,7 +660,25 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
if three_point_check:
g.log.error("Memory leak observed on node %s for client",
node)
- is_there_a_leak.append(three_point_check)
+
+ # If I/O is constantly running on Clients the memory
+ # usage spikes up and stays at a point for long.
+ last_entry = dataframe['Memory Usage'].iloc[-1]
+ cmd = ("ps u -p `pidof glusterfs` | "
+ "awk 'NR>1 && $11~/glusterfs$/{print"
+ " $6/1024}'")
+ ret, out, _ = g.run(node, cmd)
+ if ret:
+ g.log.error('Unable to run the command to fetch current '
+ 'memory utilization.')
+ continue
+
+ if float(out) != last_entry:
+ if float(out) > last_entry:
+ is_there_a_leak.append(True)
+ continue
+
+ is_there_a_leak.append(False)
return any(is_there_a_leak)
@@ -671,9 +694,9 @@ def _check_for_oom_killers(nodes, process, oom_killer_list):
"""
cmd = ("grep -i 'killed process' /var/log/messages* "
"| grep -w '{}'".format(process))
- ret = g.run_parallel(nodes, cmd)
- for key in ret.keys():
- ret, out, _ = ret[key]
+ ret_codes = g.run_parallel(nodes, cmd)
+ for key in ret_codes.keys():
+ ret, out, _ = ret_codes[key]
if not ret:
g.log.error('OOM killer observed on %s for %s', key, process)
g.log.error(out)
@@ -712,7 +735,8 @@ def check_for_oom_killers_on_clients(nodes):
def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
- volume_status=None, volume=None):
+ volume_status=None, volume=None,
+ vol_name=None):
"""Check for cpu spikes for a given process
Args:
@@ -724,13 +748,14 @@ def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
kwargs:
volume_status(dict): Volume status output on the give name
volume(str):Name of volume for which check has to be done
+ vol_name(str): Name of volume process according to volume status
Returns:
bool: True if number of instances more than threshold else False
"""
# Filter dataframe to be process wise if it's volume specific process
if process in ('glusterfs', 'glusterfsd'):
- pid = int(volume_status[volume][node][process]['pid'])
+ pid = int(volume_status[volume][node][vol_name]['pid'])
dataframe = dataframe[dataframe['Process ID'] == pid]
# Check if usage is more than accepted amount of leak
@@ -758,7 +783,7 @@ def check_for_cpu_usage_spikes_on_glusterd(nodes, test_name, threshold=3):
is_there_a_spike = []
for node in nodes:
dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call function to check for cpu spikes
@@ -795,7 +820,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -807,7 +832,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
# Call function to check for cpu spikes
cpu_spikes = _check_for_cpu_usage_spikes(
dataframe, node, 'glusterfs', threshold, volume_status,
- volume)
+ volume, 'Self-heal Daemon')
if cpu_spikes:
g.log.error("CPU usage spikes observed more than "
"threshold %d on node %s on volume %s for shd",
@@ -839,7 +864,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -851,7 +876,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
# Call function to check for cpu spikes
cpu_spikes = _check_for_cpu_usage_spikes(
dataframe, node, 'glusterfsd', threshold, volume_status,
- volume)
+ volume, process)
if cpu_spikes:
g.log.error("CPU usage spikes observed more than "
"threshold %d on node %s on volume %s for "
@@ -884,7 +909,7 @@ def check_for_cpu_usage_spikes_on_glusterfs_fuse(nodes, test_name,
for node in nodes:
# Get the volume status on the node
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call function to check for cpu spikes