author     kshithijiyer <kshithij.ki@gmail.com>      2020-10-06 09:05:44 +0530
committer  Arthy Loganathan <aloganat@redhat.com>    2020-10-21 05:21:42 +0000
commit     08faae06ab07b56b815aec5bfbfcf72d653e8055 (patch)
tree       f8998f6e8304e786f2d96eefc6a82e8f1cfe67b9
parent     cd7bf42beaf1590baaace8abe7dac55e7fc3388c (diff)
[Test] Add 2 memory leak tests and fix library issues
Scenarios added:
----------------
Test case 1:
 1. Create a volume, start it and mount it.
 2. Start I/O from mount point.
 3. Check if there are any memory leaks and OOM killers.

Test case 2:
 1. Create a volume, start it and mount it.
 2. Set features.cache-invalidation to ON.
 3. Start I/O from mount point.
 4. Run gluster volume heal command in a loop.
 5. Check if there are any memory leaks and OOM killers on servers.

Design change:
--------------
- self.id() is moved into the test class as it was hitting bound errors
  in the original logic.
- Changed the logic for checking leaks on FUSE clients.
- Fixed breakage in methods wherever needed.

Change-Id: Icb600d833d0c08636b6002abb489342ea1f946d7
Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
-rwxr-xr-x  glustolibs-gluster/glustolibs/gluster/gluster_base_class.py                            75
-rw-r--r--  glustolibs-io/glustolibs/io/memory_and_cpu_utils.py                                     79
-rw-r--r--  tests/functional/resource_leak/__init__.py                                               0
-rw-r--r--  tests/functional/resource_leak/test_basic_memory_leaks.py                              120
-rw-r--r--  tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py   117
5 files changed, 337 insertions, 54 deletions
diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
index baec1be8a..3ce38a304 100755
--- a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
+++ b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
@@ -1107,9 +1107,13 @@ class GlusterBaseClass(TestCase):
g.log.info("Teardown nfs ganesha cluster succeeded")
@classmethod
- def start_memory_and_cpu_usage_logging(cls, interval=60, count=100):
+ def start_memory_and_cpu_usage_logging(cls, test_id, interval=60,
+ count=100):
"""Upload logger script and start logging usage on cluster
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
interval(int): Time interval after which logs are to be collected
(Default: 60)
@@ -1137,16 +1141,18 @@ class GlusterBaseClass(TestCase):
# Start logging on servers and clients
proc_dict = log_memory_and_cpu_usage_on_cluster(
- cls.servers, cls.clients, cls.id(), interval, count)
+ cls.servers, cls.clients, test_id, interval, count)
return proc_dict
@classmethod
- def compute_and_print_usage_stats(cls, proc_dict, kill_proc=False):
+ def compute_and_print_usage_stats(cls, test_id, proc_dict,
+ kill_proc=False):
"""Compute and print CPU and memory usage statistics
Args:
proc_dict(dict):Dictionary of logging processes
+ test_id(str): ID of the test running fetched from self.id()
Kwargs:
kill_proc(bool): Kill logging process if true else wait
@@ -1172,21 +1178,25 @@ class GlusterBaseClass(TestCase):
g.log.error("Processes didn't complete still running.")
# Compute and print stats for servers
- ret = compute_data_usage_stats_on_servers(cls.servers, cls.id())
+ ret = compute_data_usage_stats_on_servers(cls.servers, test_id)
g.log.info('*' * 50)
g.log.info(ret) # TODO: Make logged message more structured
g.log.info('*' * 50)
# Compute and print stats for clients
- ret = compute_data_usage_stats_on_clients(cls.clients, cls.id())
+ ret = compute_data_usage_stats_on_clients(cls.clients, test_id)
g.log.info('*' * 50)
g.log.info(ret) # TODO: Make logged message more structured
g.log.info('*' * 50)
@classmethod
- def check_for_memory_leaks_and_oom_kills_on_servers(cls, gain=30.0):
+ def check_for_memory_leaks_and_oom_kills_on_servers(cls, test_id,
+ gain=30.0):
"""Check for memory leaks and OOM kills on servers
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
gain(float): Accepted amount of leak for a given testcase in MB
(Default:30)
@@ -1204,31 +1214,35 @@ class GlusterBaseClass(TestCase):
check_for_oom_killers_on_servers)
# Check for memory leaks on glusterd
- if check_for_memory_leaks_in_glusterd(cls.servers, cls.id(), gain):
+ if check_for_memory_leaks_in_glusterd(cls.servers, test_id, gain):
g.log.error("Memory leak on glusterd.")
return True
- # Check for memory leaks on shd
- if check_for_memory_leaks_in_glusterfs(cls.servers, cls.id(), gain):
- g.log.error("Memory leak on shd.")
- return True
+ if cls.volume_type != "distributed":
+ # Check for memory leaks on shd
+ if check_for_memory_leaks_in_glusterfs(cls.servers, test_id,
+ gain):
+ g.log.error("Memory leak on shd.")
+ return True
# Check for memory leaks on brick processes
- if check_for_memory_leaks_in_glusterfsd(cls.servers, cls.id(), gain):
+ if check_for_memory_leaks_in_glusterfsd(cls.servers, test_id, gain):
g.log.error("Memory leak on brick process.")
return True
# Check OOM kills on servers for all gluster server processes
- ret = check_for_oom_killers_on_servers(cls.servers)
- if not ret:
+ if check_for_oom_killers_on_servers(cls.servers):
g.log.error('OOM kills present on servers.')
return True
return False
@classmethod
- def check_for_memory_leaks_and_oom_kills_on_clients(cls, gain=30):
+ def check_for_memory_leaks_and_oom_kills_on_clients(cls, test_id, gain=30):
"""Check for memory leaks and OOM kills on clients
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
gain(float): Accepted amount of leak for a given testcase in MB
(Default:30)
@@ -1244,7 +1258,7 @@ class GlusterBaseClass(TestCase):
check_for_oom_killers_on_clients)
# Check for memory leak on glusterfs fuse process
- if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, cls.id(),
+ if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, test_id,
gain):
g.log.error("Memory leaks observed on FUSE clients.")
return True
@@ -1256,9 +1270,12 @@ class GlusterBaseClass(TestCase):
return False
@classmethod
- def check_for_cpu_usage_spikes_on_servers(cls, threshold=3):
+ def check_for_cpu_usage_spikes_on_servers(cls, test_id, threshold=3):
"""Check for CPU usage spikes on servers
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
threshold(int): Accepted number of instances of 100% CPU usage
(Default:3)
@@ -1274,21 +1291,22 @@ class GlusterBaseClass(TestCase):
check_for_cpu_usage_spikes_on_glusterfsd)
# Check for CPU usage spikes on glusterd
- if check_for_cpu_usage_spikes_on_glusterd(cls.servers, cls.id(),
+ if check_for_cpu_usage_spikes_on_glusterd(cls.servers, test_id,
threshold):
g.log.error("CPU usage spikes observed more than threshold "
"on glusterd.")
return True
- # Check for CPU usage spikes on shd
- if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, cls.id(),
- threshold):
- g.log.error("CPU usage spikes observed more than threshold "
- "on shd.")
- return True
+ if cls.volume_type != "distributed":
+ # Check for CPU usage spikes on shd
+ if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, test_id,
+ threshold):
+ g.log.error("CPU usage spikes observed more than threshold "
+ "on shd.")
+ return True
# Check for CPU usage spikes on brick processes
- if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, cls.id(),
+ if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, test_id,
threshold):
g.log.error("CPU usage spikes observed more than threshold "
"on shd.")
@@ -1296,9 +1314,12 @@ class GlusterBaseClass(TestCase):
return False
@classmethod
- def check_for_cpu_spikes_on_clients(cls, threshold=3):
+ def check_for_cpu_spikes_on_clients(cls, test_id, threshold=3):
"""Check for CPU usage spikes on clients
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
threshold(int): Accepted number of instances of 100% CPU usage
(Default:3)
@@ -1312,6 +1333,6 @@ class GlusterBaseClass(TestCase):
check_for_cpu_usage_spikes_on_glusterfs_fuse)
ret = check_for_cpu_usage_spikes_on_glusterfs_fuse(cls.clients,
- cls.id(),
+ test_id,
threshold)
return ret
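
The net effect of the hunks above is that the usage-monitoring classmethods no longer call cls.id() themselves; each test captures its own ID once in setUp() and threads it through every call. A minimal sketch of the new calling convention, using only the methods changed above (the workload in the middle is a placeholder, not part of this patch):

    from glustolibs.gluster.gluster_base_class import GlusterBaseClass

    class TestUsageMonitoring(GlusterBaseClass):

        def setUp(self):
            self.get_super_method(self, 'setUp')()
            # Capture the test ID here; calling cls.id() inside the
            # classmethods was hitting bound-method errors (see commit
            # message), hence the new test_id argument.
            self.test_id = self.id()

        def test_sample(self):
            # Start usage logging on all servers and clients for this test
            proc_dict = self.start_memory_and_cpu_usage_logging(
                self.test_id, interval=60, count=30)

            # ... run the workload under test ...

            # Summarize usage, then run the leak/OOM and CPU-spike checks
            self.compute_and_print_usage_stats(self.test_id, proc_dict,
                                               kill_proc=False)
            leaks = self.check_for_memory_leaks_and_oom_kills_on_servers(
                self.test_id)
            spikes = self.check_for_cpu_usage_spikes_on_servers(self.test_id)
            self.assertFalse(leaks or spikes,
                             "Leaks or CPU spikes observed on servers")
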
diff --git a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
index 3d105bf5e..4e1dadbd7 100644
--- a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
+++ b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
@@ -363,7 +363,7 @@ def compute_data_usage_stats_on_servers(nodes, test_name):
# Generate a dataframe from the csv file
dataframe = create_dataframe_from_csv(node, process, test_name)
- if not dataframe:
+ if dataframe.empty:
return {}
data_dict[node][process] = {}
@@ -424,7 +424,7 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
for node in nodes:
data_dict[node] = {}
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return {}
data_dict[node]['glusterfs'] = {}
@@ -436,7 +436,8 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
volume_status=None,
- volume=None):
+ volume=None,
+ vol_name=None):
"""Perform three point check
Args:
@@ -448,14 +449,16 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
kwargs:
volume_status(dict): Volume status output on the given node
volume(str): Name of volume for which 3 point check has to be done
+ vol_name(str): Name of volume process according to volume status
Returns:
bool: True if memory leak instances are observed else False
"""
# Filter dataframe to be process wise if it's volume specific process
if process in ('glusterfs', 'glusterfsd'):
- pid = int(volume_status[volume][node][process]['pid'])
- dataframe = dataframe[dataframe['Process ID'] == pid]
+ if process == 'glusterfs' and vol_name:
+ pid = int(volume_status[volume][node][vol_name]['pid'])
+ dataframe = dataframe[dataframe['Process ID'] == pid]
# Compute usage gain through the data frame
memory_increments = list(dataframe['Memory Usage'].diff().dropna())
@@ -476,12 +479,12 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
try:
# Check if the memory gain is still increasing in the next
# two entries and between the current and the last entry
- if all(memory_increments[instance+1] >
+ if all([memory_increments[instance+1] >
memory_increments[instance],
memory_increments[instance+2] >
memory_increments[instance],
(memory_increments[len(memory_increments)-1] >
- memory_increments[instance])):
+ memory_increments[instance])]):
return True
except IndexError:
@@ -490,7 +493,7 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
g.log.info('Instance at last log entry.')
if process in ('glusterfs', 'glusterfsd'):
cmd = ("ps u -p %s | awk 'NR>1 && $11~/%s$/{print "
- "$6/1024}'" % (pid, process))
+ " $6/1024}'" % (pid, process))
else:
cmd = ("ps u -p `pgrep glusterd` | awk 'NR>1 && $11~/"
"glusterd$/{print $6/1024}'")
@@ -526,7 +529,7 @@ def check_for_memory_leaks_in_glusterd(nodes, test_name, gain=30.0):
is_there_a_leak = []
for node in nodes:
dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call 3 point check function
@@ -562,7 +565,7 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -573,7 +576,8 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
# Call 3 point check function
three_point_check = _perform_three_point_check_for_memory_leak(
- dataframe, node, 'glusterfs', gain, volume_status, volume)
+ dataframe, node, 'glusterfs', gain, volume_status, volume,
+ 'Self-heal Daemon')
if three_point_check:
g.log.error("Memory leak observed on node %s in shd "
"on volume %s", node, volume)
@@ -604,7 +608,7 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -615,7 +619,8 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
# Call 3 point check function
three_point_check = _perform_three_point_check_for_memory_leak(
- dataframe, node, 'glusterfsd', gain, volume_status, volume)
+ dataframe, node, 'glusterfsd', gain, volume_status, volume,
+ process)
if three_point_check:
g.log.error("Memory leak observed on node %s in brick "
" process for brick %s on volume %s", node,
@@ -637,7 +642,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
(Default:30)
Returns:
- bool: True if memory leak was obsevred else False
+ bool: True if memory leak was observed else False
NOTE:
This function should be executed when the volume is still mounted.
@@ -646,7 +651,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
for node in nodes:
# Get the volume status on the node
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call 3 point check function
@@ -655,7 +660,25 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
if three_point_check:
g.log.error("Memory leak observed on node %s for client",
node)
- is_there_a_leak.append(three_point_check)
+
+ # If I/O is constantly running on clients, the memory
+ # usage spikes up and stays at a point for long.
+ last_entry = dataframe['Memory Usage'].iloc[-1]
+ cmd = ("ps u -p `pidof glusterfs` | "
+ "awk 'NR>1 && $11~/glusterfs$/{print"
+ " $6/1024}'")
+ ret, out, _ = g.run(node, cmd)
+ if ret:
+ g.log.error('Unable to run the command to fetch current '
+ 'memory utilization.')
+ continue
+
+ if float(out) != last_entry:
+ if float(out) > last_entry:
+ is_there_a_leak.append(True)
+ continue
+
+ is_there_a_leak.append(False)
return any(is_there_a_leak)
@@ -671,9 +694,9 @@ def _check_for_oom_killers(nodes, process, oom_killer_list):
"""
cmd = ("grep -i 'killed process' /var/log/messages* "
"| grep -w '{}'".format(process))
- ret = g.run_parallel(nodes, cmd)
- for key in ret.keys():
- ret, out, _ = ret[key]
+ ret_codes = g.run_parallel(nodes, cmd)
+ for key in ret_codes.keys():
+ ret, out, _ = ret_codes[key]
if not ret:
g.log.error('OOM killer observed on %s for %s', key, process)
g.log.error(out)
@@ -712,7 +735,8 @@ def check_for_oom_killers_on_clients(nodes):
def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
- volume_status=None, volume=None):
+ volume_status=None, volume=None,
+ vol_name=None):
"""Check for cpu spikes for a given process
Args:
@@ -724,13 +748,14 @@ def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
kwargs:
volume_status(dict): Volume status output on the given node
volume(str): Name of volume for which check has to be done
+ vol_name(str): Name of volume process according to volume status
Returns:
bool: True if number of instances more than threshold else False
"""
# Filter dataframe to be process wise if it's volume specific process
if process in ('glusterfs', 'glusterfsd'):
- pid = int(volume_status[volume][node][process]['pid'])
+ pid = int(volume_status[volume][node][vol_name]['pid'])
dataframe = dataframe[dataframe['Process ID'] == pid]
# Check if usage is more than accepted amount of leak
@@ -758,7 +783,7 @@ def check_for_cpu_usage_spikes_on_glusterd(nodes, test_name, threshold=3):
is_there_a_spike = []
for node in nodes:
dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call function to check for cpu spikes
@@ -795,7 +820,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -807,7 +832,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
# Call function to check for cpu spikes
cpu_spikes = _check_for_cpu_usage_spikes(
dataframe, node, 'glusterfs', threshold, volume_status,
- volume)
+ volume, 'Self-heal Daemon')
if cpu_spikes:
g.log.error("CPU usage spikes observed more than "
"threshold %d on node %s on volume %s for shd",
@@ -839,7 +864,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -851,7 +876,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
# Call function to check for cpu spikes
cpu_spikes = _check_for_cpu_usage_spikes(
dataframe, node, 'glusterfsd', threshold, volume_status,
- volume)
+ volume, process)
if cpu_spikes:
g.log.error("CPU usage spikes observed more than "
"threshold %d on node %s on volume %s for "
@@ -884,7 +909,7 @@ def check_for_cpu_usage_spikes_on_glusterfs_fuse(nodes, test_name,
for node in nodes:
# Get the volume status on the node
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call function to check for cpu spikes
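
Several hunks above replace "if not dataframe:" with "if dataframe.empty:". Truth-testing a pandas DataFrame raises ValueError ("The truth value of a DataFrame is ambiguous"), so the old guard could never work; .empty is the supported emptiness check. A small, self-contained illustration using the same column name the three-point check reads:

    import pandas as pd

    df = pd.DataFrame({'Memory Usage': [10.0, 10.5, 11.0]})

    # "if not df:" raises ValueError (ambiguous truth value of a DataFrame);
    # ".empty" is the supported emptiness check used in the hunks above.
    if df.empty:
        print("no usage samples collected")
    else:
        # Per-interval memory deltas, the series the three-point check walks
        print(list(df['Memory Usage'].diff().dropna()))   # [0.5, 0.5]
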
diff --git a/tests/functional/resource_leak/__init__.py b/tests/functional/resource_leak/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/functional/resource_leak/__init__.py
diff --git a/tests/functional/resource_leak/test_basic_memory_leaks.py b/tests/functional/resource_leak/test_basic_memory_leaks.py
new file mode 100644
index 000000000..46b2c0c6d
--- /dev/null
+++ b/tests/functional/resource_leak/test_basic_memory_leaks.py
@@ -0,0 +1,120 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.io.utils import (run_linux_untar, validate_io_procs,
+ wait_for_io_to_complete)
+from glustolibs.io.memory_and_cpu_utils import (
+ wait_for_logging_processes_to_stop)
+
+
+@runs_on([['distributed-replicated', 'distributed-arbiter',
+ 'distributed-dispersed', 'distributed', 'replicated',
+ 'arbiter', 'dispersed'], ['glusterfs']])
+class TestBasicMemoryleak(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Set test_id for data gathering
+ self.test_id = self.id()
+
+ # Set I/O flag to false
+ self.is_io_running = False
+
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume(self.mounts)
+ if not ret:
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
+
+ def tearDown(self):
+
+ # Wait for I/O to complete
+ if self.is_io_running:
+ if wait_for_io_to_complete(self.list_of_io_processes,
+ self.mounts):
+ raise ExecutionError("Failed to wait for I/O to complete")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume(self.mounts)
+ if not ret:
+ raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+ self.get_super_method(self, 'tearDown')()
+
+ def test_basic_memory_leak(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Start I/O from mount point.
+ 3. Check if there are any memory leaks and OOM killers.
+ """
+ # Start monitoring resource usage on servers and clients
+ monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
+ self.test_id, count=30)
+ self.assertIsNotNone(monitor_proc_dict,
+ "Failed to start monitoring on servers and "
+ "clients")
+
+ # Create a dir to start untar
+ self.linux_untar_dir = "{}/{}".format(self.mounts[1].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.mounts[1].client_system, self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start multiple I/O from mount points
+ self.list_of_io_processes = []
+ cmd = ("cd {};for i in `seq 1 100`; do mkdir dir.$i ;"
+ "for j in `seq 1 1000`; do dd if=/dev/random "
+ "of=dir.$i/testfile.$j bs=1k count=10;done;done"
+ .format(self.mounts[0].mountpoint))
+ ret = g.run_async(self.mounts[0].client_system, cmd)
+ self.list_of_io_processes = [ret]
+
+ # Start linux untar on dir linuxuntar
+ ret = run_linux_untar(self.mounts[1].client_system,
+ self.mounts[1].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.list_of_io_processes += ret
+ self.is_io_running = True
+
+ # Wait for I/O to complete and validate I/O on mount points
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts)
+ self.assertTrue(ret, "I/O failed on mount point")
+ self.is_io_running = False
+
+ # Wait for monitoring processes to complete
+ ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
+ cluster=True)
+ self.assertTrue(ret,
+ "ERROR: Failed to stop monitoring processes")
+
+ # Check if there are any memory leaks and OOM killers
+ ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
+ self.test_id)
+ self.assertFalse(ret,
+ "Memory leak and OOM kills check failed on servers")
+
+ ret = self.check_for_memory_leaks_and_oom_kills_on_clients(
+ self.test_id)
+ self.assertFalse(ret,
+ "Memory leak and OOM kills check failed on clients")
+ g.log.info("No memory leaks or OOM kills found on serves and clients")
diff --git a/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py b/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py
new file mode 100644
index 000000000..3a22a5068
--- /dev/null
+++ b/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete)
+from glustolibs.io.memory_and_cpu_utils import (
+ wait_for_logging_processes_to_stop)
+
+
+@runs_on([['distributed-replicated', 'distributed-arbiter',
+ 'distributed-dispersed', 'replicated',
+ 'arbiter', 'dispersed'], ['glusterfs']])
+class TestMemoryLeakInShdWithCacheInvalidationOn(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Set test_id for data gathering
+ self.test_id = self.id()
+
+ # Set I/O flag to false
+ self.is_io_running = False
+
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume([self.mounts[0]])
+ if not ret:
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
+
+ def tearDown(self):
+
+ # Wait for I/O to complete
+ if self.is_io_running:
+ if wait_for_io_to_complete(self.list_of_io_processes,
+ self.mounts[0]):
+ raise ExecutionError("Failed to wait for I/O to complete")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
+ if not ret:
+ raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+ self.get_super_method(self, 'tearDown')()
+
+ def test_memory_leak_in_shd_with_cache_invalidation_on(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Set features.cache-invalidation to ON.
+ 3. Start I/O from mount point.
+ 4. Run gluster volume heal command in a loop
+ 5. Check if there are any memory leaks and OOM killers on servers.
+ """
+ # Start monitoring resource usage on servers and clients
+ monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
+ self.test_id, count=10)
+ self.assertIsNotNone(monitor_proc_dict,
+ "Failed to start monitoring on servers and"
+ " clients")
+
+ # Set features.cache-invalidation to ON
+ ret = set_volume_options(self.mnode, self.volname,
+ {'features.cache-invalidation': 'on'})
+ self.assertTrue(ret, "Failed to set features.cache-invalidation to ON")
+ g.log.info("Successfully set features.cache-invalidation to ON")
+
+ # Start multiple I/O from mount points
+ self.list_of_io_processes = []
+ cmd = ("cd {};for i in `seq 1 1000`;do echo 'abc' > myfile;done"
+ .format(self.mounts[0].mountpoint))
+ ret = g.run_async(self.mounts[0].client_system, cmd)
+ self.list_of_io_processes = [ret]
+ self.is_io_running = True
+
+ # Run gluster volume heal command in a loop for 100 iterations
+ for iteration in range(0, 100):
+ g.log.info("Running gluster volume heal command for %d time",
+ iteration)
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, "Failed to trigger heal on volume")
+ g.log.info("Ran gluster volume heal command in a loop for "
+ "100 iterations.")
+
+ # Wait for I/O to complete and validate I/O on mount points
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts[0])
+ self.assertTrue(ret, "I/O failed on mount point")
+ self.is_io_running = False
+
+ # Wait for monitoring processes to complete
+ ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
+ cluster=True)
+ self.assertTrue(ret,
+ "ERROR: Failed to stop monitoring processes")
+
+ # Check if there are any memory leaks and OOM killers
+ ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
+ self.test_id)
+ self.assertFalse(ret,
+ "Memory leak and OOM kills check failed on servers")