author     kshithijiyer <kshithij.ki@gmail.com>      2020-10-06 09:05:44 +0530
committer  Arthy Loganathan <aloganat@redhat.com>    2020-10-21 05:21:42 +0000
commit     08faae06ab07b56b815aec5bfbfcf72d653e8055 (patch)
tree       f8998f6e8304e786f2d96eefc6a82e8f1cfe67b9
parent     cd7bf42beaf1590baaace8abe7dac55e7fc3388c (diff)
[Test] Add 2 memory leak tests and fix library issues
Scenarios added:
----------------
Test case 1:
 1. Create a volume, start it and mount it.
 2. Start I/O from mount point.
 3. Check if there are any memory leaks and OOM killers.

Test case 2:
 1. Create a volume, start it and mount it.
 2. Set features.cache-invalidation to ON.
 3. Start I/O from mount point.
 4. Run gluster volume heal command in a loop.
 5. Check if there are any memory leaks and OOM killers on servers.

Design change:
--------------
- self.id() is moved into the test class as it was hitting bound errors
  in the original logic.
- Changed the logic for checking leaks on FUSE clients.
- Fixed breakage in methods wherever needed.

Change-Id: Icb600d833d0c08636b6002abb489342ea1f946d7
Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
-rwxr-xr-x  glustolibs-gluster/glustolibs/gluster/gluster_base_class.py                            75
-rw-r--r--  glustolibs-io/glustolibs/io/memory_and_cpu_utils.py                                     79
-rw-r--r--  tests/functional/resource_leak/__init__.py                                               0
-rw-r--r--  tests/functional/resource_leak/test_basic_memory_leaks.py                              120
-rw-r--r--  tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py   117
5 files changed, 337 insertions, 54 deletions
diff --git a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
index baec1be8a..3ce38a304 100755
--- a/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
+++ b/glustolibs-gluster/glustolibs/gluster/gluster_base_class.py
@@ -1107,9 +1107,13 @@ class GlusterBaseClass(TestCase):
g.log.info("Teardown nfs ganesha cluster succeeded")
@classmethod
- def start_memory_and_cpu_usage_logging(cls, interval=60, count=100):
+ def start_memory_and_cpu_usage_logging(cls, test_id, interval=60,
+ count=100):
"""Upload logger script and start logging usage on cluster
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
interval(int): Time interval after which logs are to be collected
(Default: 60)
@@ -1137,16 +1141,18 @@ class GlusterBaseClass(TestCase):
# Start logging on servers and clients
proc_dict = log_memory_and_cpu_usage_on_cluster(
- cls.servers, cls.clients, cls.id(), interval, count)
+ cls.servers, cls.clients, test_id, interval, count)
return proc_dict
@classmethod
- def compute_and_print_usage_stats(cls, proc_dict, kill_proc=False):
+ def compute_and_print_usage_stats(cls, test_id, proc_dict,
+ kill_proc=False):
"""Compute and print CPU and memory usage statistics
Args:
proc_dict(dict):Dictionary of logging processes
+ test_id(str): ID of the test running fetched from self.id()
Kwargs:
kill_proc(bool): Kill logging process if true else wait
@@ -1172,21 +1178,25 @@ class GlusterBaseClass(TestCase):
g.log.error("Processes didn't complete still running.")
# Compute and print stats for servers
- ret = compute_data_usage_stats_on_servers(cls.servers, cls.id())
+ ret = compute_data_usage_stats_on_servers(cls.servers, test_id)
g.log.info('*' * 50)
g.log.info(ret) # TODO: Make logged message more structured
g.log.info('*' * 50)
# Compute and print stats for clients
- ret = compute_data_usage_stats_on_clients(cls.clients, cls.id())
+ ret = compute_data_usage_stats_on_clients(cls.clients, test_id)
g.log.info('*' * 50)
g.log.info(ret) # TODO: Make logged message more structured
g.log.info('*' * 50)
@classmethod
- def check_for_memory_leaks_and_oom_kills_on_servers(cls, gain=30.0):
+ def check_for_memory_leaks_and_oom_kills_on_servers(cls, test_id,
+ gain=30.0):
"""Check for memory leaks and OOM kills on servers
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
gain(float): Accepted amount of leak for a given testcase in MB
(Default:30)
@@ -1204,31 +1214,35 @@ class GlusterBaseClass(TestCase):
check_for_oom_killers_on_servers)
# Check for memory leaks on glusterd
- if check_for_memory_leaks_in_glusterd(cls.servers, cls.id(), gain):
+ if check_for_memory_leaks_in_glusterd(cls.servers, test_id, gain):
g.log.error("Memory leak on glusterd.")
return True
- # Check for memory leaks on shd
- if check_for_memory_leaks_in_glusterfs(cls.servers, cls.id(), gain):
- g.log.error("Memory leak on shd.")
- return True
+ if cls.volume_type != "distributed":
+ # Check for memory leaks on shd
+ if check_for_memory_leaks_in_glusterfs(cls.servers, test_id,
+ gain):
+ g.log.error("Memory leak on shd.")
+ return True
# Check for memory leaks on brick processes
- if check_for_memory_leaks_in_glusterfsd(cls.servers, cls.id(), gain):
+ if check_for_memory_leaks_in_glusterfsd(cls.servers, test_id, gain):
g.log.error("Memory leak on brick process.")
return True
# Check OOM kills on servers for all gluster server processes
- ret = check_for_oom_killers_on_servers(cls.servers)
- if not ret:
+ if check_for_oom_killers_on_servers(cls.servers):
g.log.error('OOM kills present on servers.')
return True
return False
@classmethod
- def check_for_memory_leaks_and_oom_kills_on_clients(cls, gain=30):
+ def check_for_memory_leaks_and_oom_kills_on_clients(cls, test_id, gain=30):
"""Check for memory leaks and OOM kills on clients
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
gain(float): Accepted amount of leak for a given testcase in MB
(Default:30)
@@ -1244,7 +1258,7 @@ class GlusterBaseClass(TestCase):
check_for_oom_killers_on_clients)
# Check for memory leak on glusterfs fuse process
- if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, cls.id(),
+ if check_for_memory_leaks_in_glusterfs_fuse(cls.clients, test_id,
gain):
g.log.error("Memory leaks observed on FUSE clients.")
return True
@@ -1256,9 +1270,12 @@ class GlusterBaseClass(TestCase):
return False
@classmethod
- def check_for_cpu_usage_spikes_on_servers(cls, threshold=3):
+ def check_for_cpu_usage_spikes_on_servers(cls, test_id, threshold=3):
"""Check for CPU usage spikes on servers
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
threshold(int): Accepted number of instances of 100% CPU usage
(Default:3)
@@ -1274,21 +1291,22 @@ class GlusterBaseClass(TestCase):
check_for_cpu_usage_spikes_on_glusterfsd)
# Check for CPU usage spikes on glusterd
- if check_for_cpu_usage_spikes_on_glusterd(cls.servers, cls.id(),
+ if check_for_cpu_usage_spikes_on_glusterd(cls.servers, test_id,
threshold):
g.log.error("CPU usage spikes observed more than threshold "
"on glusterd.")
return True
- # Check for CPU usage spikes on shd
- if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, cls.id(),
- threshold):
- g.log.error("CPU usage spikes observed more than threshold "
- "on shd.")
- return True
+ if cls.volume_type != "distributed":
+ # Check for CPU usage spikes on shd
+ if check_for_cpu_usage_spikes_on_glusterfs(cls.servers, test_id,
+ threshold):
+ g.log.error("CPU usage spikes observed more than threshold "
+ "on shd.")
+ return True
# Check for CPU usage spikes on brick processes
- if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, cls.id(),
+ if check_for_cpu_usage_spikes_on_glusterfsd(cls.servers, test_id,
threshold):
g.log.error("CPU usage spikes observed more than threshold "
"on shd.")
@@ -1296,9 +1314,12 @@ class GlusterBaseClass(TestCase):
return False
@classmethod
- def check_for_cpu_spikes_on_clients(cls, threshold=3):
+ def check_for_cpu_spikes_on_clients(cls, test_id, threshold=3):
"""Check for CPU usage spikes on clients
+ Args:
+ test_id(str): ID of the test running fetched from self.id()
+
Kwargs:
threshold(int): Accepted number of instances of 100% CPU usage
(Default:3)
@@ -1312,6 +1333,6 @@ class GlusterBaseClass(TestCase):
check_for_cpu_usage_spikes_on_glusterfs_fuse)
ret = check_for_cpu_usage_spikes_on_glusterfs_fuse(cls.clients,
- cls.id(),
+ test_id,
threshold)
return ret
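
The net effect of the hunks above is that the usage-monitoring classmethods no longer call cls.id() themselves; each test captures its own ID once in setUp() and threads it through every call. A minimal sketch of the new calling convention, using only the methods changed above (the workload in the middle is a placeholder, not part of this patch):

    from glustolibs.gluster.gluster_base_class import GlusterBaseClass

    class TestUsageMonitoring(GlusterBaseClass):

        def setUp(self):
            self.get_super_method(self, 'setUp')()
            # Capture the test ID here; calling cls.id() inside the
            # classmethods was hitting bound-method errors (see commit
            # message), hence the new test_id argument.
            self.test_id = self.id()

        def test_sample(self):
            # Start usage logging on all servers and clients for this test
            proc_dict = self.start_memory_and_cpu_usage_logging(
                self.test_id, interval=60, count=30)

            # ... run the workload under test ...

            # Summarize usage, then run the leak/OOM and CPU-spike checks
            self.compute_and_print_usage_stats(self.test_id, proc_dict,
                                               kill_proc=False)
            leaks = self.check_for_memory_leaks_and_oom_kills_on_servers(
                self.test_id)
            spikes = self.check_for_cpu_usage_spikes_on_servers(self.test_id)
            self.assertFalse(leaks or spikes,
                             "Leaks or CPU spikes observed on servers")
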
diff --git a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
index 3d105bf5e..4e1dadbd7 100644
--- a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
+++ b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
@@ -363,7 +363,7 @@ def compute_data_usage_stats_on_servers(nodes, test_name):
# Generate a dataframe from the csv file
dataframe = create_dataframe_from_csv(node, process, test_name)
- if not dataframe:
+ if dataframe.empty:
return {}
data_dict[node][process] = {}
@@ -424,7 +424,7 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
for node in nodes:
data_dict[node] = {}
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return {}
data_dict[node]['glusterfs'] = {}
@@ -436,7 +436,8 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
volume_status=None,
- volume=None):
+ volume=None,
+ vol_name=None):
"""Perform three point check
Args:
@@ -448,14 +449,16 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
kwargs:
volume_status(dict): Volume status output on the given node
volume(str): Name of volume for which 3 point check has to be done
+ vol_name(str): Name of volume process according to volume status
Returns:
bool: True if memory leak instances are observed else False
"""
# Filter dataframe to be process wise if it's volume specific process
if process in ('glusterfs', 'glusterfsd'):
- pid = int(volume_status[volume][node][process]['pid'])
- dataframe = dataframe[dataframe['Process ID'] == pid]
+ if process == 'glusterfs' and vol_name:
+ pid = int(volume_status[volume][node][vol_name]['pid'])
+ dataframe = dataframe[dataframe['Process ID'] == pid]
# Compute usage gain through the data frame
memory_increments = list(dataframe['Memory Usage'].diff().dropna())
@@ -476,12 +479,12 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
try:
# Check if the memory gain is still increasing in the next
# two entries and between the current and the last entry
- if all(memory_increments[instance+1] >
+ if all([memory_increments[instance+1] >
memory_increments[instance],
memory_increments[instance+2] >
memory_increments[instance],
(memory_increments[len(memory_increments)-1] >
- memory_increments[instance])):
+ memory_increments[instance])]):
return True
except IndexError:
@@ -490,7 +493,7 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
g.log.info('Instance at last log entry.')
if process in ('glusterfs', 'glusterfsd'):
cmd = ("ps u -p %s | awk 'NR>1 && $11~/%s$/{print "
- "$6/1024}'" % (pid, process))
+ " $6/1024}'" % (pid, process))
else:
cmd = ("ps u -p `pgrep glusterd` | awk 'NR>1 && $11~/"
"glusterd$/{print $6/1024}'")
@@ -526,7 +529,7 @@ def check_for_memory_leaks_in_glusterd(nodes, test_name, gain=30.0):
is_there_a_leak = []
for node in nodes:
dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call 3 point check function
@@ -562,7 +565,7 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -573,7 +576,8 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
# Call 3 point check function
three_point_check = _perform_three_point_check_for_memory_leak(
- dataframe, node, 'glusterfs', gain, volume_status, volume)
+ dataframe, node, 'glusterfs', gain, volume_status, volume,
+ 'Self-heal Daemon')
if three_point_check:
g.log.error("Memory leak observed on node %s in shd "
"on volume %s", node, volume)
@@ -604,7 +608,7 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -615,7 +619,8 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
# Call 3 point check function
three_point_check = _perform_three_point_check_for_memory_leak(
- dataframe, node, 'glusterfsd', gain, volume_status, volume)
+ dataframe, node, 'glusterfsd', gain, volume_status, volume,
+ process)
if three_point_check:
g.log.error("Memory leak observed on node %s in brick "
" process for brick %s on volume %s", node,
@@ -637,7 +642,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
(Default:30)
Returns:
- bool: True if memory leak was obsevred else False
+ bool: True if memory leak was observed else False
NOTE:
This function should be executed when the volume is still mounted.
@@ -646,7 +651,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
for node in nodes:
# Get the volume status on the node
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call 3 point check function
@@ -655,7 +660,25 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
if three_point_check:
g.log.error("Memory leak observed on node %s for client",
node)
- is_there_a_leak.append(three_point_check)
+
+ # If I/O is constantly running on clients, the memory
+ # usage spikes up and stays at a point for long.
+ last_entry = dataframe['Memory Usage'].iloc[-1]
+ cmd = ("ps u -p `pidof glusterfs` | "
+ "awk 'NR>1 && $11~/glusterfs$/{print"
+ " $6/1024}'")
+ ret, out, _ = g.run(node, cmd)
+ if ret:
+ g.log.error('Unable to run the command to fetch current '
+ 'memory utilization.')
+ continue
+
+ if float(out) != last_entry:
+ if float(out) > last_entry:
+ is_there_a_leak.append(True)
+ continue
+
+ is_there_a_leak.append(False)
return any(is_there_a_leak)
@@ -671,9 +694,9 @@ def _check_for_oom_killers(nodes, process, oom_killer_list):
"""
cmd = ("grep -i 'killed process' /var/log/messages* "
"| grep -w '{}'".format(process))
- ret = g.run_parallel(nodes, cmd)
- for key in ret.keys():
- ret, out, _ = ret[key]
+ ret_codes = g.run_parallel(nodes, cmd)
+ for key in ret_codes.keys():
+ ret, out, _ = ret_codes[key]
if not ret:
g.log.error('OOM killer observed on %s for %s', key, process)
g.log.error(out)
@@ -712,7 +735,8 @@ def check_for_oom_killers_on_clients(nodes):
def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
- volume_status=None, volume=None):
+ volume_status=None, volume=None,
+ vol_name=None):
"""Check for cpu spikes for a given process
Args:
@@ -724,13 +748,14 @@ def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
kwargs:
volume_status(dict): Volume status output on the given node
volume(str): Name of volume for which check has to be done
+ vol_name(str): Name of volume process according to volume status
Returns:
bool: True if number of instances more than threshold else False
"""
# Filter dataframe to be process wise if it's volume specific process
if process in ('glusterfs', 'glusterfsd'):
- pid = int(volume_status[volume][node][process]['pid'])
+ pid = int(volume_status[volume][node][vol_name]['pid'])
dataframe = dataframe[dataframe['Process ID'] == pid]
# Check if usage is more than accepted amount of leak
@@ -758,7 +783,7 @@ def check_for_cpu_usage_spikes_on_glusterd(nodes, test_name, threshold=3):
is_there_a_spike = []
for node in nodes:
dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call function to check for cpu spikes
@@ -795,7 +820,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -807,7 +832,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
# Call function to check for cpu spikes
cpu_spikes = _check_for_cpu_usage_spikes(
dataframe, node, 'glusterfs', threshold, volume_status,
- volume)
+ volume, 'Self-heal Daemon')
if cpu_spikes:
g.log.error("CPU usage spikes observed more than "
"threshold %d on node %s on volume %s for shd",
@@ -839,7 +864,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
# Get the volume status on the node
volume_status = get_volume_status(node)
dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
for volume in volume_status.keys():
@@ -851,7 +876,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
# Call function to check for cpu spikes
cpu_spikes = _check_for_cpu_usage_spikes(
dataframe, node, 'glusterfsd', threshold, volume_status,
- volume)
+ volume, process)
if cpu_spikes:
g.log.error("CPU usage spikes observed more than "
"threshold %d on node %s on volume %s for "
@@ -884,7 +909,7 @@ def check_for_cpu_usage_spikes_on_glusterfs_fuse(nodes, test_name,
for node in nodes:
# Get the volume status on the node
dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
- if not dataframe:
+ if dataframe.empty:
return False
# Call function to check for cpu spikes
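
Several hunks above replace "if not dataframe:" with "if dataframe.empty:". Truth-testing a pandas DataFrame raises ValueError ("The truth value of a DataFrame is ambiguous"), so the old guard could never work; .empty is the supported emptiness check. A small, self-contained illustration using the same column name the three-point check reads:

    import pandas as pd

    df = pd.DataFrame({'Memory Usage': [10.0, 10.5, 11.0]})

    # "if not df:" raises ValueError (ambiguous truth value of a DataFrame);
    # ".empty" is the supported emptiness check used in the hunks above.
    if df.empty:
        print("no usage samples collected")
    else:
        # Per-interval memory deltas, the series the three-point check walks
        print(list(df['Memory Usage'].diff().dropna()))   # [0.5, 0.5]
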
diff --git a/tests/functional/resource_leak/__init__.py b/tests/functional/resource_leak/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tests/functional/resource_leak/__init__.py
diff --git a/tests/functional/resource_leak/test_basic_memory_leaks.py b/tests/functional/resource_leak/test_basic_memory_leaks.py
new file mode 100644
index 000000000..46b2c0c6d
--- /dev/null
+++ b/tests/functional/resource_leak/test_basic_memory_leaks.py
@@ -0,0 +1,120 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.glusterdir import mkdir
+from glustolibs.io.utils import (run_linux_untar, validate_io_procs,
+ wait_for_io_to_complete)
+from glustolibs.io.memory_and_cpu_utils import (
+ wait_for_logging_processes_to_stop)
+
+
+@runs_on([['distributed-replicated', 'distributed-arbiter',
+ 'distributed-dispersed', 'distributed', 'replicated',
+ 'arbiter', 'dispersed'], ['glusterfs']])
+class TestBasicMemoryleak(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Set test_id for data gathering
+ self.test_id = self.id()
+
+ # Set I/O flag to false
+ self.is_io_running = False
+
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume(self.mounts)
+ if not ret:
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
+
+ def tearDown(self):
+
+ # Wait for I/O to complete
+ if self.is_io_running:
+ if wait_for_io_to_complete(self.list_of_io_processes,
+ self.mounts):
+ raise ExecutionError("Failed to wait for I/O to complete")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume(self.mounts)
+ if not ret:
+ raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+ self.get_super_method(self, 'tearDown')()
+
+ def test_basic_memory_leak(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Start I/O from mount point.
+ 3. Check if there are any memory leaks and OOM killers.
+ """
+ # Start monitoring resource usage on servers and clients
+ monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
+ self.test_id, count=30)
+ self.assertIsNotNone(monitor_proc_dict,
+ "Failed to start monitoring on servers and "
+ "clients")
+
+ # Create a dir to start untar
+ self.linux_untar_dir = "{}/{}".format(self.mounts[1].mountpoint,
+ "linuxuntar")
+ ret = mkdir(self.mounts[1].client_system, self.linux_untar_dir)
+ self.assertTrue(ret, "Failed to create dir linuxuntar for untar")
+
+ # Start multiple I/O from mount points
+ self.list_of_io_processes = []
+ cmd = ("cd {};for i in `seq 1 100`; do mkdir dir.$i ;"
+ "for j in `seq 1 1000`; do dd if=/dev/random "
+ "of=dir.$i/testfile.$j bs=1k count=10;done;done"
+ .format(self.mounts[0].mountpoint))
+ ret = g.run_async(self.mounts[0].client_system, cmd)
+ self.list_of_io_processes = [ret]
+
+ # Start linux untar on dir linuxuntar
+ ret = run_linux_untar(self.mounts[1].client_system,
+ self.mounts[1].mountpoint,
+ dirs=tuple(['linuxuntar']))
+ self.list_of_io_processes += ret
+ self.is_io_running = True
+
+ # Wait for I/O to complete and validate I/O on mount points
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts)
+ self.assertTrue(ret, "I/O failed on mount point")
+ self.is_io_running = False
+
+ # Wait for monitoring processes to complete
+ ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
+ cluster=True)
+ self.assertTrue(ret,
+ "ERROR: Failed to stop monitoring processes")
+
+ # Check if there are any memory leaks and OOM killers
+ ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
+ self.test_id)
+ self.assertFalse(ret,
+ "Memory leak and OOM kills check failed on servers")
+
+ ret = self.check_for_memory_leaks_and_oom_kills_on_clients(
+ self.test_id)
+ self.assertFalse(ret,
+ "Memory leak and OOM kills check failed on clients")
+ g.log.info("No memory leaks or OOM kills found on serves and clients")
diff --git a/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py b/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py
new file mode 100644
index 000000000..3a22a5068
--- /dev/null
+++ b/tests/functional/resource_leak/test_memory_leak_in_shd_with_cache_invalidation_on.py
@@ -0,0 +1,117 @@
+# Copyright (C) 2020 Red Hat, Inc. <http://www.redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.heal_ops import trigger_heal
+from glustolibs.gluster.volume_ops import set_volume_options
+from glustolibs.io.utils import (validate_io_procs, wait_for_io_to_complete)
+from glustolibs.io.memory_and_cpu_utils import (
+ wait_for_logging_processes_to_stop)
+
+
+@runs_on([['distributed-replicated', 'distributed-arbiter',
+ 'distributed-dispersed', 'replicated',
+ 'arbiter', 'dispersed'], ['glusterfs']])
+class TestMemoryLeakInShdWithCacheInvalidationOn(GlusterBaseClass):
+
+ def setUp(self):
+
+ self.get_super_method(self, 'setUp')()
+
+ # Set test_id for data gathering
+ self.test_id = self.id()
+
+ # Set I/O flag to false
+ self.is_io_running = False
+
+ # Creating Volume and mounting the volume
+ ret = self.setup_volume_and_mount_volume([self.mounts[0]])
+ if not ret:
+ raise ExecutionError("Volume creation or mount failed: %s"
+ % self.volname)
+
+ def tearDown(self):
+
+ # Wait for I/O to complete
+ if self.is_io_running:
+ if wait_for_io_to_complete(self.list_of_io_processes,
+ self.mounts[0]):
+ raise ExecutionError("Failed to wait for I/O to complete")
+
+ # Unmounting and cleaning volume
+ ret = self.unmount_volume_and_cleanup_volume([self.mounts[0]])
+ if not ret:
+ raise ExecutionError("Unable to delete volume %s" % self.volname)
+
+ self.get_super_method(self, 'tearDown')()
+
+ def test_memory_leak_in_shd_with_cache_invalidation_on(self):
+ """
+ Test case:
+ 1. Create a volume, start it and mount it.
+ 2. Set features.cache-invalidation to ON.
+ 3. Start I/O from mount point.
+ 4. Run gluster volume heal command in a loop
+ 5. Check if there are any memory leaks and OOM killers on servers.
+ """
+ # Start monitoring resource usage on servers and clients
+ monitor_proc_dict = self.start_memory_and_cpu_usage_logging(
+ self.test_id, count=10)
+ self.assertIsNotNone(monitor_proc_dict,
+ "Failed to start monitoring on servers and"
+ " clients")
+
+ # Set features.cache-invalidation to ON
+ ret = set_volume_options(self.mnode, self.volname,
+ {'features.cache-invalidation': 'on'})
+ self.assertTrue(ret, "Failed to set features.cache-invalidation to ON")
+ g.log.info("Successfully set features.cache-invalidation to ON")
+
+ # Start multiple I/O from mount points
+ self.list_of_io_processes = []
+ cmd = ("cd {};for i in `seq 1 1000`;do echo 'abc' > myfile;done"
+ .format(self.mounts[0].mountpoint))
+ ret = g.run_async(self.mounts[0].client_system, cmd)
+ self.list_of_io_processes = [ret]
+ self.is_io_running = True
+
+ # Run gluster volume heal command in a loop for 100 iterations
+ for iteration in range(0, 100):
+ g.log.info("Running gluster volume heal command for %d time",
+ iteration)
+ ret = trigger_heal(self.mnode, self.volname)
+ self.assertTrue(ret, "Failed to trigger heal on volume")
+ g.log.info("Ran gluster volume heal command in a loop for "
+ "100 iterations.")
+
+ # Wait for I/O to complete and validate I/O on mount points
+ ret = validate_io_procs(self.list_of_io_processes, self.mounts[0])
+ self.assertTrue(ret, "I/O failed on mount point")
+ self.is_io_running = False
+
+ # Wait for monitoring processes to complete
+ ret = wait_for_logging_processes_to_stop(monitor_proc_dict,
+ cluster=True)
+ self.assertTrue(ret,
+ "ERROR: Failed to stop monitoring processes")
+
+ # Check if there are any memory leaks and OOM killers
+ ret = self.check_for_memory_leaks_and_oom_kills_on_servers(
+ self.test_id)
+ self.assertFalse(ret,
+ "Memory leak and OOM kills check failed on servers")