[Test] Add 2 memory leak tests and fix library issues

Scenarios added:
----------------
Test case:
1. Create a volume, start it and mount it.
2. Start I/O from mount point.
3. Check if there are any memory leaks and OOM killers.

Test case:
1. Create a volume, start it and mount it.
2. Set features.cache-invalidation to ON.
3. Start I/O from mount point.
4. Run gluster volume heal command in a loop.
5. Check if there are any memory leaks and OOM killers on servers.

Design change:
--------------
- self.id() is moved into the test class as it was hitting bound errors in the original logic.
- Changed the logic for checking memory leaks in fuse.
- Fixed breakage in methods wherever needed.

Change-Id: Icb600d833d0c08636b6002abb489342ea1f946d7
Signed-off-by: kshithijiyer <kshithij.ki@gmail.com>
author: kshithijiyer <kshithij.ki@gmail.com> 2020-10-06 09:05:44 +0530
committer: Arthy Loganathan <aloganat@redhat.com> 2020-10-21 05:21:42 +0000
commit: 08faae06ab07b56b815aec5bfbfcf72d653e8055 (patch)
tree: f8998f6e8304e786f2d96eefc6a82e8f1cfe67b9 /glustolibs-io
parent: cd7bf42beaf1590baaace8abe7dac55e7fc3388c (diff)
1 files changed, 52 insertions, 27 deletions
diff --git a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
index 3d105bf5e..4e1dadbd7 100644
--- a/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
+++ b/glustolibs-io/glustolibs/io/memory_and_cpu_utils.py
@@ -363,7 +363,7 @@ def compute_data_usage_stats_on_servers(nodes, test_name):
 
             # Generate a dataframe from the csv file
             dataframe = create_dataframe_from_csv(node, process, test_name)
-            if not dataframe:
+            if dataframe.empty:
                 return {}
 
             data_dict[node][process] = {}
@@ -424,7 +424,7 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
     for node in nodes:
         data_dict[node] = {}
         dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return {}
 
         data_dict[node]['glusterfs'] = {}
@@ -436,7 +436,8 @@ def compute_data_usage_stats_on_clients(nodes, test_name):
 
 def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
                                                volume_status=None,
-                                               volume=None):
+                                               volume=None,
+                                               vol_name=None):
     """Perform three point check
 
     Args:
@@ -448,14 +449,16 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
     kwargs:
      volume_status(dict): Volume status output on the give name
      volumne(str):Name of volume for which 3 point check has to be done
+     vol_name(str): Name of volume process according to volume status
 
     Returns:
      bool: True if memory leak instances are observed else False
     """
     # Filter dataframe to be process wise if it's volume specific process
     if process in ('glusterfs', 'glusterfsd'):
-        pid = int(volume_status[volume][node][process]['pid'])
-        dataframe = dataframe[dataframe['Process ID'] == pid]
+        if process == 'glusterfs' and vol_name:
+            pid = int(volume_status[volume][node][vol_name]['pid'])
+            dataframe = dataframe[dataframe['Process ID'] == pid]
 
     # Compute usage gain throught the data frame
     memory_increments = list(dataframe['Memory Usage'].diff().dropna())
@@ -476,12 +479,12 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
             try:
                 # Check if memory gain had decrease in the consecutive
                 # entries, after 2 entry and betwen current and last entry
-                if all(memory_increments[instance+1] >
+                if all([memory_increments[instance+1] >
                        memory_increments[instance],
                        memory_increments[instance+2] >
                        memory_increments[instance],
                        (memory_increments[len(memory_increments)-1] >
-                        memory_increments[instance])):
+                        memory_increments[instance])]):
                     return True
 
             except IndexError:
@@ -490,7 +493,7 @@ def _perform_three_point_check_for_memory_leak(dataframe, node, process, gain,
                 g.log.info('Instance at last log entry.')
                 if process in ('glusterfs', 'glusterfsd'):
                     cmd = ("ps u -p %s | awk 'NR>1 && $11~/%s$/{print "
-                           "$6/1024}'" % (pid, process))
+                           " $6/1024}'" % (pid, process))
                 else:
                     cmd = ("ps u -p `pgrep glusterd` | awk 'NR>1 && $11~/"
                            "glusterd$/{print $6/1024}'")
@@ -526,7 +529,7 @@ def check_for_memory_leaks_in_glusterd(nodes, test_name, gain=30.0):
     is_there_a_leak = []
     for node in nodes:
         dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         # Call 3 point check function
@@ -562,7 +565,7 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
         # Get the volume status on the node
         volume_status = get_volume_status(node)
         dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         for volume in volume_status.keys():
@@ -573,7 +576,8 @@ def check_for_memory_leaks_in_glusterfs(nodes, test_name, gain=30.0):
 
                 # Call 3 point check function
                 three_point_check = _perform_three_point_check_for_memory_leak(
-                    dataframe, node, 'glusterfs', gain, volume_status, volume)
+                    dataframe, node, 'glusterfs', gain, volume_status, volume,
+                    'Self-heal Daemon')
                 if three_point_check:
                     g.log.error("Memory leak observed on node %s in shd "
                                 "on volume %s", node, volume)
@@ -604,7 +608,7 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
         # Get the volume status on the node
         volume_status = get_volume_status(node)
         dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         for volume in volume_status.keys():
@@ -615,7 +619,8 @@ def check_for_memory_leaks_in_glusterfsd(nodes, test_name, gain=30.0):
 
                 # Call 3 point check function
                 three_point_check = _perform_three_point_check_for_memory_leak(
-                    dataframe, node, 'glusterfsd', gain, volume_status, volume)
+                    dataframe, node, 'glusterfsd', gain, volume_status, volume,
+                    process)
                 if three_point_check:
                     g.log.error("Memory leak observed on node %s in brick "
                                 " process for brick %s on volume %s", node,
@@ -637,7 +642,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
                   (Default:30)
 
     Returns:
-      bool: True if memory leak was obsevred else False
+      bool: True if memory leak was observed else False
 
     NOTE:
      This function should be executed when the volume is still mounted.
@@ -646,7 +651,7 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
     for node in nodes:
         # Get the volume status on the node
         dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         # Call 3 point check function
@@ -655,7 +660,25 @@ def check_for_memory_leaks_in_glusterfs_fuse(nodes, test_name, gain=30.0):
         if three_point_check:
             g.log.error("Memory leak observed on node %s for client",
                         node)
-        is_there_a_leak.append(three_point_check)
+
+            # If I/O is constantly running on Clients the memory
+            # usage spikes up and stays at a point for long.
+            last_entry = dataframe['Memory Usage'].iloc[-1]
+            cmd = ("ps u -p `pidof glusterfs` | "
+                   "awk 'NR>1 && $11~/glusterfs$/{print"
+                   " $6/1024}'")
+            ret, out, _ = g.run(node, cmd)
+            if ret:
+                g.log.error('Unable to run the command to fetch current '
+                            'memory utilization.')
+                continue
+
+            if float(out) != last_entry:
+                if float(out) > last_entry:
+                    is_there_a_leak.append(True)
+                    continue
+
+        is_there_a_leak.append(False)
 
     return any(is_there_a_leak)
 
@@ -671,9 +694,9 @@ def _check_for_oom_killers(nodes, process, oom_killer_list):
     """
     cmd = ("grep -i 'killed process' /var/log/messages* "
            "| grep -w '{}'".format(process))
-    ret = g.run_parallel(nodes, cmd)
-    for key in ret.keys():
-        ret, out, _ = ret[key]
+    ret_codes = g.run_parallel(nodes, cmd)
+    for key in ret_codes.keys():
+        ret, out, _ = ret_codes[key]
         if not ret:
             g.log.error('OOM killer observed on %s for %s', key, process)
             g.log.error(out)
@@ -712,7 +735,8 @@ def check_for_oom_killers_on_clients(nodes):
 
 
 def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
-                                volume_status=None, volume=None):
+                                volume_status=None, volume=None,
+                                vol_name=None):
     """Check for cpu spikes for a given process
 
     Args:
@@ -724,13 +748,14 @@ def _check_for_cpu_usage_spikes(dataframe, node, process, threshold,
     kwargs:
      volume_status(dict): Volume status output on the give name
      volume(str):Name of volume for which check has to be done
+     vol_name(str): Name of volume process according to volume status
 
     Returns:
      bool: True if number of instances more than threshold else False
     """
     # Filter dataframe to be process wise if it's volume specific process
     if process in ('glusterfs', 'glusterfsd'):
-        pid = int(volume_status[volume][node][process]['pid'])
+        pid = int(volume_status[volume][node][vol_name]['pid'])
         dataframe = dataframe[dataframe['Process ID'] == pid]
 
     # Check if usage is more than accepted amount of leak
@@ -758,7 +783,7 @@ def check_for_cpu_usage_spikes_on_glusterd(nodes, test_name, threshold=3):
     is_there_a_spike = []
     for node in nodes:
         dataframe = create_dataframe_from_csv(node, 'glusterd', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         # Call function to check for cpu spikes
@@ -795,7 +820,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
         # Get the volume status on the node
         volume_status = get_volume_status(node)
         dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         for volume in volume_status.keys():
@@ -807,7 +832,7 @@ def check_for_cpu_usage_spikes_on_glusterfs(nodes, test_name, threshold=3):
                 # Call function to check for cpu spikes
                 cpu_spikes = _check_for_cpu_usage_spikes(
                     dataframe, node, 'glusterfs', threshold, volume_status,
-                    volume)
+                    volume, 'Self-heal Daemon')
                 if cpu_spikes:
                     g.log.error("CPU usage spikes observed more than "
                                 "threshold %d on node %s on volume %s for shd",
@@ -839,7 +864,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
         # Get the volume status on the node
         volume_status = get_volume_status(node)
         dataframe = create_dataframe_from_csv(node, 'glusterfsd', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         for volume in volume_status.keys():
@@ -851,7 +876,7 @@ def check_for_cpu_usage_spikes_on_glusterfsd(nodes, test_name, threshold=3):
                 # Call function to check for cpu spikes
                 cpu_spikes = _check_for_cpu_usage_spikes(
                     dataframe, node, 'glusterfsd', threshold, volume_status,
-                    volume)
+                    volume, process)
                 if cpu_spikes:
                     g.log.error("CPU usage spikes observed more than "
                                 "threshold %d on node %s on volume %s for "
@@ -884,7 +909,7 @@ def check_for_cpu_usage_spikes_on_glusterfs_fuse(nodes, test_name,
     for node in nodes:
         # Get the volume status on the node
         dataframe = create_dataframe_from_csv(node, 'glusterfs', test_name)
-        if not dataframe:
+        if dataframe.empty:
             return False
 
         # Call function to check for cpu spikes
author	kshithijiyer <kshithij.ki@gmail.com>	2020-10-06 09:05:44 +0530
committer	Arthy Loganathan <aloganat@redhat.com>	2020-10-21 05:21:42 +0000
commit	08faae06ab07b56b815aec5bfbfcf72d653e8055 (patch)
tree	f8998f6e8304e786f2d96eefc6a82e8f1cfe67b9 /glustolibs-io
parent	cd7bf42beaf1590baaace8abe7dac55e7fc3388c (diff)