From f253c71feecfe0968ef96cd41855920468ea08ed Mon Sep 17 00:00:00 2001
From: Shwetha Panduranga
Date: Mon, 20 Mar 2017 19:30:22 +0530
Subject: Adding sanity heal tests when IO is in progress.

1) test heal with replace-brick when IO is in progress
2) test heal when bricks go offline and come back online while IO is in
   progress

Change-Id: Id9002c465aec8617217a12fa36846cdc1f61d7a4
Signed-off-by: Shwetha Panduranga
Signed-off-by: ShwethaHP
---
 tests/functional/bvt/test_cvt.py | 185 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 183 insertions(+), 2 deletions(-)

diff --git a/tests/functional/bvt/test_cvt.py b/tests/functional/bvt/test_cvt.py
index d56fb100b..b367004ea 100644
--- a/tests/functional/bvt/test_cvt.py
+++ b/tests/functional/bvt/test_cvt.py
@@ -26,9 +26,9 @@
         - enable quota
         - collecting snapshot
         - remove-brick
-    TODO:
         - n/w failure followed by heal
         - replace-brick
+    TODO:
         - attach-tier, detach-tier
 """
 import time
@@ -40,10 +40,16 @@ from glustolibs.gluster.volume_libs import enable_and_validate_volume_options
 from glustolibs.gluster.volume_libs import (
     verify_all_process_of_volume_are_online)
 from glustolibs.gluster.volume_libs import (log_volume_info_and_status,
-                                            expand_volume, shrink_volume)
+                                            expand_volume, shrink_volume,
+                                            replace_brick_from_volume)
 from glustolibs.gluster.rebalance_ops import (rebalance_start,
                                               wait_for_rebalance_to_complete,
                                               rebalance_status)
+from glustolibs.gluster.brick_libs import (select_bricks_to_bring_offline,
+                                           bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline)
+from glustolibs.gluster.heal_libs import monitor_heal_completion
 from glustolibs.gluster.quota_ops import (enable_quota, disable_quota,
                                           set_quota_limit_usage,
                                           is_quota_enabled,
@@ -562,3 +568,178 @@ class TestSnapshotSanity(GlusterBasicFeaturesSanityBaseClass):
         ret = list_all_files_and_dirs_mounts(self.mounts)
         self.assertTrue(ret, "Failed to list all files and dirs")
         g.log.info("Listing all files and directories is successful")
+
+
+@runs_on([['replicated', 'distributed-replicated'],
+          ['glusterfs', 'nfs', 'cifs']])
+class TestGlusterReplaceBrickSanity(GlusterBasicFeaturesSanityBaseClass):
+    """Sanity tests for replacing faulty bricks"""
+    @pytest.mark.bvt_cvt
+    def test_replace_brick_when_io_in_progress(self):
+        """Test replacing a brick with a brick from the existing servers
+        while IO is in progress.
+
+        Description:
+            - replace_brick
+            - wait for heal to complete
+            - validate IO
+        """
+        # Log Volume Info and Status before replacing brick from the volume.
+ g.log.info("Logging volume info and Status before replacing brick " + "from the volume %s", self.volname) + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume %s", self.volname)) + g.log.info("Successful in logging volume info and status of volume %s", + self.volname) + + # Replace brick from a sub-volume + g.log.info("Replace a faulty brick from the volume") + ret = replace_brick_from_volume(self.mnode, self.volname, + self.servers, self.all_servers_info) + self.assertTrue(ret, "Failed to replace faulty brick from the volume") + g.log.info("Successfully replaced faulty brick from the volume") + + # Wait for gluster processes to come online + time.sleep(30) + + # Log Volume Info and Status after replacing the brick + g.log.info("Logging volume info and Status after replacing brick " + "from the volume %s", self.volname) + ret = log_volume_info_and_status(self.mnode, self.volname) + self.assertTrue(ret, ("Logging volume info and status failed on " + "volume %s", self.volname)) + g.log.info("Successful in logging volume info and status of volume %s", + self.volname) + + # Verify volume's all process are online + g.log.info("Verifying volume's all process are online") + ret = verify_all_process_of_volume_are_online(self.mnode, self.volname) + self.assertTrue(ret, ("Volume %s : All process are not online", + self.volname)) + g.log.info("Volume %s : All process are online", self.volname) + + # Wait for self-heal to complete + g.log.info("Wait for self-heal to complete") + ret = monitor_heal_completion(self.mnode, self.volname) + self.assertTrue(ret, "Self heal didn't complete even after waiting " + "for 20 minutes. 20 minutes is too much a time for " + "current test workload") + g.log.info("self-heal is successful after replace-brick operation") + + # Validate IO + g.log.info("Wait for IO to complete and validate IO ...") + ret = validate_io_procs(self.all_mounts_procs, self.mounts) + self.io_validation_complete = True + self.assertTrue(ret, "IO failed on some of the clients") + g.log.info("IO is successful on all mounts") + + # List all files and dirs created + g.log.info("List all files and directories:") + ret = list_all_files_and_dirs_mounts(self.mounts) + self.assertTrue(ret, "Failed to list all files and dirs") + g.log.info("Listing all files and directories is successful") + + +@runs_on([['replicated', 'distributed-replicated', 'dispersed', + 'distributed-dispersed'], + ['glusterfs', 'nfs', 'cifs']]) +class TestGlusterHealSanity(GlusterBasicFeaturesSanityBaseClass): + """Sanity tests for SelfHeal""" + @pytest.mark.bvt_cvt + def test_self_heal_when_io_in_progress(self): + """Test self-heal is successful when IO is in progress. + + Description: + - simulate brick down. 
+            - bring bricks online
+            - wait for heal to complete
+            - validate IO
+        """
+        # Log Volume Info and Status before simulating brick failure
+        g.log.info("Logging volume info and Status before bringing bricks "
+                   "offline from the volume %s", self.volname)
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed on "
+                              "volume %s", self.volname))
+        g.log.info("Successful in logging volume info and status of volume "
+                   "%s", self.volname)
+
+        # Select bricks to bring offline
+        bricks_to_bring_offline_dict = (select_bricks_to_bring_offline(
+            self.mnode, self.volname))
+        bricks_to_bring_offline = filter(None, (
+            bricks_to_bring_offline_dict['hot_tier_bricks'] +
+            bricks_to_bring_offline_dict['cold_tier_bricks'] +
+            bricks_to_bring_offline_dict['volume_bricks']))
+
+        # Bring bricks offline
+        g.log.info("Bringing bricks: %s offline", bricks_to_bring_offline)
+        ret = bring_bricks_offline(self.volname, bricks_to_bring_offline)
+        self.assertTrue(ret, ("Failed to bring bricks: %s offline",
+                              bricks_to_bring_offline))
+        g.log.info("Successful in bringing bricks: %s offline",
+                   bricks_to_bring_offline)
+
+        # Wait for gluster processes to go offline
+        time.sleep(10)
+
+        # Validate if bricks are offline
+        g.log.info("Validating if bricks: %s are offline",
+                   bricks_to_bring_offline)
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 bricks_to_bring_offline)
+        self.assertTrue(ret, "Not all the bricks in list: %s are offline" %
+                        bricks_to_bring_offline)
+        g.log.info("Successfully validated that bricks: %s are all offline",
+                   bricks_to_bring_offline)
+
+        # Log Volume Info and Status
+        g.log.info("Logging volume info and Status after bringing bricks "
+                   "offline from the volume %s", self.volname)
+        ret = log_volume_info_and_status(self.mnode, self.volname)
+        self.assertTrue(ret, ("Logging volume info and status failed on "
+                              "volume %s", self.volname))
+        g.log.info("Successful in logging volume info and status of volume "
+                   "%s", self.volname)
+
+        # Add delay before bringing bricks online
+        time.sleep(40)
+
+        # Bring bricks online
+        g.log.info("Bring bricks: %s online", bricks_to_bring_offline)
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  bricks_to_bring_offline)
+        self.assertTrue(ret, ("Failed to bring bricks: %s online",
+                              bricks_to_bring_offline))
+        g.log.info("Successfully brought all bricks: %s online",
+                   bricks_to_bring_offline)
+
+        # Wait for gluster processes to come online
+        time.sleep(10)
+
+        # Verify all processes of the volume are online
+        g.log.info("Verifying all processes of volume %s are online",
+                   self.volname)
+        ret = verify_all_process_of_volume_are_online(self.mnode,
+                                                      self.volname)
+        self.assertTrue(ret, ("Volume %s : All processes are not online",
+                              self.volname))
+        g.log.info("Volume %s : All processes are online", self.volname)
+
+        # Wait for self-heal to complete
+        g.log.info("Wait for self-heal to complete")
+        ret = monitor_heal_completion(self.mnode, self.volname)
+        self.assertTrue(ret, "Self-heal didn't complete even after waiting "
+                        "for 20 minutes, which is more than enough time for "
+                        "the current test workload")
+        g.log.info("Self-heal is successful after bringing bricks online")
+
+        # Validate IO
+        g.log.info("Wait for IO to complete and validate IO ...")
+        ret = validate_io_procs(self.all_mounts_procs, self.mounts)
+        self.io_validation_complete = True
+        self.assertTrue(ret, "IO failed on some of the clients")
+        g.log.info("IO is successful on all mounts")
+
+        # List all files and dirs created
+        g.log.info("List all files and directories:")
+        ret = list_all_files_and_dirs_mounts(self.mounts)
+        self.assertTrue(ret, "Failed to list all files and dirs")
+        g.log.info("Listing all files and directories is successful")
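
Note: both tests above pause with fixed sleeps (time.sleep(30) after
replace_brick_from_volume(), time.sleep(10) around the brick offline/online
steps) before checking process state, which can race on slow machines and
wastes time on fast ones. A minimal polling sketch, built only on
verify_all_process_of_volume_are_online(), which this patch already imports;
the helper name and the timeout/interval defaults are illustrative
assumptions, not existing glustolibs API:

    import time

    from glustolibs.gluster.volume_libs import (
        verify_all_process_of_volume_are_online)


    def wait_for_volume_processes_online(mnode, volname, timeout=60,
                                         interval=2):
        """Poll until all processes of the volume report online.

        Returns True as soon as verify_all_process_of_volume_are_online()
        succeeds, or False if `timeout` seconds elapse first.
        """
        deadline = time.time() + timeout
        while time.time() < deadline:
            if verify_all_process_of_volume_are_online(mnode, volname):
                return True
            time.sleep(interval)
        return False

With such a helper, the fixed sleeps could become assertions, e.g.
self.assertTrue(wait_for_volume_processes_online(self.mnode, self.volname),
"volume processes did not come online in time").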
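
Note: in test_self_heal_when_io_in_progress, filter(None, ...) returns a list
on Python 2 but a lazy iterator on Python 3. On Python 3 the iterator would be
consumed by bring_bricks_offline(), leaving the later are_bricks_offline()
check with an empty sequence, and the %s log fields would render as an opaque
<filter object ...>. A version-safe sketch, using the same dictionary keys
that select_bricks_to_bring_offline() returns in this patch:

    # Materialise the brick list so it can be logged and iterated more
    # than once; a list comprehension behaves identically on Python 2
    # and Python 3, unlike a bare filter().
    bricks_to_bring_offline = [
        brick for brick in (
            bricks_to_bring_offline_dict['hot_tier_bricks'] +
            bricks_to_bring_offline_dict['cold_tier_bricks'] +
            bricks_to_bring_offline_dict['volume_bricks'])
        if brick]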
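
Note: the heal-wait assertion message promises a 20-minute bound, but
monitor_heal_completion() is called with defaults, so the actual bound is
whatever the installed heal_libs ships. If the message is to stay accurate,
the timeout could be passed explicitly. The keyword below is an assumption
about the glustolibs signature (some versions name it timeout_period, in
seconds) and should be verified against the installed library before use:

    # Assumption: monitor_heal_completion() accepts a timeout keyword;
    # check the installed glustolibs.gluster.heal_libs signature.
    ret = monitor_heal_completion(self.mnode, self.volname,
                                  timeout_period=20 * 60)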