From 23de3876a1a3024b17ef43c769d63d9d810d571b Mon Sep 17 00:00:00 2001
From: kshithijiyer
Date: Tue, 12 Jan 2021 11:04:33 +0530
Subject: [Test] Add test to check self heal with expand volume

Test case:
1. Create a 2X3 volume.
2. Mount the volume using FUSE and give 777 permissions to the mount.
3. Add a new user.
4. Login as the new user and create 100 files from the new user:
   for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
5. Kill a brick which is part of the volume.
6. On the mount, login as root user and create 1000 files:
   for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
7. On the mount, login as the new user, and copy existing data to the mount.
8. Start the volume using force.
9. While heal is in progress, add-brick and start rebalance.
10. Wait for rebalance and heal to complete.
11. Check for MSGID: 108008 errors in rebalance logs.

Reference BZ: #1821599

Change-Id: I0782d4b6e44782fd612d4f2ced248c3737132855
Signed-off-by: kshithijiyer
---
 .../afr/test_self_heal_with_expand_volume.py | 221 +++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 tests/functional/afr/test_self_heal_with_expand_volume.py

diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py
new file mode 100644
index 000000000..d5b6d5d43
--- /dev/null
+++ b/tests/functional/afr/test_self_heal_with_expand_volume.py
@@ -0,0 +1,221 @@
+# Copyright (C) 2021 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           are_bricks_online, get_all_bricks)
+from glustolibs.gluster.glusterfile import (set_file_permissions,
+                                            occurences_of_pattern_in_file)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+                                          is_heal_complete)
+from glustolibs.gluster.rebalance_ops import (
+    rebalance_start, wait_for_rebalance_to_complete)
+from glustolibs.gluster.lib_utils import (add_user, del_user)
+from glustolibs.gluster.volume_libs import (get_subvols, expand_volume)
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestHealWithExpandVolume(GlusterBaseClass):
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+
+        self.first_client = self.mounts[0].client_system
+        self.mountpoint = self.mounts[0].mountpoint
+
+        # Create non-root users
+        self.users = ('qa_user', 'qa_admin')
+        for user in self.users:
+            if not add_user(self.first_client, user):
+                raise ExecutionError("Failed to create non-root user {}"
+                                     .format(user))
+        g.log.info("Successfully created non-root users")
+
+        # Setup Volume
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to setup and mount volume")
+
+    def tearDown(self):
+
+        # Delete non-root users and their home directories
+        for user in self.users:
+            del_user(self.first_client, user)
+            ret, _, _ = g.run(self.first_client,
+                              "rm -rf /home/{}".format(user))
+            if ret:
+                raise ExecutionError("Failed to remove home dir of "
+                                     "non-root user")
+        g.log.info("Successfully deleted all users")
+
+        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to cleanup Volume")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+    def _bring_bricks_offline(self):
+        """Bring bricks offline and confirm that they are offline"""
+        # Select a brick to bring offline from a replica set
+        subvols_dict = get_subvols(self.mnode, self.volname)
+        subvols = subvols_dict['volume_subvols']
+        self.bricks_to_bring_offline = []
+        self.bricks_to_bring_offline.append(choice(subvols[0]))
+
+        # Bring bricks offline
+        ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+                        self.bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % self.bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   self.bricks_to_bring_offline)
+
+    def _restart_volume_and_bring_all_offline_bricks_online(self):
+        """Restart volume and bring all offline bricks online"""
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  self.bricks_to_bring_offline,
+                                  bring_bricks_online_methods=[
+                                      'volume_start_force'])
+        self.assertTrue(ret, 'Failed to bring bricks %s online' %
+                        self.bricks_to_bring_offline)
+
+        # Check if bricks are back online or not
+        ret = are_bricks_online(self.mnode, self.volname,
+                                self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s not online even after restart' %
+                        self.bricks_to_bring_offline)
+
+        g.log.info('Bringing bricks %s online is successful',
+                   self.bricks_to_bring_offline)
+
+    def _wait_for_heal_to_complete(self):
+        """Wait for heal to complete on the volume"""
+        ret = monitor_heal_completion(self.mnode, self.volname,
+                                      timeout_period=3600)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+    def _check_if_there_are_files_to_be_healed(self):
+        """Check if there are files and dirs to be healed"""
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertFalse(ret, 'Heal is completed')
+        g.log.info('Heal is pending')
+
+    def _expand_volume_and_wait_for_rebalance_to_complete(self):
+        """Expand volume and wait for rebalance to complete"""
+        # Add brick to volume
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info)
+        self.assertTrue(ret, "Failed to add brick on volume %s"
+                        % self.volname)
+
+        # Trigger rebalance
+        ret, _, _ = rebalance_start(self.mnode, self.volname,
+                                    force=True)
+        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+                         % self.volname)
+
+        # Wait for rebalance to complete
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+                                             timeout=6000)
+        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+                        "%s" % self.volname)
+        g.log.info("Rebalance successfully completed")
+
+    def test_self_heal_and_add_brick_with_data_from_diff_users(self):
+        """
+        Test case:
+        1. Create a 2X3 volume.
+        2. Mount the volume using FUSE and give 777 permissions to the mount.
+        3. Add a new user.
+        4. Login as the new user and create 100 files from the new user:
+           for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
+        5. Kill a brick which is part of the volume.
+        6. On the mount, login as root user and create 1000 files:
+           for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
+        7. On the mount, login as the new user, and copy existing data to
+           the mount.
+        8. Start the volume using force.
+        9. While heal is in progress, add-brick and start rebalance.
+        10. Wait for rebalance and heal to complete.
+        11. Check for MSGID: 108008 errors in rebalance logs.
+        """
+        # Change permissions of mount point to 777
+        ret = set_file_permissions(self.first_client, self.mountpoint,
+                                   '-R 777')
+        self.assertTrue(ret, "Unable to change mount point permissions")
+        g.log.info("Mount point permissions set to 777")
+
+        # Create 100 files from non-root user
+        cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom "
+               "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0],
+                                                            self.mountpoint))
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to create files from non-root user")
+
+        # Kill one brick which is part of the volume
+        self._bring_bricks_offline()
+
+        # Create 1000 files from root user
+        cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i"
+               " bs=10M count=1;done" % self.mountpoint)
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to create files from root user")
+
+        # On the mount, login as new user, and copy existing data to
+        # the mount
+        cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/"
+               "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;"
+               "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint))
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to copy files from non-root user")
+
+        # Check if there are files to be healed
+        self._check_if_there_are_files_to_be_healed()
+
+        # Start the volume using force to bring the killed brick back online
+        self._restart_volume_and_bring_all_offline_bricks_online()
+
+        # Add bricks to the volume and wait for rebalance to complete
+        self._expand_volume_and_wait_for_rebalance_to_complete()
+
+        # Wait for heal to complete
+        self._wait_for_heal_to_complete()
+
+        # Check for MSGID: 108008 errors in rebalance logs
+        participating_nodes = []
+        for brick in get_all_bricks(self.mnode, self.volname):
+            node, _ = brick.split(':')
+            participating_nodes.append(node)
+
+        for server in participating_nodes:
+            ret = occurences_of_pattern_in_file(
+                server, "MSGID: 108008",
+                "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
+            self.assertEqual(ret, 0,
+                             "[Input/output error] present in rebalance log"
+                             " file")
+        g.log.info("Expanding volume successful and no MSGID: 108008 "
+                   "errors seen in rebalance logs")
--
cgit
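
For reference, the final verification step of the test (scanning each node's
rebalance log for "MSGID: 108008") can also be reproduced by hand outside the
glusto framework. The sketch below is only an illustration, not part of the
patch: the NODES list and VOLNAME value are assumptions, it uses plain
subprocess/ssh instead of glusto's g.run() and the library's
occurences_of_pattern_in_file() helper, and it assumes password-less SSH to
the brick nodes plus the default glusterfs log directory.

    # Standalone sketch: count "MSGID: 108008" occurrences in each node's
    # rebalance log. NODES and VOLNAME are placeholders for your setup.
    import subprocess

    VOLNAME = "testvol"                                     # assumed volume name
    NODES = ["server1.example.com", "server2.example.com"]  # assumed brick nodes

    for node in NODES:
        cmd = ("grep -c 'MSGID: 108008' "
               "/var/log/glusterfs/{}-rebalance.log".format(VOLNAME))
        proc = subprocess.run(["ssh", node, cmd], capture_output=True,
                              text=True, check=False)
        # grep exits non-zero when there are no matches, so only the
        # printed count is inspected here
        count = int(proc.stdout.strip() or 0)
        assert count == 0, ("[Input/output error] entries found in the "
                            "rebalance log on {}".format(node))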