From 23de3876a1a3024b17ef43c769d63d9d810d571b Mon Sep 17 00:00:00 2001
From: kshithijiyer
Date: Tue, 12 Jan 2021 11:04:33 +0530
Subject: [Test] Add test to check self heal with expand volume

Test case:
1. Create a 2X3 volume.
2. Mount the volume using FUSE and give 777 permissions to the mount.
3. Add a new user.
4. Login as the new user and create 100 files from the new user:
   for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
5. Kill a brick which is part of the volume.
6. On the mount, login as root user and create 1000 files:
   for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
7. On the mount, login as the new user, and copy existing data to the mount.
8. Start the volume using force.
9. While heal is in progress, add-brick and start rebalance.
10. Wait for rebalance and heal to complete.
11. Check for MSGID: 108008 errors in rebalance logs.

Reference BZ: #1821599

Change-Id: I0782d4b6e44782fd612d4f2ced248c3737132855
Signed-off-by: kshithijiyer
---
 .../afr/test_self_heal_with_expand_volume.py | 221 +++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 tests/functional/afr/test_self_heal_with_expand_volume.py

diff --git a/tests/functional/afr/test_self_heal_with_expand_volume.py b/tests/functional/afr/test_self_heal_with_expand_volume.py
new file mode 100644
index 000000000..d5b6d5d43
--- /dev/null
+++ b/tests/functional/afr/test_self_heal_with_expand_volume.py
@@ -0,0 +1,221 @@
+# Copyright (C) 2021 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from random import choice
+
+from glusto.core import Glusto as g
+from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
+from glustolibs.gluster.exceptions import ExecutionError
+from glustolibs.gluster.brick_libs import (bring_bricks_offline,
+                                           bring_bricks_online,
+                                           are_bricks_offline,
+                                           are_bricks_online, get_all_bricks)
+from glustolibs.gluster.glusterfile import (set_file_permissions,
+                                            occurences_of_pattern_in_file)
+from glustolibs.gluster.heal_libs import (monitor_heal_completion,
+                                          is_heal_complete)
+from glustolibs.gluster.rebalance_ops import (
+    rebalance_start, wait_for_rebalance_to_complete)
+from glustolibs.gluster.lib_utils import (add_user, del_user)
+from glustolibs.gluster.volume_libs import (get_subvols, expand_volume)
+
+
+@runs_on([['distributed-replicated'], ['glusterfs']])
+class TestHealWithExpandVolume(GlusterBaseClass):
+
+    def setUp(self):
+
+        self.get_super_method(self, 'setUp')()
+
+        self.first_client = self.mounts[0].client_system
+        self.mountpoint = self.mounts[0].mountpoint
+
+        # Create non-root users
+        self.users = ('qa_user', 'qa_admin')
+        for user in self.users:
+            if not add_user(self.first_client, user):
+                raise ExecutionError("Failed to create non-root user {}"
+                                     .format(user))
+        g.log.info("Successfully created non-root users")
+
+        # Setup Volume
+        if not self.setup_volume_and_mount_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to setup and mount volume")
+
+    def tearDown(self):
+
+        # Delete non-root users and their home directories
+        for user in self.users:
+            del_user(self.first_client, user)
+            ret, _, _ = g.run(self.first_client,
+                              "rm -rf /home/{}".format(user))
+            if ret:
+                raise ExecutionError("Failed to remove home dir of "
+                                     "non-root user")
+        g.log.info("Successfully deleted all users")
+
+        if not self.unmount_volume_and_cleanup_volume([self.mounts[0]]):
+            raise ExecutionError("Failed to cleanup Volume")
+
+        # Calling GlusterBaseClass tearDown
+        self.get_super_method(self, 'tearDown')()
+
+    def _bring_bricks_offline(self):
+        """Bring bricks offline and confirm that they are offline"""
+        # Select a brick to bring offline from a replica set
+        subvols_dict = get_subvols(self.mnode, self.volname)
+        subvols = subvols_dict['volume_subvols']
+        self.bricks_to_bring_offline = []
+        self.bricks_to_bring_offline.append(choice(subvols[0]))
+
+        # Bring bricks offline
+        ret = bring_bricks_offline(self.volname, self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Failed to bring bricks %s offline' %
+                        self.bricks_to_bring_offline)
+
+        ret = are_bricks_offline(self.mnode, self.volname,
+                                 self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s are not offline'
+                        % self.bricks_to_bring_offline)
+        g.log.info('Bringing bricks %s offline is successful',
+                   self.bricks_to_bring_offline)
+
+    def _restart_volume_and_bring_all_offline_bricks_online(self):
+        """Restart volume and bring all offline bricks online"""
+        ret = bring_bricks_online(self.mnode, self.volname,
+                                  self.bricks_to_bring_offline,
+                                  bring_bricks_online_methods=[
+                                      'volume_start_force'])
+        self.assertTrue(ret, 'Failed to bring bricks %s online' %
+                        self.bricks_to_bring_offline)
+
+        # Check if bricks are back online or not
+        ret = are_bricks_online(self.mnode, self.volname,
+                                self.bricks_to_bring_offline)
+        self.assertTrue(ret, 'Bricks %s not online even after restart' %
+                        self.bricks_to_bring_offline)
+
+        g.log.info('Bringing bricks %s online is successful',
+                   self.bricks_to_bring_offline)
+
+    def _wait_for_heal_to_complete(self):
+        """Wait for heal to complete on the volume"""
+        ret = monitor_heal_completion(self.mnode, self.volname,
+                                      timeout_period=3600)
+        self.assertTrue(ret, 'Heal has not yet completed')
+
+    def _check_if_there_are_files_to_be_healed(self):
+        """Check if there are files and dirs to be healed"""
+        ret = is_heal_complete(self.mnode, self.volname)
+        self.assertFalse(ret, 'Heal is completed')
+        g.log.info('Heal is pending')
+
+    def _expand_volume_and_wait_for_rebalance_to_complete(self):
+        """Expand volume and wait for rebalance to complete"""
+        # Add brick to volume
+        ret = expand_volume(self.mnode, self.volname, self.servers,
+                            self.all_servers_info)
+        self.assertTrue(ret, "Failed to add brick on volume %s"
+                        % self.volname)
+
+        # Trigger rebalance
+        ret, _, _ = rebalance_start(self.mnode, self.volname,
+                                    force=True)
+        self.assertEqual(ret, 0, "Failed to start rebalance on the volume %s"
+                         % self.volname)
+
+        # Wait for rebalance to complete
+        ret = wait_for_rebalance_to_complete(self.mnode, self.volname,
+                                             timeout=6000)
+        self.assertTrue(ret, "Rebalance is not yet complete on the volume "
+                        "%s" % self.volname)
+        g.log.info("Rebalance successfully completed")
+
+    def test_self_heal_and_add_brick_with_data_from_diff_users(self):
+        """
+        Test case:
+        1. Create a 2X3 volume.
+        2. Mount the volume using FUSE and give 777 permissions to the mount.
+        3. Add a new user.
+        4. Login as the new user and create 100 files from the new user:
+           for i in {1..100};do dd if=/dev/urandom of=$i bs=1024 count=1;done
+        5. Kill a brick which is part of the volume.
+        6. On the mount, login as root user and create 1000 files:
+           for i in {1..1000};do dd if=/dev/urandom of=f$i bs=10M count=1;done
+        7. On the mount, login as the new user, and copy existing data to
+           the mount.
+        8. Start the volume using force.
+        9. While heal is in progress, add-brick and start rebalance.
+        10. Wait for rebalance and heal to complete.
+        11. Check for MSGID: 108008 errors in rebalance logs.
+        """
+        # Change permissions of mount point to 777
+        ret = set_file_permissions(self.first_client, self.mountpoint,
+                                   '-R 777')
+        self.assertTrue(ret, "Unable to change mount point permissions")
+        g.log.info("Mount point permissions set to 777")
+
+        # Create 100 files from non-root user
+        cmd = ("su -l %s -c 'cd %s; for i in {1..100};do dd if=/dev/urandom "
+               "of=nonrootfile$i bs=1024 count=1; done'" % (self.users[0],
+                                                            self.mountpoint))
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to create files from non-root user")
+
+        # Kill one brick which is part of the volume
+        self._bring_bricks_offline()
+
+        # Create 1000 files from root user
+        cmd = ("cd %s; for i in {1..1000};do dd if=/dev/urandom of=rootfile$i"
+               " bs=10M count=1;done" % self.mountpoint)
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to create files from root user")
+
+        # On the mount, login as new user, and copy existing data to
+        # the mount
+        cmd = ("su -l %s -c 'wget https://cdn.kernel.org/pub/linux/kernel/"
+               "v5.x/linux-5.4.54.tar.xz; tar -xvf linux-5.4.54.tar.xz;"
+               "cd %s; cp -r ~/ .;'" % (self.users[1], self.mountpoint))
+        ret, _, _ = g.run(self.first_client, cmd)
+        self.assertFalse(ret, "Failed to copy files from non-root user")
+
+        # Check if there are files to be healed
+        self._check_if_there_are_files_to_be_healed()
+
+        # Start the volume using force to bring the killed brick back online
+        self._restart_volume_and_bring_all_offline_bricks_online()
+
+        # Add bricks to the volume and wait for rebalance to complete
+        self._expand_volume_and_wait_for_rebalance_to_complete()
+
+        # Wait for heal to complete
+        self._wait_for_heal_to_complete()
+
+        # Check for MSGID: 108008 errors in rebalance logs
+        participating_nodes = []
+        for brick in get_all_bricks(self.mnode, self.volname):
+            node, _ = brick.split(':')
+            participating_nodes.append(node)
+
+        for server in participating_nodes:
+            ret = occurences_of_pattern_in_file(
+                server, "MSGID: 108008",
+                "/var/log/glusterfs/{}-rebalance.log".format(self.volname))
+            self.assertEqual(ret, 0,
+                             "[Input/output error] present in rebalance log"
+                             " file")
+        g.log.info("Expanding volume successful and no MSGID: 108008 "
+                   "errors seen in rebalance logs")
--
cgit
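
For reference, the final verification step of the test (scanning each node's
rebalance log for "MSGID: 108008") can also be reproduced by hand outside the
glusto framework. The sketch below is only an illustration, not part of the
patch: the NODES list and VOLNAME value are assumptions, it uses plain
subprocess/ssh instead of glusto's g.run() and the library's
occurences_of_pattern_in_file() helper, and it assumes password-less SSH to
the brick nodes plus the default glusterfs log directory.

    # Standalone sketch: count "MSGID: 108008" occurrences in each node's
    # rebalance log. NODES and VOLNAME are placeholders for your setup.
    import subprocess

    VOLNAME = "testvol"                                     # assumed volume name
    NODES = ["server1.example.com", "server2.example.com"]  # assumed brick nodes

    for node in NODES:
        cmd = ("grep -c 'MSGID: 108008' "
               "/var/log/glusterfs/{}-rebalance.log".format(VOLNAME))
        proc = subprocess.run(["ssh", node, cmd], capture_output=True,
                              text=True, check=False)
        # grep exits non-zero when there are no matches, so only the
        # printed count is inspected here
        count = int(proc.stdout.strip() or 0)
        assert count == 0, ("[Input/output error] entries found in the "
                            "rebalance log on {}".format(node))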