Diffstat (limited to 'tests/functional/gluster_stability/test_gluster_services_restart.py')
 tests/functional/gluster_stability/test_gluster_services_restart.py | 340 ++++++++
 1 file changed, 340 insertions(+), 0 deletions(-)
diff --git a/tests/functional/gluster_stability/test_gluster_services_restart.py b/tests/functional/gluster_stability/test_gluster_services_restart.py
new file mode 100644
index 00000000..bbde551f
--- /dev/null
+++ b/tests/functional/gluster_stability/test_gluster_services_restart.py
@@ -0,0 +1,340 @@
+from datetime import datetime
+import re
+import time
+from unittest import skip
+
+import ddt
+from glusto.core import Glusto as g
+
+from cnslibs.common.baseclass import BaseClass
+from cnslibs.common.heketi_ops import heketi_blockvolume_list
+from cnslibs.common.openshift_ops import (
+ get_pod_name_from_dc,
+ match_pv_and_heketi_block_volumes,
+ match_pvc_and_pv,
+ oc_create_app_dc_with_io,
+ oc_create_pvc,
+ oc_create_sc,
+ oc_create_secret,
+ oc_delete,
+ oc_get_custom_resource,
+ oc_get_yaml,
+ oc_rsh,
+ restart_service_on_gluster_pod_or_node,
+ scale_dc_pod_amount_and_wait,
+ verify_pvc_status_is_bound,
+ wait_for_pod_be_ready,
+ wait_for_resource_absence,
+ wait_for_service_status_on_gluster_pod_or_node,
+)
+from cnslibs.common.gluster_ops import (
+ get_block_hosting_volume_name,
+ get_gluster_vol_hosting_nodes,
+ match_heketi_and_gluster_block_volumes_by_prefix,
+ restart_file_volume,
+ restart_gluster_vol_brick_processes,
+ wait_to_heal_complete,
+)
+from cnslibs.common import utils
+
+
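+# Matches entries printed by "heketi-cli blockvolume list", e.g.
+#   Id:<vol_id> Cluster:<cluster_id> Name:<prefix>_<suffix>
+# (placeholder values shown here are illustrative only)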
+HEKETI_BLOCK_VOLUME_REGEX = "^Id:(.*).Cluster:(.*).Name:%s_(.*)$"
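+
+# Gluster-block related services restarted by these tests. Note that
+# gluster-block-target is a oneshot systemd unit, so its healthy state
+# is "exited" rather than "running" (see the status checks below).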
+SERVICE_TARGET = "gluster-block-target"
+SERVICE_BLOCKD = "gluster-blockd"
+SERVICE_TCMU = "tcmu-runner"
+
+
+@ddt.ddt
+class GlusterStabilityTestSetup(BaseClass):
+ """class for gluster stability (restarts different servces) testcases
+ """
+
+    def setUp(self):
+        """Deploy and verify the resources required by the test cases
+           and register them for deletion in the cleanup phase
+        """
+        super(GlusterStabilityTestSetup, self).setUp()
+ self.oc_node = self.ocp_master_node[0]
+ self.prefix = "autotest-%s" % utils.get_random_str()
+ _storage_class = self.storage_classes.get(
+ 'storage_class2',
+ self.storage_classes.get('block_storage_class'))
+ self.provisioner = _storage_class["provisioner"]
+ self.restsecretnamespace = _storage_class["restsecretnamespace"]
+ self.restuser = _storage_class["restuser"]
+ self.resturl = _storage_class["resturl"]
+
+        # size of each PVC created for the test
+        self.pvcsize = 1
+
+        # number of additional PVCs created for the test
+        self.pvccount = 10
+
+ # create gluster block storage class, PVC and user app pod
+ self.sc_name, self.pvc_name, self.dc_name, self.secret_name = (
+            self.deploy_resources()
+ )
+
+ # verify storage class
+ oc_get_yaml(self.oc_node, "sc", self.sc_name)
+
+        # get the pod name and verify that the pod reaches Ready state
+ self.pod_name = get_pod_name_from_dc(
+ self.oc_node, self.dc_name, timeout=180, wait_step=3
+ )
+ wait_for_pod_be_ready(
+ self.oc_node, self.pod_name, timeout=180, wait_step=3
+ )
+ verify_pvc_status_is_bound(self.oc_node, self.pvc_name)
+
+        # create additional PVCs for the test
+        self.pvc_list = []
+        for _ in range(self.pvccount):
+ test_pvc_name = oc_create_pvc(
+ self.oc_node, self.sc_name,
+ pvc_name_prefix=self.prefix, pvc_size=self.pvcsize
+ )
+ self.pvc_list.append(test_pvc_name)
+ self.addCleanup(
+ wait_for_resource_absence, self.oc_node, "pvc", test_pvc_name,
+ timeout=600, interval=10
+ )
+
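+        # PVC deletions are registered after the absence waits so that,
+        # with LIFO cleanup ordering, each PVC gets deleted before we
+        # wait for its absence
+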
+ for pvc_name in self.pvc_list:
+ self.addCleanup(oc_delete, self.oc_node, "pvc", pvc_name)
+
+    def deploy_resources(self):
+        """Deploy the required resources: storage class, PVC and a user
+           app pod running continuous I/O
+
+        Returns:
+            sc_name (str): deployed storage class name
+            pvc_name (str): deployed persistent volume claim name
+            dc_name (str): deployed deployment config name
+            secretname (str): created secret name
+        """
+ secretname = oc_create_secret(
+ self.oc_node, namespace=self.restsecretnamespace,
+ data_key=self.heketi_cli_key, secret_type=self.provisioner)
+ self.addCleanup(oc_delete, self.oc_node, 'secret', secretname)
+
+ sc_name = oc_create_sc(
+ self.oc_node,
+ sc_name_prefix=self.prefix, provisioner=self.provisioner,
+ resturl=self.resturl, restuser=self.restuser,
+ restsecretnamespace=self.restsecretnamespace,
+ restsecretname=secretname, volumenameprefix=self.prefix
+ )
+ self.addCleanup(oc_delete, self.oc_node, "sc", sc_name)
+
+ pvc_name = oc_create_pvc(
+ self.oc_node, sc_name,
+ pvc_name_prefix=self.prefix, pvc_size=self.pvcsize
+ )
+ self.addCleanup(
+ wait_for_resource_absence, self.oc_node, "pvc", pvc_name,
+ timeout=120, interval=5
+ )
+ self.addCleanup(oc_delete, self.oc_node, "pvc", pvc_name)
+
+ dc_name = oc_create_app_dc_with_io(
+ self.oc_node, pvc_name, dc_name_prefix=self.prefix
+ )
+ self.addCleanup(oc_delete, self.oc_node, "dc", dc_name)
+ self.addCleanup(scale_dc_pod_amount_and_wait, self.oc_node, dc_name, 0)
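+
+        # note: cleanups run LIFO, so the DC is scaled down to 0 pods
+        # first and the volume is unmounted before DC, PVC and SC removal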
+
+ return sc_name, pvc_name, dc_name, secretname
+
+    def get_block_hosting_volume_by_pvc_name(self, pvc_name):
+        """Get the block hosting volume for the given PVC name
+
+        Args:
+            pvc_name (str): name of the PVC whose block hosting
+                volume should be returned
+
+        Returns:
+            str: name of the block hosting volume
+        """
+ pv_name = oc_get_custom_resource(
+ self.oc_node, 'pvc', ':.spec.volumeName', name=pvc_name
+ )[0]
+
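+        # the PV created by the gluster-block provisioner stores the
+        # heketi block volume id in its "gluster.org/volume-id" annotation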
+ block_volume = oc_get_custom_resource(
+ self.oc_node, 'pv',
+ r':.metadata.annotations."gluster\.org\/volume\-id"',
+ name=pv_name
+ )[0]
+
+        # get the block hosting volume name for that block volume id
+ block_hosting_vol = get_block_hosting_volume_name(
+ self.heketi_client_node, self.heketi_server_url, block_volume)
+
+ return block_hosting_vol
+
+    def get_heketi_block_volumes(self):
+        """List heketi block volumes created with the test prefix
+
+        Returns:
+            tuple: sorted list of heketi block volume ids and sorted
+                list of heketi block volume names
+        """
+ heketi_cmd_out = heketi_blockvolume_list(
+ self.heketi_client_node,
+ self.heketi_server_url,
+ secret=self.heketi_cli_key,
+ user=self.heketi_cli_user
+ )
+
+ self.assertTrue(heketi_cmd_out, "failed to get block volume list")
+
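+        # collect ids and name suffixes of block volumes created with the
+        # test prefix; both lists are sorted so they can be compared with
+        # similarly sorted lists gathered from OCP and gluster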
+ heketi_block_volume_ids = []
+ heketi_block_volume_names = []
+ for block_vol in heketi_cmd_out.split("\n"):
+ heketi_vol_match = re.search(
+ HEKETI_BLOCK_VOLUME_REGEX % self.prefix, block_vol.strip()
+ )
+ if heketi_vol_match:
+ heketi_block_volume_ids.append(
+ (heketi_vol_match.group(1)).strip()
+ )
+ heketi_block_volume_names.append(
+ (heketi_vol_match.group(3)).strip()
+ )
+
+        return (sorted(heketi_block_volume_ids),
+                sorted(heketi_block_volume_names))
+
+ def validate_volumes_and_blocks(self):
+ """Validates PVC and block volumes generated through heketi and OCS
+ """
+
+        # verify that all the PVCs are in "Bound" state
+ for pvc in self.pvc_list:
+ verify_pvc_status_is_bound(
+ self.oc_node, pvc, timeout=300, wait_step=10
+ )
+
+ # validate pvcs and pvs created on OCS
+ match_pvc_and_pv(self.oc_node, self.prefix)
+
+ # get list of block volumes using heketi
+ heketi_block_volume_ids, heketi_block_volume_names = (
+ self.get_heketi_block_volumes()
+ )
+
+ # validate block volumes listed by heketi and pvs
+ match_pv_and_heketi_block_volumes(
+ self.oc_node, heketi_block_volume_ids, self.prefix
+ )
+
+ # validate block volumes listed by heketi and gluster
+ match_heketi_and_gluster_block_volumes_by_prefix(
+ heketi_block_volume_names, "%s_" % self.prefix)
+
+    def get_io_time(self):
+        """Get the last IO time on the pod by listing the log files
+           under /mnt
+
+        Returns:
+            datetime: timestamp (HH:MM) of the most recent log file
+        """
+ ret, stdout, stderr = oc_rsh(
+ self.oc_node, self.pod_name, "ls -l /mnt/ | awk '{print $8}'"
+ )
+ if ret != 0:
+ err_msg = "failed to get io time for pod %s" % self.pod_name
+ g.log.error(err_msg)
+ raise AssertionError(err_msg)
+
+ get_time = None
+ try:
+ get_time = datetime.strptime(stdout.strip(), "%H:%M")
+ except Exception:
+ g.log.error("invalid time format ret %s, stout: %s, "
+ "stderr: %s" % (ret, stdout, stderr))
+ raise
+
+ return get_time
+
+    def restart_block_hosting_volume_wait_for_heal(self, block_hosting_vol):
+        """Restart the block hosting volume and wait for heal to complete
+
+        Args:
+            block_hosting_vol (str): name of the block hosting volume
+                to restart
+        """
+ start_io_time = self.get_io_time()
+
+ restart_file_volume(block_hosting_vol)
+
+        # give IO on the PVC a few seconds to resume after the volume start
+        time.sleep(5)
+        resume_io_time = self.get_io_time()
+
+        self.assertGreater(
+            resume_io_time, start_io_time,
+            "IO has not resumed after the volume restart")
+
+ wait_to_heal_complete()
+
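+    # ddt runs the test below once per service, restarting a different
+    # gluster-block related service each time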
+ @ddt.data(SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET)
+ def test_restart_services_provision_volume_and_run_io(self, service):
+ """Restart gluster service then validate volumes"""
+ block_hosting_vol = self.get_block_hosting_volume_by_pvc_name(
+ self.pvc_name)
+ g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol)
+        self.assertGreater(len(g_nodes), 2, "need at least 3 gluster nodes")
+
+        # restart the given gluster-block service on one of the nodes
+ restart_service_on_gluster_pod_or_node(
+ self.oc_node, service, g_nodes[0])
+
+ # wait for deployed user pod to be in Running state after restarting
+ # service
+ wait_for_pod_be_ready(
+ self.oc_node, self.pod_name, timeout=60, wait_step=5)
+
+        # check that every gluster-block service reaches its expected state
+        for g_node in g_nodes:
+            for svc in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET):
+                status = "exited" if svc == SERVICE_TARGET else "running"
+                self.assertTrue(wait_for_service_status_on_gluster_pod_or_node(
+                    self.oc_node, svc, status, g_node))
+
+        # validate pvc, pv, heketi block and gluster block volume counts
+        # after the service restart
+ self.validate_volumes_and_blocks()
+
+ @skip("Blocked by BZ-1634745, BZ-1635736, BZ-1636477")
+ def test_target_side_failures_brick_failure_on_block_hosting_volume(self):
+ """Target side failures - Brick failure on block hosting volume"""
+ # get block hosting volume from pvc name
+ block_hosting_vol = self.get_block_hosting_volume_by_pvc_name(
+ self.pvc_name)
+
+        # restart 2 brick processes of the block hosting volume
+        g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol)
+        self.assertGreater(len(g_nodes), 2, "need at least 3 gluster nodes")
+ restart_gluster_vol_brick_processes(
+ self.oc_node, block_hosting_vol, g_nodes[:2])
+
+        # check that every gluster-block service reaches its expected state
+        for g_node in g_nodes:
+            for svc in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET):
+                status = "exited" if svc == SERVICE_TARGET else "running"
+                self.assertTrue(wait_for_service_status_on_gluster_pod_or_node(
+                    self.oc_node, svc, status, g_node))
+
+        # validate pvc, pv, heketi block and gluster block volume counts
+        # after the brick process restarts
+ self.validate_volumes_and_blocks()
+
+ @skip("Blocked by BZ-1634745, BZ-1635736, BZ-1636477")
+ def test_start_stop_block_volume_service(self):
+ """Validate block hosting volume by start/stop operation
+
+        Perform a stop/start operation on the block hosting volume
+        while IO and provisioning are in progress
+ """
+ # get block hosting volume from pvc name
+ block_hosting_vol = self.get_block_hosting_volume_by_pvc_name(
+ self.pvc_name
+ )
+
+        # restart the block hosting volume and check that heal completes
+ self.restart_block_hosting_volume_wait_for_heal(block_hosting_vol)
+
+        # validate pvc, pv, heketi block and gluster block volume counts
+        # after the volume restart
+ self.validate_volumes_and_blocks()