diff options
author | Valerii Ponomarov <vponomar@redhat.com> | 2019-01-21 18:27:09 +0530 |
---|---|---|
committer | vponomar <vponomar@redhat.com> | 2019-02-08 07:33:43 +0000 |
commit | 2c5ae6e4bfd9ccb6cb7330e46cfec3f910468dad (patch) | |
tree | 6b743d3eabcd90cc85870b7f489dd64c7f284779 | |
parent | 6876a64de0913772c4004d3c0c836051f3d103f5 (diff) |
Add CRS support to the 'restart gluster services' test cases
Change-Id: I40b7ea79d1f7dbc82db825f6ced4447a157361e6
-rw-r--r-- | cns-libs/cnslibs/common/gluster_ops.py | 270 | ||||
-rw-r--r-- | cns-libs/cnslibs/common/openshift_ops.py | 87 | ||||
-rw-r--r-- | tests/functional/common/gluster_stability/test_gluster_services_restart.py | 92 | ||||
-rw-r--r-- | tests/functional/common/test_node_restart.py | 6 |
4 files changed, 205 insertions, 250 deletions
diff --git a/cns-libs/cnslibs/common/gluster_ops.py b/cns-libs/cnslibs/common/gluster_ops.py index 76b3bc7d..e740daa3 100644 --- a/cns-libs/cnslibs/common/gluster_ops.py +++ b/cns-libs/cnslibs/common/gluster_ops.py @@ -1,9 +1,9 @@ -import six import time import json import re from glusto.core import Glusto as g +from glustolibs.gluster.block_ops import block_list from glustolibs.gluster.heal_libs import is_heal_complete from glustolibs.gluster.volume_ops import ( get_volume_status, @@ -12,63 +12,27 @@ from glustolibs.gluster.volume_ops import ( volume_start, volume_stop ) -from glustolibs.gluster.block_ops import block_list + +from cnslibs.common import exceptions +from cnslibs.common.heketi_ops import heketi_blockvolume_info from cnslibs.common.openshift_ops import ( - oc_get_pods, - oc_rsh, - wait_for_process_to_kill_on_pod + cmd_run_on_gluster_pod_or_node, ) -from cnslibs.common.heketi_ops import heketi_blockvolume_info -from cnslibs.common import exceptions, podcmd +from cnslibs.common import podcmd from cnslibs.common import waiter -def _get_gluster_pod(gluster_pod, hostname=None): - """create glusto.podcmd object if gluster_pod is string and - hostname is given else returns gluster_pod object given - - Args: - gluster_pod (podcmd | str): gluster pod class object has gluster - pod and ocp master node or gluster - pod name - hostname (str): master node on which gluster pod exists - """ - if isinstance(gluster_pod, podcmd.Pod): - return gluster_pod - elif isinstance(gluster_pod, six.string_types): - if hostname: - return podcmd.Pod(hostname, gluster_pod) - else: - raise exceptions.ExecutionError( - "gluster pod is string '%s' but hostname '%s' not valid" % ( - gluster_pod, hostname) - ) - else: - raise exceptions.ExecutionError( - "invalid gluster pod parameter '%s', '%s'" % ( - gluster_pod, type(gluster_pod)) - ) - - @podcmd.GlustoPod() -def wait_to_heal_complete( - gluster_pod, hostname=None, timeout=300, wait_step=5): - """Monitors heal for volumes on gluster - gluster_pod (podcmd | str): gluster pod class object has gluster - pod and ocp master node or gluster - pod name - hostname (str): master node on which gluster pod exists - """ - gluster_pod = _get_gluster_pod(gluster_pod, hostname) - - gluster_vol_list = get_volume_list(gluster_pod) +def wait_to_heal_complete(timeout=300, wait_step=5): + """Monitors heal for volumes on gluster""" + gluster_vol_list = get_volume_list("auto_get_gluster_endpoint") if not gluster_vol_list: raise AssertionError("failed to get gluster volume list") _waiter = waiter.Waiter(timeout=timeout, interval=wait_step) for gluster_vol in gluster_vol_list: for w in _waiter: - if is_heal_complete(gluster_pod, gluster_vol): + if is_heal_complete("auto_get_gluster_endpoint", gluster_vol): break if w.expired: @@ -79,161 +43,170 @@ def wait_to_heal_complete( @podcmd.GlustoPod() -def get_brick_pids(gluster_pod, block_hosting_vol, hostname=None): - """gets brick pids from gluster pods +def get_gluster_vol_status(file_vol): + """Get Gluster vol hosting nodes. Args: - hostname (str): hostname on which gluster pod exists - gluster_pod (podcmd | str): gluster pod class object has gluster - pod and ocp master node or gluster - pod name - block_hosting_vol (str): Block hosting volume id + file_vol (str): file volume name. """ - gluster_pod = _get_gluster_pod(gluster_pod, hostname) - - gluster_volume_status = get_volume_status(gluster_pod, block_hosting_vol) + # Get Gluster vol info + gluster_volume_status = get_volume_status( + "auto_get_gluster_endpoint", file_vol) if not gluster_volume_status: - raise AssertionError("failed to get volume status for gluster " - "volume '%s' on pod '%s'" % ( - gluster_pod, block_hosting_vol)) - - gluster_volume_status = gluster_volume_status.get(block_hosting_vol) - assert gluster_volume_status, ("gluster volume %s not present" % ( - block_hosting_vol)) - - pids = {} - for parent_key, parent_val in gluster_volume_status.items(): - for child_key, child_val in parent_val.items(): - if not child_key.startswith("/var"): - continue - - pid = child_val["pid"] - # When birck is down, pid of the brick is returned as -1. - # Which is unexepeted situation, hence raising error. - if pid == "-1": - raise AssertionError("Something went wrong brick pid is -1") - - pids[parent_key] = pid - - return pids + raise AssertionError("Failed to get volume status for gluster " + "volume '%s'" % file_vol) + if file_vol in gluster_volume_status: + gluster_volume_status = gluster_volume_status.get(file_vol) + return gluster_volume_status @podcmd.GlustoPod() -def restart_brick_process(hostname, gluster_pod, block_hosting_vol): - """restarts brick process of block hosting volumes +def get_gluster_vol_hosting_nodes(file_vol): + """Get Gluster vol hosting nodes. Args: - hostname (str): hostname on which gluster pod exists - gluster_pod (podcmd | str): gluster pod class object has gluster - pod and ocp master node or gluster - pod name - block_hosting_vol (str): block hosting volume name + file_vol (str): file volume name. """ - pids = get_brick_pids(gluster_pod, block_hosting_vol, hostname) - - # using count variable to limit the max pod process kill to 2 - count = 0 - killed_process = {} - pid_keys = pids.keys() - oc_pods = oc_get_pods(hostname) - for pod in oc_pods.keys(): - if not (oc_pods[pod]["ip"] in pid_keys and count <= 1): - continue - - ret, out, err = oc_rsh( - hostname, pod, "kill -9 %s" % pids[oc_pods[pod]["ip"]] - ) - if ret != 0: - err_msg = "failed to kill process id %s error: %s" % ( - pids[oc_pods[pod]["ip"]], err) - g.log.error(err_msg) - raise AssertionError(err_msg) + vol_status = get_gluster_vol_status(file_vol) + g_nodes = [] + for g_node, g_node_data in vol_status.items(): + for process_name, process_data in g_node_data.items(): + if not process_name.startswith("/var"): + continue + g_nodes.append(g_node) + return g_nodes - killed_process[pod] = pids[oc_pods[pod]["ip"]] - count += 1 - for pod, pid in killed_process.items(): - wait_for_process_to_kill_on_pod(pod, pid, hostname) +@podcmd.GlustoPod() +def restart_gluster_vol_brick_processes(ocp_client_node, file_vol, + gluster_nodes): + """Restarts brick process of a file volume. - ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True) + Args: + ocp_client_node (str): Node to execute OCP commands on. + file_vol (str): file volume name. + gluster_nodes (str/list): One or several IPv4 addresses of Gluster + nodes, where 'file_vol' brick processes must be recreated. + """ + if not isinstance(gluster_nodes, (list, set, tuple)): + gluster_nodes = [gluster_nodes] + + # Get Gluster vol brick PIDs + gluster_volume_status = get_gluster_vol_status(file_vol) + pids = () + for gluster_node in gluster_nodes: + pid = None + for g_node, g_node_data in gluster_volume_status.items(): + if g_node != gluster_node: + continue + for process_name, process_data in g_node_data.items(): + if not process_name.startswith("/var"): + continue + pid = process_data["pid"] + # When birck is down, pid of the brick is returned as -1. + # Which is unexepeted situation. So, add appropriate assertion. + assert pid != "-1", ( + "Got unexpected PID (-1) for '%s' gluster vol on '%s' " + "node." % file_vol, gluster_node) + assert pid, ("Could not find 'pid' in Gluster vol data for '%s' " + "Gluster node. Data: %s" % ( + gluster_node, gluster_volume_status)) + pids.append((gluster_node, pid)) + + # Restart Gluster vol brick processes using found PIDs + for gluster_node, pid in pids: + cmd = "kill -9 %s" % pid + cmd_run_on_gluster_pod_or_node(ocp_client_node, cmd, gluster_node) + + # Wait for Gluster vol brick processes to be recreated + for gluster_node, pid in pids: + killed_pid_cmd = "ps -eaf | grep %s | grep -v grep | awk '{print $2}'" + _waiter = waiter.Waiter(timeout=60, interval=2) + for w in _waiter: + result = cmd_run_on_gluster_pod_or_node( + ocp_client_node, killed_pid_cmd, gluster_node) + if result.strip() == pid: + continue + g.log.info("Brick process '%s' was killed successfully on '%s'" % ( + pid, gluster_node)) + break + if w.expired: + error_msg = ("Process ID '%s' still exists on '%s' after waiting " + "for it 60 seconds to get killed." % ( + pid, gluster_node)) + g.log.error(error_msg) + raise exceptions.ExecutionError(error_msg) + + # Start volume after gluster vol brick processes recreation + ret, out, err = volume_start( + "auto_get_gluster_endpoint", file_vol, force=True) if ret != 0: - err_msg = "failed to start gluster volume %s on pod %s error: %s" % ( - block_hosting_vol, gluster_pod, err) + err_msg = "Failed to start gluster volume %s on %s. error: %s" % ( + file_vol, gluster_node, err) g.log.error(err_msg) raise AssertionError(err_msg) @podcmd.GlustoPod() -def restart_block_hosting_volume( - gluster_pod, block_hosting_vol, sleep_time=120, hostname=None): - """restars block hosting volume service +def restart_file_volume(file_vol, sleep_time=120): + """Restars file volume service. Args: - hostname (str): hostname on which gluster pod exists - gluster_pod (podcmd | str): gluster pod class object has gluster - pod and ocp master node or gluster - pod name - block_hosting_vol (str): name of block hosting volume + file_vol (str): name of a file volume """ - gluster_pod = _get_gluster_pod(gluster_pod, hostname) - - gluster_volume_status = get_volume_status(gluster_pod, block_hosting_vol) + gluster_volume_status = get_volume_status( + "auto_get_gluster_endpoint", file_vol) if not gluster_volume_status: raise AssertionError("failed to get gluster volume status") g.log.info("Gluster volume %s status\n%s : " % ( - block_hosting_vol, gluster_volume_status) + file_vol, gluster_volume_status) ) - ret, out, err = volume_stop(gluster_pod, block_hosting_vol) + ret, out, err = volume_stop("auto_get_gluster_endpoint", file_vol) if ret != 0: - err_msg = "failed to stop gluster volume %s on pod %s error: %s" % ( - block_hosting_vol, gluster_pod, err) + err_msg = "Failed to stop gluster volume %s. error: %s" % ( + file_vol, err) g.log.error(err_msg) raise AssertionError(err_msg) # Explicit wait to stop ios and pvc creation for 2 mins time.sleep(sleep_time) - ret, out, err = volume_start(gluster_pod, block_hosting_vol, force=True) + + ret, out, err = volume_start( + "auto_get_gluster_endpoint", file_vol, force=True) if ret != 0: - err_msg = "failed to start gluster volume %s on pod %s error: %s" % ( - block_hosting_vol, gluster_pod, err) + err_msg = "failed to start gluster volume %s error: %s" % ( + file_vol, err) g.log.error(err_msg) raise AssertionError(err_msg) - ret, out, err = volume_status(gluster_pod, block_hosting_vol) + ret, out, err = volume_status("auto_get_gluster_endpoint", file_vol) if ret != 0: - err_msg = ("failed to get status for gluster volume %s on pod %s " - "error: %s" % (block_hosting_vol, gluster_pod, err)) + err_msg = ("Failed to get status for gluster volume %s error: %s" % ( + file_vol, err)) g.log.error(err_msg) raise AssertionError(err_msg) @podcmd.GlustoPod() def match_heketi_and_gluster_block_volumes_by_prefix( - gluster_pod, heketi_block_volumes, block_vol_prefix, hostname=None): + heketi_block_volumes, block_vol_prefix): """Match block volumes from heketi and gluster. This function can't be used for block volumes with custom prefixes Args: - gluster_pod (podcmd | str): gluster pod class object has gluster - pod and ocp master node or gluster - pod name heketi_block_volumes (list): list of heketi block volumes with which gluster block volumes need to be matched block_vol_prefix (str): block volume prefix by which the block volumes needs to be filtered - hostname (str): ocp master node on which oc command gets executed - """ - gluster_pod = _get_gluster_pod(gluster_pod, hostname) - - gluster_vol_list = get_volume_list(gluster_pod) + gluster_vol_list = get_volume_list("auto_get_gluster_endpoint") gluster_vol_block_list = [] for gluster_vol in gluster_vol_list[1:]: - ret, out, err = block_list(gluster_pod, gluster_vol) + ret, out, err = block_list("auto_get_gluster_endpoint", gluster_vol) try: if ret != 0 and json.loads(out)["RESULT"] == "FAIL": msg = "failed to get block volume list with error: %s" % err @@ -260,7 +233,7 @@ def match_heketi_and_gluster_block_volumes_by_prefix( @podcmd.GlustoPod() def get_block_hosting_volume_name(heketi_client_node, heketi_server_url, - block_volume, gluster_pod, hostname=None): + block_volume): """Returns block hosting volume name of given block volume Args: @@ -268,16 +241,9 @@ def get_block_hosting_volume_name(heketi_client_node, heketi_server_url, heketi_server_url (str): Heketi server url block_volume (str): Block volume of which block hosting volume returned - gluster_pod (podcmd | str): Gluster pod class object has gluster - pod and ocp master node or gluster - pod name - hostname (str): OCP master node on which ocp commands get executed - Returns: str : Name of the block hosting volume for given block volume """ - gluster_pod = _get_gluster_pod(gluster_pod, hostname) - block_vol_info = heketi_blockvolume_info( heketi_client_node, heketi_server_url, block_volume ) @@ -290,7 +256,7 @@ def get_block_hosting_volume_name(heketi_client_node, heketi_server_url, if not block_hosting_vol_match: continue - gluster_vol_list = get_volume_list(gluster_pod) + gluster_vol_list = get_volume_list("auto_get_gluster_endpoint") for vol in gluster_vol_list: if block_hosting_vol_match.group(1).strip() in vol: return vol diff --git a/cns-libs/cnslibs/common/openshift_ops.py b/cns-libs/cnslibs/common/openshift_ops.py index d98c550b..dd5f8e17 100644 --- a/cns-libs/cnslibs/common/openshift_ops.py +++ b/cns-libs/cnslibs/common/openshift_ops.py @@ -1370,15 +1370,14 @@ def match_pv_and_heketi_block_volumes( raise AssertionError(err_msg) -def check_service_status( - hostname, podname, service, status, timeout=180, wait_step=3): - """Checks provided service to be in "Running" status for given - timeout on given podname +def check_service_status_on_pod( + ocp_client, podname, service, status, timeout=180, wait_step=3): + """Check a service state on a pod. Args: - hostname (str): hostname on which we want to check service - podname (str): pod name on which service needs to be restarted - service (str): service which needs to be restarted + ocp_client (str): node with 'oc' client + podname (str): pod name on which service needs to be checked + service (str): service which needs to be checked status (str): status to be checked timeout (int): seconds to wait before service starts having specified 'status' @@ -1389,7 +1388,7 @@ def check_service_status( "having '%s' status" % (timeout, service, status)) for w in waiter.Waiter(timeout, wait_step): - ret, out, err = oc_rsh(hostname, podname, SERVICE_STATUS % service) + ret, out, err = oc_rsh(ocp_client, podname, SERVICE_STATUS % service) if ret != 0: err_msg = ("failed to get service %s's status on pod %s" % (service, podname)) @@ -1406,50 +1405,46 @@ def check_service_status( raise exceptions.ExecutionError(err_msg) -def restart_service_on_pod(hostname, podname, service): - """Restarts service on podname given +def wait_for_service_status_on_gluster_pod_or_node( + ocp_client, service, status, gluster_node, timeout=180, wait_step=3): + """Wait for a service specific status on a Gluster POD or node. Args: - hostname (str): hostname on which we want to restart service - podname (str): pod name on which service needs to be restarted - service (str): service which needs to be restarted - Raises: - AssertionError in case failed to restarts service + ocp_client (str): hostname on which we want to check service + service (str): target service to be checked + status (str): service status which we wait for + gluster_node (str): Gluster node IPv4 which stores either Gluster POD + or Gluster services directly. + timeout (int): seconds to wait before service starts having + specified 'status' + wait_step (int): interval in seconds to wait before checking + service again. """ - ret, out, err = oc_rsh(hostname, podname, SERVICE_RESTART % service) - if ret != 0: - err_msg = ("failed to restart service %s on pod %s" % - (service, podname)) + err_msg = ("Exceeded timeout of %s sec for verifying %s service to start " + "having '%s' status" % (timeout, service, status)) + + for w in waiter.Waiter(timeout, wait_step): + out = cmd_run_on_gluster_pod_or_node( + ocp_client, SERVICE_STATUS % service, gluster_node) + for line in out.splitlines(): + status_match = re.search(SERVICE_STATUS_REGEX, line) + if status_match and status_match.group(1) == status: + return True + if w.expired: g.log.error(err_msg) - raise AssertionError(err_msg) + raise exceptions.ExecutionError(err_msg) -def wait_for_process_to_kill_on_pod( - pod, pid, hostname, timeout=60, interval=3): - """check for process presence if process is present for more than - timeout sec raise exception +def restart_service_on_gluster_pod_or_node(ocp_client, service, gluster_node): + """Restart service on Gluster either POD or node. Args: - pid (int | str): process id to be killed on pod - pod (str): pod name on which process id to be killed - hostname (str): hostname on which pod is present + ocp_client (str): host on which we want to run 'oc' commands. + service (str): service which needs to be restarted + gluster_node (str): Gluster node IPv4 which stores either Gluster POD + or Gluster services directly. + Raises: + AssertionError in case restart of a service fails. """ - killed_pid_cmd = "ps -eaf | grep %s | grep -v grep | awk '{print $2}'" - _waiter = waiter.Waiter(timeout=60, interval=3) - for w in _waiter: - ret, out, err = oc_rsh(hostname, pod, killed_pid_cmd % pid) - if ret != 0: - err_msg = ("failed to get killed process id '%s' details " - "from pod '%s' err: %s" % (pid, pod, err)) - g.log.error(err_msg) - raise AssertionError(err_msg) - - if not out.strip() == pid: - g.log.info("brick process '%s' killed on pod '%s'" % (pid, pod)) - break - - if w.expired: - error_msg = ("process id '%s' still exists on pod '%s' after waiting " - "for it '%s' seconds to get kill" % (pid, pod, timeout)) - g.log.error(error_msg) - raise exceptions.ExecutionError(error_msg) + cmd_run_on_gluster_pod_or_node( + ocp_client, SERVICE_RESTART % service, gluster_node) diff --git a/tests/functional/common/gluster_stability/test_gluster_services_restart.py b/tests/functional/common/gluster_stability/test_gluster_services_restart.py index 168ff466..2c8603a2 100644 --- a/tests/functional/common/gluster_stability/test_gluster_services_restart.py +++ b/tests/functional/common/gluster_stability/test_gluster_services_restart.py @@ -1,16 +1,14 @@ -from unittest import skip - -import ddt +from datetime import datetime import re import time +from unittest import skip -from datetime import datetime +import ddt from glusto.core import Glusto as g + +from cnslibs.cns.cns_baseclass import BaseClass from cnslibs.common.heketi_ops import heketi_blockvolume_list from cnslibs.common.openshift_ops import ( - check_service_status, - oc_get_custom_resource, - get_ocp_gluster_pod_names, get_pod_name_from_dc, match_pv_and_heketi_block_volumes, match_pvc_and_pv, @@ -19,26 +17,27 @@ from cnslibs.common.openshift_ops import ( oc_create_sc, oc_create_secret, oc_delete, + oc_get_custom_resource, oc_get_yaml, oc_rsh, - restart_service_on_pod, + restart_service_on_gluster_pod_or_node, scale_dc_pod_amount_and_wait, verify_pvc_status_is_bound, wait_for_pod_be_ready, - wait_for_resource_absence + wait_for_resource_absence, + wait_for_service_status_on_gluster_pod_or_node, ) from cnslibs.common.gluster_ops import ( get_block_hosting_volume_name, + get_gluster_vol_hosting_nodes, match_heketi_and_gluster_block_volumes_by_prefix, - restart_block_hosting_volume, - restart_brick_process, - wait_to_heal_complete + restart_file_volume, + restart_gluster_vol_brick_processes, + wait_to_heal_complete, ) -from cnslibs.cns.cns_baseclass import BaseClass -from cnslibs.common import podcmd -HEKETI_BLOCK_VOLUME_REGEX = "^Id:(.*).Cluster:(.*).Name:%s_(.*)$" +HEKETI_BLOCK_VOLUME_REGEX = "^Id:(.*).Cluster:(.*).Name:%s_(.*)$" SERVICE_TARGET = "gluster-block-target" SERVICE_BLOCKD = "gluster-blockd" SERVICE_TCMU = "tcmu-runner" @@ -54,8 +53,6 @@ class GlusterStabilityTestSetup(BaseClass): in cleanup method """ self.oc_node = self.ocp_master_node[0] - self.gluster_pod = get_ocp_gluster_pod_names(self.oc_node)[0] - self.gluster_pod_obj = podcmd.Pod(self.oc_node, self.gluster_pod) # prefix used to create resources, generating using glusto_test_id # which uses time and date of test case @@ -169,9 +166,7 @@ class GlusterStabilityTestSetup(BaseClass): # get block hosting volume from pvc name block_hosting_vol = get_block_hosting_volume_name( - self.heketi_client_node, self.heketi_server_url, - block_volume, self.gluster_pod, self.oc_node - ) + self.heketi_client_node, self.heketi_server_url, block_volume) return block_hosting_vol @@ -233,9 +228,7 @@ class GlusterStabilityTestSetup(BaseClass): # validate block volumes listed by heketi and gluster match_heketi_and_gluster_block_volumes_by_prefix( - self.gluster_pod_obj, heketi_block_volume_names, - "%s_" % self.prefix - ) + heketi_block_volume_names, "%s_" % self.prefix) def get_io_time(self): """Gets last io time of io pod by listing log file directory @@ -268,7 +261,7 @@ class GlusterStabilityTestSetup(BaseClass): """ start_io_time = self.get_io_time() - restart_block_hosting_volume(self.gluster_pod_obj, block_hosting_vol) + restart_file_volume(block_hosting_vol) # Explicit wait to start ios on pvc after volume start time.sleep(5) @@ -276,29 +269,31 @@ class GlusterStabilityTestSetup(BaseClass): self.assertGreater(resume_io_time, start_io_time, "IO has not stopped") - wait_to_heal_complete(self.gluster_pod_obj) + wait_to_heal_complete() @ddt.data(SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET) def test_restart_services_provision_volume_and_run_io(self, service): """Restart gluster service then validate volumes""" + block_hosting_vol = self.get_block_hosting_volume_by_pvc_name( + self.pvc_name) + g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol) + self.assertGreater(len(g_nodes), 2) + # restarts glusterfs service - restart_service_on_pod(self.oc_node, self.gluster_pod, service) + restart_service_on_gluster_pod_or_node( + self.oc_node, service, g_nodes[0]) # wait for deployed user pod to be in Running state after restarting # service wait_for_pod_be_ready( - self.oc_node, self.pod_name, timeout=60, wait_step=5 - ) + self.oc_node, self.pod_name, timeout=60, wait_step=5) # checks if all glusterfs services are in running state - for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET): - status = "exited" if service == SERVICE_TARGET else "running" - self.assertTrue( - check_service_status( - self.oc_node, self.gluster_pod, service, status - ), - "service %s is not in %s state" % (service, status) - ) + for g_node in g_nodes: + for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET): + status = "exited" if service == SERVICE_TARGET else "running" + self.assertTrue(wait_for_service_status_on_gluster_pod_or_node( + self.oc_node, service, status, g_node)) # validates pvc, pv, heketi block and gluster block count after # service restarts @@ -309,23 +304,20 @@ class GlusterStabilityTestSetup(BaseClass): """Target side failures - Brick failure on block hosting volume""" # get block hosting volume from pvc name block_hosting_vol = self.get_block_hosting_volume_by_pvc_name( - self.pvc_name - ) + self.pvc_name) - # restarts brick 2 process of block hosting volume - restart_brick_process( - self.oc_node, self.gluster_pod_obj, block_hosting_vol - ) + # restarts 2 brick processes of block hosting volume + g_nodes = get_gluster_vol_hosting_nodes(block_hosting_vol) + self.assertGreater(len(g_nodes), 2) + restart_gluster_vol_brick_processes( + self.oc_node, block_hosting_vol, g_nodes[:2]) # checks if all glusterfs services are in running state - for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET): - status = "exited" if service == SERVICE_TARGET else "running" - self.assertTrue( - check_service_status( - self.oc_node, self.gluster_pod, service, status - ), - "service %s is not in %s state" % (service, status) - ) + for g_node in g_nodes: + for service in (SERVICE_BLOCKD, SERVICE_TCMU, SERVICE_TARGET): + status = "exited" if service == SERVICE_TARGET else "running" + self.assertTrue(wait_for_service_status_on_gluster_pod_or_node( + self.oc_node, service, status, g_node)) # validates pvc, pv, heketi block and gluster block count after # service restarts diff --git a/tests/functional/common/test_node_restart.py b/tests/functional/common/test_node_restart.py index 02272321..fc8bec07 100644 --- a/tests/functional/common/test_node_restart.py +++ b/tests/functional/common/test_node_restart.py @@ -4,7 +4,7 @@ import time from unittest import skip from cnslibs.cns.cns_baseclass import BaseClass from cnslibs.common.openshift_ops import ( - check_service_status, + check_service_status_on_pod, get_ocp_gluster_pod_names, oc_rsh, wait_for_pod_be_ready) @@ -20,6 +20,8 @@ class TestNodeRestart(BaseClass): self.oc_node = self.ocp_master_node[0] self.gluster_pod_list = get_ocp_gluster_pod_names(self.oc_node) + if not self.gluster_pod_list: + self.skipTest("Standalone Gluster is not supported by this test.") self.gluster_pod_name = self.gluster_pod_list[0] self.sc_name = self.create_storage_class() @@ -130,7 +132,7 @@ class TestNodeRestart(BaseClass): for service in service_names: g.log.info("gluster_pod - '%s' : gluster_service '%s'" % ( gluster_pod, service)) - check_service_status( + check_service_status_on_pod( self.oc_node, gluster_pod, service, "running" ) |