-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/cloundproviders/__init__.py    0
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py    237
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/exceptions.py                  8
-rw-r--r--  openshift-storage-libs/openshiftstoragelibs/node_ops.py                  130
-rw-r--r--  tests/functional/gluster_stability/test_gluster_block_stability.py        80
-rw-r--r--  tests/glusterfs-containers-tests-config.yaml                               10
-rw-r--r--  tox.ini                                                                     2
7 files changed, 467 insertions, 0 deletions
diff --git a/openshift-storage-libs/openshiftstoragelibs/cloundproviders/__init__.py b/openshift-storage-libs/openshiftstoragelibs/cloundproviders/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/openshift-storage-libs/openshiftstoragelibs/cloundproviders/__init__.py
diff --git a/openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py b/openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py
new file mode 100644
index 00000000..8312efa6
--- /dev/null
+++ b/openshift-storage-libs/openshiftstoragelibs/cloundproviders/vmware.py
@@ -0,0 +1,237 @@
+"""
+Note: Do not use this module directly in test cases. It is meant to be
+consumed through the 'node_ops' module.
+"""
+import re
+
+from glusto.core import Glusto as g
+from pyVim import connect
+from pyVmomi import vim, vmodl
+import six
+
+from openshiftstoragelibs import exceptions
+from openshiftstoragelibs.waiter import Waiter
+
+
+IP_REGEX = r"(^[12]?\d{1,2}\.[12]?\d{1,2}\.[12]?\d{1,2}\.[12]?\d{1,2}$)"
+
+
+class VmWare(object):
+
+ def __init__(self):
+ try:
+ self.hostname = g.config['cloud_provider']['vmware']['hostname']
+ self.username = g.config['cloud_provider']['vmware']['username']
+ self.password = g.config['cloud_provider']['vmware']['password']
+ self.port = g.config['cloud_provider']['vmware'].get('port', 443)
+ except KeyError:
+ msg = ("Config file doesn't have values related to vmware Cloud"
+ " Provider.")
+ g.log.error(msg)
+ raise exceptions.ConfigError(msg)
+
+ # Connect vsphere client
+ try:
+ self.vsphere_client = connect.ConnectNoSSL(
+ self.hostname, self.port, self.username, self.password)
+ except Exception as e:
+ g.log.error(e)
+ raise exceptions.CloudProviderError(e)
+
+ def __del__(self):
+ # Disconnect vsphere client
+ try:
+ connect.Disconnect(self.vsphere_client)
+ except Exception as e:
+ g.log.error(e)
+ raise exceptions.CloudProviderError(e)
+
+ def _wait_for_tasks(self, tasks, si):
+ """Given the service instance si and tasks, it returns after all the
+ tasks are complete.
+ """
+
+ pc = si.content.propertyCollector
+
+ taskList = [six.text_type(task) for task in tasks]
+
+ # Create filter
+ objSpecs = [vmodl.query.PropertyCollector.ObjectSpec(obj=task)
+ for task in tasks]
+ propSpec = vmodl.query.PropertyCollector.PropertySpec(
+ type=vim.Task, pathSet=[], all=True)
+ filterSpec = vmodl.query.PropertyCollector.FilterSpec()
+ filterSpec.objectSet = objSpecs
+ filterSpec.propSet = [propSpec]
+ filterTask = pc.CreateFilter(filterSpec, True)
+
+ try:
+ version, state = None, None
+
+ # Looking for updates till the state moves to a completed state.
+ while len(taskList):
+ update = pc.WaitForUpdates(version)
+ for filterSet in update.filterSet:
+ for objSet in filterSet.objectSet:
+ task = objSet.obj
+ for change in objSet.changeSet:
+ if change.name == 'info':
+ state = change.val.state
+ elif change.name == 'info.state':
+ state = change.val
+ else:
+ continue
+
+ if not six.text_type(task) in taskList:
+ continue
+
+ if state == vim.TaskInfo.State.success:
+ # Remove task from taskList
+ taskList.remove(six.text_type(task))
+ elif state == vim.TaskInfo.State.error:
+ raise task.info.error
+ # Move to next version
+ version = update.version
+ finally:
+ if filterTask:
+ filterTask.Destroy()
+
+ def wait_for_hostname(self, vm_name, timeout=600, interval=10):
+ """Wait for hostname to get assigned to a VM.
+
+ Args:
+ vm_name (str): Name of the VM.
+ Returns:
+ str: hostname of the VM.
+ Raises:
+ CloudProviderError: In case of any failures.
+ """
+ for w in Waiter(timeout, interval):
+ vmlist = (
+ self.vsphere_client.content.viewManager.CreateContainerView(
+ self.vsphere_client.content.rootFolder,
+ [vim.VirtualMachine], True))
+ vm = [vm for vm in vmlist.view if vm.name == vm_name]
+ hostname = vm[0].summary.guest.hostName
+ if hostname:
+ return hostname
+        msg = 'VM %s did not get assigned a hostname' % vm_name
+ g.log.error(msg)
+ raise exceptions.CloudProviderError(msg)
+
+ def find_vm_name_by_ip_or_hostname(self, ip_or_hostname):
+ """Find the name of VM by its IPv4 or HostName in vmware client.
+
+ Args:
+ ip_or_hostname (str): IPv4 or HostName of the VM.
+ Returns:
+ str: name of the VM.
+ Raises:
+ CloudProviderError: In case of any failures.
+ Note:
+ VM should be up and IP should be assigned to use this lib.
+ """
+ # Get a searchIndex object
+ searcher = self.vsphere_client.content.searchIndex
+
+ global IP_REGEX
+ status_match = re.search(IP_REGEX, ip_or_hostname)
+
+ if status_match:
+ # Find a VM by IP
+ vm = searcher.FindByIp(ip=ip_or_hostname, vmSearch=True)
+ else:
+ # Find a VM by hostname
+ vm = searcher.FindByDnsName(dnsName=ip_or_hostname, vmSearch=True)
+
+ if vm:
+ return vm.name
+
+ msg = 'IP or hostname %s is not assigned to any VM' % ip_or_hostname
+ g.log.error(msg)
+ raise exceptions.CloudProviderError(msg)
+
+ def get_power_state_of_vm_by_name(self, vm_name):
+ """Get the power state of VM by its name.
+
+ Args:
+ vm_name (str): name of the VM.
+ Returns:
+ str: power state of VM.
+ Raises:
+ CloudProviderError: In case of any failures.
+ """
+ # Get list of all VM's
+ vmlist = self.vsphere_client.content.viewManager.CreateContainerView(
+ self.vsphere_client.content.rootFolder, [vim.VirtualMachine], True)
+
+ # Find VM
+ vm = [vm for vm in vmlist.view if vm.name == vm_name]
+
+ if vm:
+ # Get VM power State
+ return vm[0].summary.runtime.powerState
+
+ msg = 'VM %s is not present in the cluster' % vm_name
+ g.log.error(msg)
+ raise exceptions.CloudProviderError(msg)
+
+ def power_on_vm_by_name(self, vm_name):
+ """Power on VM by its name.
+
+ Args:
+ vm_name (str): name of the VM.
+ Returns:
+ None
+ Raises:
+ CloudProviderError: In case of any failures.
+ """
+ # Get list of all VM's
+ vmlist = self.vsphere_client.content.viewManager.CreateContainerView(
+ self.vsphere_client.content.rootFolder, [vim.VirtualMachine], True)
+
+ # Find VM
+ vm = [vm for vm in vmlist.view if vm.name == vm_name]
+
+ if not vm:
+            msg = 'VM %s is not present in the cluster' % vm_name
+            g.log.error(msg)
+            raise exceptions.CloudProviderError(msg)
+
+ if vm[0].summary.runtime.powerState == 'poweredOn':
+ msg = 'VM %s is already powered On' % vm_name
+ g.log.error(msg)
+            raise exceptions.CloudProviderError(msg)
+
+ tasks = [vm[0].PowerOn()]
+ self._wait_for_tasks(tasks, self.vsphere_client)
+
+ def power_off_vm_by_name(self, vm_name):
+ """Power off VM by its name.
+
+ Args:
+ vm_name (str): name of the VM.
+ Returns:
+ None
+ Raises:
+ CloudProviderError: In case of any failures.
+ """
+ # Get list of all VM's
+ vmlist = self.vsphere_client.content.viewManager.CreateContainerView(
+ self.vsphere_client.content.rootFolder, [vim.VirtualMachine], True)
+
+ # Find VM
+ vm = [vm for vm in vmlist.view if vm.name == vm_name]
+
+ if not vm:
+            msg = 'VM %s is not present in the cluster' % vm_name
+            g.log.error(msg)
+            raise exceptions.CloudProviderError(msg)
+
+ if vm[0].summary.runtime.powerState == 'poweredOff':
+ msg = 'VM %s is already powered Off' % vm_name
+ g.log.error(msg)
+            raise exceptions.CloudProviderError(msg)
+
+ tasks = [vm[0].PowerOff()]
+ self._wait_for_tasks(tasks, self.vsphere_client)
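Although the module docstring above says this wrapper is not meant to be called from test cases directly, a minimal usage sketch may help review. It assumes g.config carries a populated cloud_provider.vmware section; the IP 10.70.46.11 is purely illustrative:

    from openshiftstoragelibs.cloundproviders.vmware import VmWare

    vmware = VmWare()  # connects to vSphere using cloud_provider.vmware from g.config
    vm_name = vmware.find_vm_name_by_ip_or_hostname('10.70.46.11')
    if vmware.get_power_state_of_vm_by_name(vm_name) == 'poweredOn':
        vmware.power_off_vm_by_name(vm_name)  # blocks on the vSphere task via _wait_for_tasks
    vmware.power_on_vm_by_name(vm_name)
    hostname = vmware.wait_for_hostname(vm_name)  # guest hostname once reported by VMware Tools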
diff --git a/openshift-storage-libs/openshiftstoragelibs/exceptions.py b/openshift-storage-libs/openshiftstoragelibs/exceptions.py
index 44daee12..64f187ed 100644
--- a/openshift-storage-libs/openshiftstoragelibs/exceptions.py
+++ b/openshift-storage-libs/openshiftstoragelibs/exceptions.py
@@ -21,3 +21,11 @@ class NotSupportedException(Exception):
For example, pv resize is not supported in OCP version < 3.9
'''
+
+
+class CloudProviderError(Exception):
+ '''
+    Custom exception raised when an operation fails in cloud provider libs.
+
+    For example, unable to find a VM in the vSphere client.
+ '''
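The new exception slots into the test flow as follows: configuration problems (missing cloud_provider section, unsupported provider name) surface as ConfigError or NotImplementedError and are used to skip tests, while CloudProviderError signals a runtime failure against a configured provider. A hedged sketch of the intended handling inside a test case (node_ip is a placeholder and self is the test-case instance, both assumed from the test context):

    from openshiftstoragelibs import exceptions
    from openshiftstoragelibs import node_ops

    node_ip = '10.70.46.11'  # placeholder; in the test below this is self.node
    try:
        vm_name = node_ops.find_vm_name_by_ip_or_hostname(node_ip)
    except (NotImplementedError, exceptions.ConfigError) as e:
        self.skipTest(e)  # no or unsupported cloud provider configured
    except exceptions.CloudProviderError as e:
        self.fail(e)      # provider configured, but the VM lookup failed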
diff --git a/openshift-storage-libs/openshiftstoragelibs/node_ops.py b/openshift-storage-libs/openshiftstoragelibs/node_ops.py
index fb4aaa26..f456b325 100644
--- a/openshift-storage-libs/openshiftstoragelibs/node_ops.py
+++ b/openshift-storage-libs/openshiftstoragelibs/node_ops.py
@@ -1,11 +1,16 @@
import time
+from glustolibs.gluster.exceptions import ExecutionError
from glusto.core import Glusto as g
+from openshiftstoragelibs.cloundproviders.vmware import VmWare
from openshiftstoragelibs import exceptions
from openshiftstoragelibs import waiter
+CLOUD_PROVIDER = None
+
+
def node_reboot_by_command(node, timeout=600, wait_step=10):
"""Reboot node and wait to start for given timeout.
@@ -45,3 +50,128 @@ def node_reboot_by_command(node, timeout=600, wait_step=10):
"not reachable" % (timeout, node))
g.log.error(error_msg)
raise exceptions.ExecutionError(error_msg)
+
+
+def wait_for_ssh_connection(hostname, timeout=600, interval=10):
+ """Wait for ssh conection to be ready within given timeout.
+
+ Args:
+ hostname (str): hostname of a machine.
+ Returns:
+ None
+ Raises:
+ CloudProviderError: In case of any failures.
+ """
+ for w in waiter.Waiter(timeout, interval):
+ try:
+ # Run random command to verify ssh connection
+ g.run(hostname, 'ls')
+ return
+ except (exceptions.ExecutionError, ExecutionError):
+ g.log.info("Waiting for ssh connection on host '%s'" % hostname)
+
+    msg = 'Unable to establish ssh connection with %s' % hostname
+ g.log.error(msg)
+ raise exceptions.CloudProviderError(msg)
+
+
+def _get_cloud_provider():
+ """Gather cloud provider facts"""
+
+ global CLOUD_PROVIDER
+ if CLOUD_PROVIDER:
+ return CLOUD_PROVIDER
+
+ try:
+ cloud_provider_name = g.config['cloud_provider']['name']
+ except KeyError:
+ msg = "Incorrect config file. Cloud provider name is missing."
+ g.log.error(msg)
+ raise exceptions.ConfigError(msg)
+
+ if cloud_provider_name == 'vmware':
+ CLOUD_PROVIDER = VmWare()
+ else:
+ msg = "Cloud Provider %s is not supported." % cloud_provider_name
+ g.log.error(msg)
+ raise NotImplementedError(msg)
+
+ return CLOUD_PROVIDER
+
+
+def find_vm_name_by_ip_or_hostname(ip_or_hostname):
+ """Find VM name from the ip or hostname.
+
+ Args:
+ ip_or_hostname (str): IP address or hostname of VM.
+ Returns:
+ str: Name of the VM.
+ """
+ cloudProvider = _get_cloud_provider()
+ g.log.info('getting the name of vm for ip or hostname %s' % ip_or_hostname)
+ return cloudProvider.find_vm_name_by_ip_or_hostname(ip_or_hostname)
+
+
+def get_power_state_of_vm_by_name(name):
+ """Get the power state of VM.
+
+ Args:
+        name (str): name of the VM whose power state has to be found.
+ Returns:
+ str: Power state of the VM.
+ """
+ cloudProvider = _get_cloud_provider()
+ g.log.info('getting the power state of vm "%s"' % name)
+ return cloudProvider.get_power_state_of_vm_by_name(name)
+
+
+def power_off_vm_by_name(name):
+ """Power off the virtual machine.
+
+ Args:
+ name (str): name of the VM which needs to be powered off.
+ Returns:
+ None
+ """
+ cloudProvider = _get_cloud_provider()
+ g.log.info('powering off the vm "%s"' % name)
+ cloudProvider.power_off_vm_by_name(name)
+ g.log.info('powered off the vm "%s" successfully' % name)
+
+
+def power_on_vm_by_name(name, timeout=600, interval=10):
+ """Power on the virtual machine and wait for SSH ready within given
+ timeout.
+
+ Args:
+ name (str): name of the VM which needs to be powered on.
+ Returns:
+ None
+ Raises:
+ CloudProviderError: In case of any failures.
+ """
+ cloudProvider = _get_cloud_provider()
+ g.log.info('powering on the VM "%s"' % name)
+ cloudProvider.power_on_vm_by_name(name)
+ g.log.info('Powered on the VM "%s" successfully' % name)
+
+ # Wait for hostname to get assigned
+ _waiter = waiter.Waiter(timeout, interval)
+ for w in _waiter:
+ try:
+ hostname = cloudProvider.wait_for_hostname(name, 1, 1)
+ break
+ except Exception as e:
+ g.log.info(e)
+ if w.expired:
+ raise exceptions.CloudProviderError(e)
+
+    # Wait for ssh connection on the assigned hostname to be ready
+ for w in _waiter:
+ try:
+ wait_for_ssh_connection(hostname, 1, 1)
+ break
+ except Exception as e:
+ g.log.info(e)
+ if w.expired:
+ raise exceptions.CloudProviderError(e)
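Taken together, node_ops now exposes the provider-agnostic entry points the tests are meant to use; the provider object is created lazily by _get_cloud_provider and cached in CLOUD_PROVIDER. A minimal sketch of the expected power-cycle flow, with a placeholder IP:

    from openshiftstoragelibs import node_ops

    vm_name = node_ops.find_vm_name_by_ip_or_hostname('10.70.46.11')
    node_ops.power_off_vm_by_name(vm_name)
    # ... exercise the failure scenario while the node is down ...
    node_ops.power_on_vm_by_name(vm_name)  # also waits for hostname and ssh readiness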
diff --git a/tests/functional/gluster_stability/test_gluster_block_stability.py b/tests/functional/gluster_stability/test_gluster_block_stability.py
index 0232c790..acbec125 100644
--- a/tests/functional/gluster_stability/test_gluster_block_stability.py
+++ b/tests/functional/gluster_stability/test_gluster_block_stability.py
@@ -1,10 +1,23 @@
from openshiftstoragelibs.baseclass import GlusterBlockBaseClass
from openshiftstoragelibs.command import cmd_run
+from openshiftstoragelibs.exceptions import ConfigError
+from openshiftstoragelibs.heketi_ops import (
+ heketi_node_info,
+ heketi_node_list,
+)
+from openshiftstoragelibs.node_ops import (
+ find_vm_name_by_ip_or_hostname,
+ power_off_vm_by_name,
+ power_on_vm_by_name,
+)
from openshiftstoragelibs.openshift_ops import (
cmd_run_on_gluster_pod_or_node,
+ get_ocp_gluster_pod_details,
get_pod_name_from_dc,
+ get_pv_name_from_pvc,
oc_adm_manage_node,
oc_delete,
+ oc_get_custom_resource,
oc_get_schedulable_nodes,
oc_rsh,
wait_for_pod_be_ready,
@@ -263,3 +276,70 @@ class TestGlusterBlockStability(GlusterBlockBaseClass):
# Verify that all the paths are up
self.verify_all_paths_are_up_in_multipath(mpath, hacount, node)
+
+ def test_initiator_side_failure_restart_pod_when_target_node_is_down(self):
+ """Restart app pod when one gluster node is down"""
+        # Skip the test if it does not meet the requirements
+ try:
+ vm_name = find_vm_name_by_ip_or_hostname(self.node)
+ except (NotImplementedError, ConfigError) as e:
+ self.skipTest(e)
+
+ # Get heketi node list
+ h_nodes_ids = heketi_node_list(
+ self.heketi_client_node, self.heketi_server_url)
+
+        # Get the IPs and hostnames of gluster nodes from heketi
+ h_nodes = {}
+ for node in h_nodes_ids:
+ info = heketi_node_info(
+ self.heketi_client_node, self.heketi_server_url, node,
+ json=True)
+ h_nodes[info['hostnames']['storage'][0]] = (
+ info['hostnames']['manage'][0])
+
+ pvc_name = self.create_and_wait_for_pvc()
+ pv_name = get_pv_name_from_pvc(self.node, pvc_name)
+
+ # Create app pod
+ dc_name, pod_name = self.create_dc_with_pvc(self.pvc_name)
+
+ iqn, hacount, p_node = self.verify_iscsi_sessions_and_multipath(
+ self.pvc_name, dc_name)
+
+ # Get list of containerized gluster nodes
+ g_nodes = get_ocp_gluster_pod_details(self.node)
+
+ # Get target portals for the PVC
+ targets = oc_get_custom_resource(
+ self.node, 'pv', ':.spec.iscsi.portals,:.spec.iscsi.targetPortal',
+ name=pv_name)
+ targets = [item.strip('[').strip(
+ ']') for item in targets if isinstance(item, str)]
+
+ # Select hostname for powering off
+ if h_nodes[targets[0]] == p_node:
+ vm_hostname = h_nodes[targets[1]]
+ else:
+ vm_hostname = h_nodes[targets[0]]
+
+ # Find VM Name for powering it off
+ vm_name = find_vm_name_by_ip_or_hostname(vm_hostname)
+
+        # Make the node unschedulable if glusterfs is containerized
+ if g_nodes:
+ oc_adm_manage_node(self.node, '--schedulable=false', [vm_hostname])
+ self.addCleanup(
+ oc_adm_manage_node, self.node, '--schedulable', [vm_hostname])
+
+ # Power off gluster node
+ power_off_vm_by_name(vm_name)
+ self.addCleanup(power_on_vm_by_name, vm_name)
+
+        # Delete the pod so that it gets respun
+ oc_delete(self.node, 'pod', pod_name)
+ wait_for_resource_absence(self.node, 'pod', pod_name)
+
+ # Wait for pod to come up when 1 target node is down
+ pod_name = get_pod_name_from_dc(self.node, dc_name)
+ wait_for_pod_be_ready(self.node, pod_name, timeout=120, wait_step=5)
diff --git a/tests/glusterfs-containers-tests-config.yaml b/tests/glusterfs-containers-tests-config.yaml
index c69f3922..d92dbaac 100644
--- a/tests/glusterfs-containers-tests-config.yaml
+++ b/tests/glusterfs-containers-tests-config.yaml
@@ -59,3 +59,13 @@ openshift:
common:
stop_on_first_failure: False
heketi_command_timeout: 120
+
+cloud_provider:
+    name: '<fake-cloud-provider-name, e.g. vmware>'
+ vmware:
+ hostname: '<fake-hostname>'
+ username: '<fake-username>'
+ password: '<fake-password>'
+ port: 443
+ aws: # To be done in future
+ libvirt: # To be done in future
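For clarity, this is how the new section is consumed (see VmWare.__init__ and _get_cloud_provider above): 'name' selects the provider block, and within the vmware block hostname/username/password are mandatory while port is optional and falls back to 443. A small sketch, assuming a populated config:

    from glusto.core import Glusto as g

    provider = g.config['cloud_provider']['name']  # e.g. 'vmware'
    creds = g.config['cloud_provider'][provider]
    port = creds.get('port', 443)                  # optional, defaults to 443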
diff --git a/tox.ini b/tox.ini
index 30eb9eca..f54daae9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -25,6 +25,7 @@ commands =
mock \
rtyaml \
ddt \
+ pyvmomi \
prometheus_client>=0.4.2 \
git+git://github.com/loadtheaccumulator/glusto.git \
"git+git://github.com/gluster/glusto-tests.git#egg=glustolibs-gluster&subdirectory=glustolibs-gluster" \
@@ -43,6 +44,7 @@ commands =
mock \
rtyaml \
ddt \
+ pyvmomi \
prometheus_client>=0.4.2 \
git+git://github.com/loadtheaccumulator/glusto.git@python3_port1 \
"git+git://github.com/gluster/glusto-tests.git#egg=glustolibs-gluster&subdirectory=glustolibs-gluster" \