Files
watcher/watcher/common/nova_helper.py
afanasev.s 0f5b6a07d0 Fix incorrect logging format
Fix the incorrect logging format used with multiple variables, which broke
this functionality and caused some log messages to be skipped.
The logging calls require two arguments, but they were passed as a single
tuple, so the tuple was interpreted as one argument and the call failed
because the second argument was missing.

Closes-Bug: 2110149

Change-Id: I74ed44134b50782c105a0e82f3af34a5fa45d119
2025-05-15 12:55:18 +02:00

746 lines
29 KiB
Python

# -*- encoding: utf-8 -*-
# Copyright (c) 2015 b<>com
#
# Authors: Jean-Emile DARTOIS <jean-emile.dartois@b-com.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time
from novaclient import api_versions
from oslo_log import log
import glanceclient.exc as glexceptions
import novaclient.exceptions as nvexceptions
from watcher.common import clients
from watcher.common import exception
from watcher import conf
LOG = log.getLogger(__name__)
CONF = conf.CONF
class NovaHelper(object):
def __init__(self, osc=None):
""":param osc: an OpenStackClients instance"""
self.osc = osc if osc else clients.OpenStackClients()
self.neutron = self.osc.neutron()
self.cinder = self.osc.cinder()
self.nova = self.osc.nova()
self.glance = self.osc.glance()
def get_compute_node_list(self):
hypervisors = self.nova.hypervisors.list()
# filter out baremetal nodes from hypervisors
compute_nodes = [node for node in hypervisors if
node.hypervisor_type != 'ironic']
return compute_nodes
def get_compute_node_by_name(self, node_name, servers=False,
detailed=False):
"""Search for a hypervisor (compute node) by hypervisor_hostname
:param node_name: The hypervisor_hostname to search
:param servers: If true, include information about servers per
hypervisor
:param detailed: If true, include information about the compute service
per hypervisor (requires microversion 2.53)
"""
return self.nova.hypervisors.search(node_name, servers=servers,
detailed=detailed)
def get_compute_node_by_hostname(self, node_hostname):
"""Get compute node by hostname
:param node_hostname: Compute service hostname
:returns: novaclient.v2.hypervisors.Hypervisor object if found
:raises: ComputeNodeNotFound if no hypervisor is found for the compute
service hostname or there was an error communicating with nova
"""
try:
# This is a fuzzy match on hypervisor_hostname so we could get back
# more than one compute node. If so, match on the compute service
# hostname.
compute_nodes = self.get_compute_node_by_name(
node_hostname, detailed=True)
for cn in compute_nodes:
if cn.service['host'] == node_hostname:
return cn
raise exception.ComputeNodeNotFound(name=node_hostname)
except Exception as exc:
LOG.exception(exc)
raise exception.ComputeNodeNotFound(name=node_hostname)
def get_compute_node_by_uuid(self, node_uuid):
"""Get compute node by uuid
:param node_uuid: hypervisor id as uuid after microversion 2.53
:returns: novaclient.v2.hypervisors.Hypervisor object if found
"""
return self.nova.hypervisors.get(node_uuid)
def get_instance_list(self, filters=None, marker=None, limit=-1):
"""List servers for all tenants with details.
This always gets servers with the all_tenants=True filter.
:param filters: Dict of additional filters (optional).
:param marker: Get servers that appear later in the server
list than that represented by this server id (optional).
:param limit: Maximum number of servers to return (optional).
If limit == -1, all servers will be returned,
note that limit == -1 will have a performance
penalty. For details, please see:
https://bugs.launchpad.net/watcher/+bug/1834679
:returns: list of novaclient Server objects
"""
search_opts = {'all_tenants': True}
if filters:
search_opts.update(filters)
return self.nova.servers.list(search_opts=search_opts,
marker=marker,
limit=limit)
def get_instance_by_uuid(self, instance_uuid):
return [instance for instance in
self.nova.servers.list(search_opts={"all_tenants": True,
"uuid": instance_uuid})]
def get_instance_by_name(self, instance_name):
return [instance for instance in
self.nova.servers.list(search_opts={"all_tenants": True,
"name": instance_name})]
def get_instances_by_node(self, host):
return [instance for instance in
self.nova.servers.list(search_opts={"all_tenants": True,
"host": host},
limit=-1)]
def get_flavor_list(self):
return self.nova.flavors.list(**{'is_public': None})
def get_service(self, service_id):
return self.nova.services.find(id=service_id)
def get_aggregate_list(self):
return self.nova.aggregates.list()
def get_aggregate_detail(self, aggregate_id):
return self.nova.aggregates.get(aggregate_id)
def get_availability_zone_list(self):
return self.nova.availability_zones.list(detailed=True)
def get_service_list(self):
return self.nova.services.list(binary='nova-compute')
def find_instance(self, instance_id):
return self.nova.servers.get(instance_id)
def confirm_resize(self, instance, previous_status, retry=60):
instance.confirm_resize()
instance = self.nova.servers.get(instance.id)
while instance.status != previous_status and retry:
instance = self.nova.servers.get(instance.id)
retry -= 1
time.sleep(1)
if instance.status == previous_status:
return True
else:
LOG.debug("confirm resize failed for the "
"instance %s", instance.id)
return False
def wait_for_volume_status(self, volume, status, timeout=60,
poll_interval=1):
"""Wait until volume reaches given status.
:param volume: volume resource
:param status: expected status of volume
:param timeout: timeout in seconds
:param poll_interval: poll interval in seconds
"""
start_time = time.time()
while time.time() - start_time < timeout:
volume = self.cinder.volumes.get(volume.id)
if volume.status == status:
break
time.sleep(poll_interval)
else:
raise Exception("Volume %s did not reach status %s after %d s"
% (volume.id, status, timeout))
return volume.status == status
    def watcher_non_live_migrate_instance(self, instance_id, dest_hostname,
                                          retry=120):
        """This method migrates a given instance

        This method uses the Nova built-in migrate()
        action to do a migration of a given instance.
        For migrating a given dest_hostname, Nova API version
        must be 2.56 or higher.

        It returns True if the migration was successful,
        False otherwise.

        :param instance_id: the unique id of the instance to migrate.
        :param dest_hostname: the name of the destination compute node, if
                              destination_node is None, nova scheduler choose
                              the destination host
        :param retry: how many two-second polls to wait for the migration
            to reach VERIFY_RESIZE or ERROR.
        :returns: True on success, False otherwise.
        """
        LOG.debug(
            "Trying a cold migrate of instance '%s' ", instance_id)

        # Looking for the instance to migrate
        instance = self.find_instance(instance_id)
        if not instance:
            LOG.debug("Instance %s not found !", instance_id)
            return False
        else:
            host_name = getattr(instance, "OS-EXT-SRV-ATTR:host")
            LOG.debug(
                "Instance %(instance)s found on host '%(host)s'.",
                {'instance': instance_id, 'host': host_name})
            previous_status = getattr(instance, 'status')
            instance.migrate(host=dest_hostname)
            instance = self.nova.servers.get(instance_id)

            # Poll until the cold migration has reached its resize step
            # (VERIFY_RESIZE) or failed (ERROR), or retries run out.
            while (getattr(instance, 'status') not in
                   ["VERIFY_RESIZE", "ERROR"] and retry):
                instance = self.nova.servers.get(instance.id)
                time.sleep(2)
                retry -= 1
            new_hostname = getattr(instance, 'OS-EXT-SRV-ATTR:host')

            # Success requires both that the host actually changed and
            # that nova is waiting for the resize to be confirmed.
            if (host_name != new_hostname and
                    instance.status == 'VERIFY_RESIZE'):
                if not self.confirm_resize(instance, previous_status):
                    return False
                LOG.debug(
                    "cold migration succeeded : "
                    "instance %(instance)s is now on host '%(host)s'.",
                    {'instance': instance_id, 'host': new_hostname})
                return True
            else:
                LOG.debug(
                    "cold migration for instance %s failed", instance_id)
                return False
    def resize_instance(self, instance_id, flavor, retry=120):
        """This method resizes given instance with specified flavor.

        This method uses the Nova built-in resize()
        action to do a resize of a given instance.

        It returns True if the resize was successful,
        False otherwise.

        :param instance_id: the unique id of the instance to resize.
        :param flavor: the name or ID of the flavor to resize to.
        :param retry: how many one-second polls to wait for the vm_state
            to become 'resized'.
        :returns: True on success, False otherwise.
        """
        LOG.debug(
            "Trying a resize of instance %(instance)s to "
            "flavor '%(flavor)s'",
            {'instance': instance_id, 'flavor': flavor})

        # Looking for the instance to resize
        instance = self.find_instance(instance_id)

        flavor_id = None

        try:
            # First try *flavor* as a flavor ID ...
            flavor_id = self.nova.flavors.get(flavor).id
        except nvexceptions.NotFound:
            # ... then fall back to matching by flavor name.
            # NOTE(review): raises IndexError when no flavor has this
            # name — presumably callers pass a valid flavor; confirm.
            flavor_id = [f.id for f in self.nova.flavors.list() if
                         f.name == flavor][0]
        except nvexceptions.ClientException as e:
            LOG.debug("Nova client exception occurred while resizing "
                      "instance %s. Exception: %s", instance_id, e)

        if not flavor_id:
            LOG.debug("Flavor not found: %s", flavor)
            return False

        if not instance:
            LOG.debug("Instance not found: %s", instance_id)
            return False

        instance_status = getattr(instance, 'OS-EXT-STS:vm_state')
        LOG.debug(
            "Instance %(id)s is in '%(status)s' status.",
            {'id': instance_id, 'status': instance_status})

        instance.resize(flavor=flavor_id)
        # Poll until the resize has finished ('resized' vm_state) or the
        # retries run out.
        while getattr(instance,
                      'OS-EXT-STS:vm_state') != 'resized' \
                and retry:
            instance = self.nova.servers.get(instance.id)
            LOG.debug('Waiting the resize of %s to %s', instance, flavor_id)
            time.sleep(1)
            retry -= 1

        instance_status = getattr(instance, 'status')
        if instance_status != 'VERIFY_RESIZE':
            return False

        # The resize must be confirmed before nova frees the old flavor.
        instance.confirm_resize()

        LOG.debug("Resizing succeeded : instance %s is now on flavor "
                  "'%s'.", instance_id, flavor_id)

        return True
    def live_migrate_instance(self, instance_id, dest_hostname, retry=120):
        """This method does a live migration of a given instance

        This method uses the Nova built-in live_migrate()
        action to do a live migration of a given instance.

        It returns True if the migration was successful,
        False otherwise.

        :param instance_id: the unique id of the instance to migrate.
        :param dest_hostname: the name of the destination compute node, if
                              destination_node is None, nova scheduler choose
                              the destination host
        :param retry: how many one-second polls to wait for the migration
            to finish.
        :returns: True on success, False otherwise.
        """
        LOG.debug(
            "Trying a live migrate instance %(instance)s ",
            {'instance': instance_id})

        # Looking for the instance to migrate
        instance = self.find_instance(instance_id)
        if not instance:
            LOG.debug("Instance not found: %s", instance_id)
            return False
        else:
            host_name = getattr(instance, 'OS-EXT-SRV-ATTR:host')
            LOG.debug(
                "Instance %(instance)s found on host '%(host)s'.",
                {'instance': instance_id, 'host': host_name})
            # From nova api version 2.25(Mitaka release), the default value of
            # block_migration is None which is mapped to 'auto'.
            instance.live_migrate(host=dest_hostname)
            instance = self.nova.servers.get(instance_id)

            # NOTE: If destination host is not specified for live migration
            # let nova scheduler choose the destination host.
            if dest_hostname is None:
                # Scheduler-picked destination: wait until the instance
                # settles (ACTIVE or ERROR), then check the host changed.
                while (instance.status not in ['ACTIVE', 'ERROR'] and retry):
                    instance = self.nova.servers.get(instance.id)
                    LOG.debug('Waiting the migration of %s', instance.id)
                    time.sleep(1)
                    retry -= 1
                new_hostname = getattr(instance, 'OS-EXT-SRV-ATTR:host')

                if host_name != new_hostname and instance.status == 'ACTIVE':
                    LOG.debug(
                        "Live migration succeeded : "
                        "instance %(instance)s is now on host '%(host)s'.",
                        {'instance': instance_id, 'host': new_hostname})
                    return True
                else:
                    return False

            # Explicit destination: wait until the instance reports the
            # requested host, or its task_state clears (migration done or
            # aborted), or the retries run out.
            while getattr(instance,
                          'OS-EXT-SRV-ATTR:host') != dest_hostname \
                    and retry:
                instance = self.nova.servers.get(instance.id)
                if not getattr(instance, 'OS-EXT-STS:task_state'):
                    LOG.debug("Instance task state: %s is null", instance_id)
                    break
                LOG.debug('Waiting the migration of %s to %s',
                          instance,
                          getattr(instance, 'OS-EXT-SRV-ATTR:host'))
                time.sleep(1)
                retry -= 1

            host_name = getattr(instance, 'OS-EXT-SRV-ATTR:host')
            if host_name != dest_hostname:
                return False

            LOG.debug(
                "Live migration succeeded : "
                "instance %(instance)s is now on host '%(host)s'.",
                {'instance': instance_id, 'host': host_name})

            return True
def abort_live_migrate(self, instance_id, source, destination, retry=240):
LOG.debug("Aborting live migration of instance %s", instance_id)
migration = self.get_running_migration(instance_id)
if migration:
migration_id = getattr(migration[0], "id")
try:
self.nova.server_migrations.live_migration_abort(
server=instance_id, migration=migration_id)
except exception as e:
# Note: Does not return from here, as abort request can't be
# accepted but migration still going on.
LOG.exception(e)
else:
LOG.debug(
"No running migrations found for instance %s", instance_id)
while retry:
instance = self.nova.servers.get(instance_id)
if (getattr(instance, 'OS-EXT-STS:task_state') is None and
getattr(instance, 'status') in ['ACTIVE', 'ERROR']):
break
time.sleep(2)
retry -= 1
instance_host = getattr(instance, 'OS-EXT-SRV-ATTR:host')
instance_status = getattr(instance, 'status')
# Abort live migration successful, action is cancelled
if instance_host == source and instance_status == 'ACTIVE':
return True
# Nova Unable to abort live migration, action is succeeded
elif instance_host == destination and instance_status == 'ACTIVE':
return False
else:
raise Exception("Live migration execution and abort both failed "
"for the instance %s" % instance_id)
def enable_service_nova_compute(self, hostname):
if float(CONF.nova_client.api_version) < 2.53:
status = self.nova.services.enable(
host=hostname, binary='nova-compute').status == 'enabled'
else:
service_uuid = self.nova.services.list(host=hostname,
binary='nova-compute')[0].id
status = self.nova.services.enable(
service_uuid=service_uuid).status == 'enabled'
return status
def disable_service_nova_compute(self, hostname, reason=None):
if float(CONF.nova_client.api_version) < 2.53:
status = self.nova.services.disable_log_reason(
host=hostname,
binary='nova-compute',
reason=reason).status == 'disabled'
else:
service_uuid = self.nova.services.list(host=hostname,
binary='nova-compute')[0].id
status = self.nova.services.disable_log_reason(
service_uuid=service_uuid,
reason=reason).status == 'disabled'
return status
def create_image_from_instance(self, instance_id, image_name,
metadata={"reason": "instance_migrate"}):
"""This method creates a new image from a given instance.
It waits for this image to be in 'active' state before returning.
It returns the unique UUID of the created image if successful,
None otherwise.
:param instance_id: the uniqueid of
the instance to backup as an image.
:param image_name: the name of the image to create.
:param metadata: a dictionary containing the list of
key-value pairs to associate to the image as metadata.
"""
LOG.debug(
"Trying to create an image from instance %s ...", instance_id)
# Looking for the instance
instance = self.find_instance(instance_id)
if not instance:
LOG.debug("Instance not found: %s", instance_id)
return None
else:
host_name = getattr(instance, 'OS-EXT-SRV-ATTR:host')
LOG.debug(
"Instance %(instance)s found on host '%(host)s'.",
{'instance': instance_id, 'host': host_name})
# We need to wait for an appropriate status
# of the instance before we can build an image from it
if self.wait_for_instance_status(instance, ('ACTIVE', 'SHUTOFF'),
5,
10):
image_uuid = self.nova.servers.create_image(instance_id,
image_name,
metadata)
image = self.glance.images.get(image_uuid)
if not image:
return None
# Waiting for the new image to be officially in ACTIVE state
# in order to make sure it can be used
status = image.status
retry = 10
while status != 'active' and status != 'error' and retry:
time.sleep(5)
retry -= 1
# Retrieve the instance again so the status field updates
image = self.glance.images.get(image_uuid)
if not image:
break
status = image.status
LOG.debug("Current image status: %s", status)
if not image:
LOG.debug("Image not found: %s", image_uuid)
else:
LOG.debug(
"Image %(image)s successfully created for "
"instance %(instance)s",
{'image': image_uuid, 'instance': instance_id})
return image_uuid
return None
def delete_instance(self, instance_id):
"""This method deletes a given instance.
:param instance_id: the unique id of the instance to delete.
"""
LOG.debug("Trying to remove instance %s ...", instance_id)
instance = self.find_instance(instance_id)
if not instance:
LOG.debug("Instance not found: %s", instance_id)
return False
else:
self.nova.servers.delete(instance_id)
LOG.debug("Instance %s removed.", instance_id)
return True
def stop_instance(self, instance_id):
"""This method stops a given instance.
:param instance_id: the unique id of the instance to stop.
"""
LOG.debug("Trying to stop instance %s ...", instance_id)
instance = self.find_instance(instance_id)
if not instance:
LOG.debug("Instance not found: %s", instance_id)
return False
elif getattr(instance, 'OS-EXT-STS:vm_state') == "stopped":
LOG.debug("Instance has been stopped: %s", instance_id)
return True
else:
self.nova.servers.stop(instance_id)
if self.wait_for_instance_state(instance, "stopped", 8, 10):
LOG.debug("Instance %s stopped.", instance_id)
return True
else:
return False
def wait_for_instance_state(self, server, state, retry, sleep):
"""Waits for server to be in a specific state
The state can be one of the following :
active, stopped
:param server: server object.
:param state: for which state we are waiting for
:param retry: how many times to retry
:param sleep: seconds to sleep between the retries
"""
if not server:
return False
while getattr(server, 'OS-EXT-STS:vm_state') != state and retry:
time.sleep(sleep)
server = self.nova.servers.get(server)
retry -= 1
return getattr(server, 'OS-EXT-STS:vm_state') == state
def wait_for_instance_status(self, instance, status_list, retry, sleep):
"""Waits for instance to be in a specific status
The status can be one of the following
: BUILD, ACTIVE, ERROR, VERIFY_RESIZE, SHUTOFF
:param instance: instance object.
:param status_list: tuple containing the list of
status we are waiting for
:param retry: how many times to retry
:param sleep: seconds to sleep between the retries
"""
if not instance:
return False
while instance.status not in status_list and retry:
LOG.debug("Current instance status: %s", instance.status)
time.sleep(sleep)
instance = self.nova.servers.get(instance.id)
retry -= 1
LOG.debug("Current instance status: %s", instance.status)
return instance.status in status_list
def create_instance(self, node_id, inst_name="test", image_id=None,
flavor_name="m1.tiny",
sec_group_list=["default"],
network_names_list=["demo-net"], keypair_name="mykeys",
create_new_floating_ip=True,
block_device_mapping_v2=None):
"""This method creates a new instance
It also creates, if requested, a new floating IP and associates
it with the new instance
It returns the unique id of the created instance.
"""
LOG.debug(
"Trying to create new instance '%(inst)s' "
"from image '%(image)s' with flavor '%(flavor)s' ...",
{'inst': inst_name, 'image': image_id, 'flavor': flavor_name})
try:
self.nova.keypairs.findall(name=keypair_name)
except nvexceptions.NotFound:
LOG.debug("Key pair '%s' not found ", keypair_name)
return
try:
image = self.glance.images.get(image_id)
except glexceptions.NotFound:
LOG.debug("Image '%s' not found ", image_id)
return
try:
flavor = self.nova.flavors.find(name=flavor_name)
except nvexceptions.NotFound:
LOG.debug("Flavor '%s' not found ", flavor_name)
return
# Make sure all security groups exist
for sec_group_name in sec_group_list:
group_id = self.get_security_group_id_from_name(sec_group_name)
if not group_id:
LOG.debug("Security group '%s' not found ", sec_group_name)
return
net_list = list()
for network_name in network_names_list:
nic_id = self.get_network_id_from_name(network_name)
if not nic_id:
LOG.debug("Network '%s' not found ", network_name)
return
net_obj = {"net-id": nic_id}
net_list.append(net_obj)
# get availability zone of destination host
azone = self.nova.services.list(host=node_id,
binary='nova-compute')[0].zone
instance = self.nova.servers.create(
inst_name, image,
flavor=flavor,
key_name=keypair_name,
security_groups=sec_group_list,
nics=net_list,
block_device_mapping_v2=block_device_mapping_v2,
availability_zone="%s:%s" % (azone, node_id))
# Poll at 5 second intervals, until the status is no longer 'BUILD'
if instance:
if self.wait_for_instance_status(instance,
('ACTIVE', 'ERROR'), 5, 10):
instance = self.nova.servers.get(instance.id)
if create_new_floating_ip and instance.status == 'ACTIVE':
LOG.debug(
"Creating a new floating IP"
" for instance '%s'", instance.id)
# Creating floating IP for the new instance
floating_ip = self.nova.floating_ips.create()
instance.add_floating_ip(floating_ip)
LOG.debug(
"Instance %(instance)s associated to "
"Floating IP '%(ip)s'",
{'instance': instance.id, 'ip': floating_ip.ip})
return instance
def get_security_group_id_from_name(self, group_name="default"):
"""This method returns the security group of the provided group name"""
security_groups = self.neutron.list_security_groups(name=group_name)
security_group_id = security_groups['security_groups'][0]['id']
return security_group_id
def get_network_id_from_name(self, net_name="private"):
"""This method returns the unique id of the provided network name"""
networks = self.neutron.list_networks(name=net_name)
# LOG.debug(networks)
network_id = networks['networks'][0]['id']
return network_id
def get_hostname(self, instance):
return str(getattr(instance, 'OS-EXT-SRV-ATTR:host'))
def get_running_migration(self, instance_id):
return self.nova.server_migrations.list(server=instance_id)
    def swap_volume(self, old_volume, new_volume,
                    retry=120, retry_interval=10):
        """Swap old_volume for new_volume

        :param old_volume: the currently attached cinder volume
        :param new_volume: the replacement cinder volume
        :param retry: how many times to poll the new volume's status
        :param retry_interval: seconds to sleep between polls
        :returns: True once the new volume reaches 'in-use', False on
            timeout or error
        """
        # NOTE(review): assumes old_volume has at least one attachment
        # — confirm callers guarantee this.
        attachments = old_volume.attachments
        instance_id = attachments[0]['server_id']
        # do volume update
        self.nova.volumes.update_server_volume(
            instance_id, old_volume.id, new_volume.id)
        # Poll until the replacement volume shows up as attached.
        while getattr(new_volume, 'status') != 'in-use' and retry:
            new_volume = self.cinder.volumes.get(new_volume.id)
            LOG.debug('Waiting volume update to %s', new_volume)
            time.sleep(retry_interval)
            retry -= 1
            LOG.debug("retry count: %s", retry)
        if getattr(new_volume, 'status') != "in-use":
            LOG.error("Volume update retry timeout or error")
            return False
        host_name = getattr(new_volume, "os-vol-host-attr:host")
        LOG.debug(
            "Volume update succeeded : "
            "Volume %(volume)s is now on host '%(host)s'.",
            {'volume': new_volume.id, 'host': host_name})
        return True
def _check_nova_api_version(self, client, version):
api_version = api_versions.APIVersion(version_str=version)
try:
api_versions.discover_version(client, api_version)
return True
except nvexceptions.UnsupportedVersion as e:
LOG.exception(e)
return False