Files
watcher/watcher/decision_engine/strategy/strategies/uniform_airflow.py
aditi 8e89d5489c Use Property setters
At various places in watcher code, we are using property getters
to set property, in this way the property setters defined are
never used, this patch fixes to use property setters to set
property.

Change-Id: Idb274887f383523cea39277b166ec9b46ebcda85
2017-09-27 10:43:43 +09:00

450 lines
18 KiB
Python

# -*- encoding: utf-8 -*-
# Copyright (c) 2016 Intel Corp
#
# Authors: Junjie-Huang <junjie.huang@intel.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
[PoC]Uniform Airflow using live migration
*Description*
It is a migration strategy based on the airflow of physical
servers. It generates solutions to move VM whenever a server's
airflow is higher than the specified threshold.
*Requirements*
* Hardware: compute node with NodeManager 3.0 support
* Software: Ceilometer component ceilometer-agent-compute running
in each compute node, and Ceilometer API can report such telemetry
"airflow, system power, inlet temperature" successfully.
* You must have at least 2 physical compute nodes to run this strategy
*Limitations*
- This is a proof of concept that is not meant to be used in production.
- We cannot forecast how many servers should be migrated. This is the
reason why we only plan a single virtual machine migration at a time.
So it's better to use this algorithm with `CONTINUOUS` audits.
- It assumes that live migrations are possible.
"""
import datetime
from oslo_config import cfg
from oslo_log import log
from watcher._i18n import _
from watcher.common import exception as wexc
from watcher.datasource import ceilometer as ceil
from watcher.datasource import gnocchi as gnoc
from watcher.decision_engine.model import element
from watcher.decision_engine.strategy.strategies import base
LOG = log.getLogger(__name__)
class UniformAirflow(base.BaseStrategy):
"""[PoC]Uniform Airflow using live migration
*Description*
It is a migration strategy based on the airflow of physical
servers. It generates solutions to move VM whenever a server's
airflow is higher than the specified threshold.
*Requirements*
* Hardware: compute node with NodeManager 3.0 support
* Software: Ceilometer component ceilometer-agent-compute running
in each compute node, and Ceilometer API can report such telemetry
"airflow, system power, inlet temperature" successfully.
* You must have at least 2 physical compute nodes to run this strategy
*Limitations*
- This is a proof of concept that is not meant to be used in production.
- We cannot forecast how many servers should be migrated. This is the
reason why we only plan a single virtual machine migration at a time.
So it's better to use this algorithm with `CONTINUOUS` audits.
- It assumes that live migrations are possible.
"""
# choose 300 seconds as the default duration of meter aggregation
PERIOD = 300
METRIC_NAMES = dict(
ceilometer=dict(
# The meter to report Airflow of physical server in ceilometer
host_airflow='hardware.ipmi.node.airflow',
# The meter to report inlet temperature of physical server
# in ceilometer
host_inlet_temp='hardware.ipmi.node.temperature',
# The meter to report system power of physical server in ceilometer
host_power='hardware.ipmi.node.power'),
gnocchi=dict(
# The meter to report Airflow of physical server in gnocchi
host_airflow='hardware.ipmi.node.airflow',
# The meter to report inlet temperature of physical server
# in gnocchi
host_inlet_temp='hardware.ipmi.node.temperature',
# The meter to report system power of physical server in gnocchi
host_power='hardware.ipmi.node.power'),
)
MIGRATION = "migrate"
def __init__(self, config, osc=None):
"""Using live migration
:param config: A mapping containing the configuration of this strategy
:type config: dict
:param osc: an OpenStackClients object
"""
super(UniformAirflow, self).__init__(config, osc)
# The migration plan will be triggered when the airflow reaches
# threshold
self.meter_name_airflow = self.METRIC_NAMES[
self.config.datasource]['host_airflow']
self.meter_name_inlet_t = self.METRIC_NAMES[
self.config.datasource]['host_inlet_temp']
self.meter_name_power = self.METRIC_NAMES[
self.config.datasource]['host_power']
self._ceilometer = None
self._gnocchi = None
self._period = self.PERIOD
@property
def ceilometer(self):
if self._ceilometer is None:
self.ceilometer = ceil.CeilometerHelper(osc=self.osc)
return self._ceilometer
@ceilometer.setter
def ceilometer(self, c):
self._ceilometer = c
@property
def gnocchi(self):
if self._gnocchi is None:
self.gnocchi = gnoc.GnocchiHelper(osc=self.osc)
return self._gnocchi
@gnocchi.setter
def gnocchi(self, g):
self._gnocchi = g
@classmethod
def get_name(cls):
return "uniform_airflow"
@classmethod
def get_display_name(cls):
return _("Uniform airflow migration strategy")
@classmethod
def get_translatable_display_name(cls):
return "Uniform airflow migration strategy"
@classmethod
def get_goal_name(cls):
return "airflow_optimization"
@property
def granularity(self):
return self.input_parameters.get('granularity', 300)
@classmethod
def get_schema(cls):
# Mandatory default setting for each element
return {
"properties": {
"threshold_airflow": {
"description": ("airflow threshold for migration, Unit is "
"0.1CFM"),
"type": "number",
"default": 400.0
},
"threshold_inlet_t": {
"description": ("inlet temperature threshold for "
"migration decision"),
"type": "number",
"default": 28.0
},
"threshold_power": {
"description": ("system power threshold for migration "
"decision"),
"type": "number",
"default": 350.0
},
"period": {
"description": "aggregate time period of ceilometer",
"type": "number",
"default": 300
},
"granularity": {
"description": "The time between two measures in an "
"aggregated timeseries of a metric.",
"type": "number",
"default": 300
},
},
}
@classmethod
def get_config_opts(cls):
return [
cfg.StrOpt(
"datasource",
help="Data source to use in order to query the needed metrics",
default="ceilometer",
choices=["ceilometer", "gnocchi"])
]
def get_available_compute_nodes(self):
default_node_scope = [element.ServiceState.ENABLED.value]
return {uuid: cn for uuid, cn in
self.compute_model.get_all_compute_nodes().items()
if cn.state == element.ServiceState.ONLINE.value and
cn.status in default_node_scope}
def calculate_used_resource(self, node):
"""Compute the used vcpus, memory and disk based on instance flavors"""
instances = self.compute_model.get_node_instances(node)
vcpus_used = 0
memory_mb_used = 0
disk_gb_used = 0
for instance in instances:
vcpus_used += instance.vcpus
memory_mb_used += instance.memory
disk_gb_used += instance.disk
return vcpus_used, memory_mb_used, disk_gb_used
def choose_instance_to_migrate(self, hosts):
"""Pick up an active instance instance to migrate from provided hosts
:param hosts: the array of dict which contains node object
"""
instances_tobe_migrate = []
for nodemap in hosts:
source_node = nodemap['node']
source_instances = self.compute_model.get_node_instances(
source_node)
if source_instances:
if self.config.datasource == "ceilometer":
inlet_t = self.ceilometer.statistic_aggregation(
resource_id=source_node.uuid,
meter_name=self.meter_name_inlet_t,
period=self._period,
aggregate='avg')
power = self.ceilometer.statistic_aggregation(
resource_id=source_node.uuid,
meter_name=self.meter_name_power,
period=self._period,
aggregate='avg')
elif self.config.datasource == "gnocchi":
stop_time = datetime.datetime.utcnow()
start_time = stop_time - datetime.timedelta(
seconds=int(self._period))
inlet_t = self.gnocchi.statistic_aggregation(
resource_id=source_node.uuid,
metric=self.meter_name_inlet_t,
granularity=self.granularity,
start_time=start_time,
stop_time=stop_time,
aggregation='mean')
power = self.gnocchi.statistic_aggregation(
resource_id=source_node.uuid,
metric=self.meter_name_power,
granularity=self.granularity,
start_time=start_time,
stop_time=stop_time,
aggregation='mean')
if (power < self.threshold_power and
inlet_t < self.threshold_inlet_t):
# hardware issue, migrate all instances from this node
for instance in source_instances:
instances_tobe_migrate.append(instance)
return source_node, instances_tobe_migrate
else:
# migrate the first active instance
for instance in source_instances:
if (instance.state !=
element.InstanceState.ACTIVE.value):
LOG.info(
"Instance not active, skipped: %s",
instance.uuid)
continue
instances_tobe_migrate.append(instance)
return source_node, instances_tobe_migrate
else:
LOG.info("Instance not found on node: %s",
source_node.uuid)
def filter_destination_hosts(self, hosts, instances_to_migrate):
"""Find instance and host with sufficient available resources"""
# large instances go first
instances_to_migrate = sorted(
instances_to_migrate, reverse=True,
key=lambda x: (x.vcpus))
# find hosts for instances
destination_hosts = []
for instance_to_migrate in instances_to_migrate:
required_cores = instance_to_migrate.vcpus
required_disk = instance_to_migrate.disk
required_mem = instance_to_migrate.memory
dest_migrate_info = {}
for nodemap in hosts:
host = nodemap['node']
if 'cores_used' not in nodemap:
# calculate the available resources
nodemap['cores_used'], nodemap['mem_used'],\
nodemap['disk_used'] = self.calculate_used_resource(
host)
cores_available = (host.vcpus -
nodemap['cores_used'])
disk_available = (host.disk -
nodemap['disk_used'])
mem_available = (
host.memory - nodemap['mem_used'])
if (cores_available >= required_cores and
disk_available >= required_disk and
mem_available >= required_mem):
dest_migrate_info['instance'] = instance_to_migrate
dest_migrate_info['node'] = host
nodemap['cores_used'] += required_cores
nodemap['mem_used'] += required_mem
nodemap['disk_used'] += required_disk
destination_hosts.append(dest_migrate_info)
break
# check if all instances have target hosts
if len(destination_hosts) != len(instances_to_migrate):
LOG.warning("Not all target hosts could be found; it might "
"be because there is not enough resource")
return None
return destination_hosts
def group_hosts_by_airflow(self):
"""Group hosts based on airflow meters"""
nodes = self.get_available_compute_nodes()
if not nodes:
raise wexc.ClusterEmpty()
overload_hosts = []
nonoverload_hosts = []
for node_id in nodes:
airflow = None
node = self.compute_model.get_node_by_uuid(
node_id)
resource_id = node.uuid
if self.config.datasource == "ceilometer":
airflow = self.ceilometer.statistic_aggregation(
resource_id=resource_id,
meter_name=self.meter_name_airflow,
period=self._period,
aggregate='avg')
elif self.config.datasource == "gnocchi":
stop_time = datetime.datetime.utcnow()
start_time = stop_time - datetime.timedelta(
seconds=int(self._period))
airflow = self.gnocchi.statistic_aggregation(
resource_id=resource_id,
metric=self.meter_name_airflow,
granularity=self.granularity,
start_time=start_time,
stop_time=stop_time,
aggregation='mean')
# some hosts may not have airflow meter, remove from target
if airflow is None:
LOG.warning("%s: no airflow data", resource_id)
continue
LOG.debug("%s: airflow %f" % (resource_id, airflow))
nodemap = {'node': node, 'airflow': airflow}
if airflow >= self.threshold_airflow:
# mark the node to release resources
overload_hosts.append(nodemap)
else:
nonoverload_hosts.append(nodemap)
return overload_hosts, nonoverload_hosts
def pre_execute(self):
LOG.debug("Initializing Uniform Airflow Strategy")
if not self.compute_model:
raise wexc.ClusterStateNotDefined()
if self.compute_model.stale:
raise wexc.ClusterStateStale()
LOG.debug(self.compute_model.to_string())
def do_execute(self):
self.threshold_airflow = self.input_parameters.threshold_airflow
self.threshold_inlet_t = self.input_parameters.threshold_inlet_t
self.threshold_power = self.input_parameters.threshold_power
self._period = self.input_parameters.period
source_nodes, target_nodes = self.group_hosts_by_airflow()
if not source_nodes:
LOG.debug("No hosts require optimization")
return self.solution
if not target_nodes:
LOG.warning("No hosts currently have airflow under %s, "
"therefore there are no possible target "
"hosts for any migration",
self.threshold_airflow)
return self.solution
# migrate the instance from server with largest airflow first
source_nodes = sorted(source_nodes,
reverse=True,
key=lambda x: (x["airflow"]))
instances_to_migrate = self.choose_instance_to_migrate(source_nodes)
if not instances_to_migrate:
return self.solution
source_node, instances_src = instances_to_migrate
# sort host with airflow
target_nodes = sorted(target_nodes, key=lambda x: (x["airflow"]))
# find the hosts that have enough resource
# for the instance to be migrated
destination_hosts = self.filter_destination_hosts(
target_nodes, instances_src)
if not destination_hosts:
LOG.warning("No target host could be found; it might "
"be because there is not enough resources")
return self.solution
# generate solution to migrate the instance to the dest server,
for info in destination_hosts:
instance = info['instance']
destination_node = info['node']
if self.compute_model.migrate_instance(
instance, source_node, destination_node):
parameters = {'migration_type': 'live',
'source_node': source_node.uuid,
'destination_node': destination_node.uuid}
self.solution.add_action(action_type=self.MIGRATION,
resource_id=instance.uuid,
input_parameters=parameters)
def post_execute(self):
self.solution.model = self.compute_model
# TODO(v-francoise): Add the indicators to the solution
LOG.debug(self.compute_model.to_string())