Files
watcher/watcher/decision_engine/datasources/gnocchi.py

284 lines
10 KiB
Python

# -*- encoding: utf-8 -*-
# Copyright (c) 2017 Servionica
#
# Authors: Alexander Chadin <a.chadin@servionica.ru>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import datetime
from datetime import timedelta
from gnocchiclient import exceptions as gnc_exc
from oslo_config import cfg
from oslo_log import log
from watcher.common import clients
from watcher.decision_engine.datasources import base
CONF = cfg.CONF
LOG = log.getLogger(__name__)
class GnocchiHelper(base.DataSourceBase):
NAME = 'gnocchi'
METRIC_MAP = dict(host_cpu_usage='compute.node.cpu.percent',
host_ram_usage='hardware.memory.used',
host_outlet_temp='hardware.ipmi.node.outlet_temperature',
host_inlet_temp='hardware.ipmi.node.temperature',
host_airflow='hardware.ipmi.node.airflow',
host_power='hardware.ipmi.node.power',
instance_cpu_usage='cpu',
instance_ram_usage='memory.resident',
instance_ram_allocated='memory',
instance_l3_cache_usage='cpu_l3_cache',
instance_root_disk_size='disk.root.size',
)
def __init__(self, osc=None):
""":param osc: an OpenStackClients instance"""
self.osc = osc if osc else clients.OpenStackClients()
self.gnocchi = self.osc.gnocchi()
def check_availability(self):
status = self.query_retry(self.gnocchi.status.get)
if status:
return 'available'
else:
return 'not available'
def list_metrics(self):
"""List the user's meters."""
response = self.query_retry(f=self.gnocchi.metric.list)
if not response:
return set()
else:
return set([metric['name'] for metric in response])
def statistic_aggregation(self, resource=None, resource_type=None,
meter_name=None, period=300, aggregate='mean',
granularity=300):
stop_time = datetime.utcnow()
start_time = stop_time - timedelta(seconds=(int(period)))
meter = self._get_meter(meter_name)
if aggregate == 'count':
aggregate = 'mean'
LOG.warning('aggregate type count not supported by gnocchi,'
' replaced with mean.')
resource_id = resource.uuid
if resource_type == 'compute_node':
resource_id = "%s_%s" % (resource.hostname, resource.hostname)
kwargs = dict(query={"=": {"original_resource_id": resource_id}},
limit=1)
resources = self.query_retry(
f=self.gnocchi.resource.search,
ignored_exc=gnc_exc.NotFound,
**kwargs)
if not resources:
LOG.warning("The {0} resource {1} could not be "
"found".format(self.NAME, resource_id))
return
resource_id = resources[0]['id']
if meter_name == "instance_cpu_usage":
if resource_type != "instance":
LOG.warning("Unsupported resource type for metric "
"'instance_cpu_usage': ", resource_type)
return
# The "cpu_util" gauge (percentage) metric has been removed.
# We're going to obtain the same result by using the rate of change
# aggregate operation.
if aggregate not in ("mean", "rate:mean"):
LOG.warning("Unsupported aggregate for instance_cpu_usage "
"metric: %s. "
"Supported aggregates: mean, rate:mean ",
aggregate)
return
# TODO(lpetrut): consider supporting other aggregates.
aggregate = "rate:mean"
raw_kwargs = dict(
metric=meter,
start=start_time,
stop=stop_time,
resource_id=resource_id,
granularity=granularity,
aggregation=aggregate,
)
kwargs = {k: v for k, v in raw_kwargs.items() if k and v}
statistics = self.query_retry(
f=self.gnocchi.metric.get_measures,
ignored_exc=gnc_exc.NotFound,
**kwargs)
return_value = None
if statistics:
# return value of latest measure
# measure has structure [time, granularity, value]
return_value = statistics[-1][2]
if meter_name == 'host_airflow':
# Airflow from hardware.ipmi.node.airflow is reported as
# 1/10 th of actual CFM
return_value *= 10
if meter_name == "instance_cpu_usage":
# "rate:mean" can return negative values for migrated vms.
return_value = max(0, return_value)
# We're converting the cumulative cpu time (ns) to cpu usage
# percentage.
vcpus = resource.vcpus
if not vcpus:
LOG.warning("instance vcpu count not set, assuming 1")
vcpus = 1
return_value *= 100 / (granularity * 10e+8) / vcpus
return return_value
def statistic_series(self, resource=None, resource_type=None,
meter_name=None, start_time=None, end_time=None,
granularity=300):
meter = self._get_meter(meter_name)
resource_id = resource.uuid
if resource_type == 'compute_node':
resource_id = "%s_%s" % (resource.hostname, resource.hostname)
kwargs = dict(query={"=": {"original_resource_id": resource_id}},
limit=1)
resources = self.query_retry(
f=self.gnocchi.resource.search,
ignored_exc=gnc_exc.NotFound,
**kwargs)
if not resources:
LOG.warning("The {0} resource {1} could not be "
"found".format(self.NAME, resource_id))
return
resource_id = resources[0]['id']
raw_kwargs = dict(
metric=meter,
start=start_time,
stop=end_time,
resource_id=resource_id,
granularity=granularity,
)
kwargs = {k: v for k, v in raw_kwargs.items() if k and v}
statistics = self.query_retry(
f=self.gnocchi.metric.get_measures,
ignored_exc=gnc_exc.NotFound,
**kwargs)
return_value = None
if statistics:
# measure has structure [time, granularity, value]
if meter_name == 'host_airflow':
# Airflow from hardware.ipmi.node.airflow is reported as
# 1/10 th of actual CFM
return_value = {s[0]: s[2]*10 for s in statistics}
else:
return_value = {s[0]: s[2] for s in statistics}
return return_value
def get_host_cpu_usage(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'compute_node', 'host_cpu_usage', period,
aggregate, granularity)
def get_host_ram_usage(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'compute_node', 'host_ram_usage', period,
aggregate, granularity)
def get_host_outlet_temp(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'compute_node', 'host_outlet_temp', period,
aggregate, granularity)
def get_host_inlet_temp(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'compute_node', 'host_inlet_temp', period,
aggregate, granularity)
def get_host_airflow(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'compute_node', 'host_airflow', period,
aggregate, granularity)
def get_host_power(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'compute_node', 'host_power', period,
aggregate, granularity)
def get_instance_cpu_usage(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'instance', 'instance_cpu_usage', period,
aggregate, granularity)
def get_instance_ram_usage(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'instance', 'instance_ram_usage', period,
aggregate, granularity)
def get_instance_ram_allocated(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'instance', 'instance_ram_allocated', period,
aggregate, granularity)
def get_instance_l3_cache_usage(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'instance', 'instance_l3_cache_usage', period,
aggregate, granularity)
def get_instance_root_disk_size(self, resource, period, aggregate,
granularity=300):
return self.statistic_aggregation(
resource, 'instance', 'instance_root_disk_size', period,
aggregate, granularity)