Handle deprecated "cpu_util" metric

The "cpu_util" metric has been deprecated a few years ago.
We'll obtain the same result by converting the cumulative cpu
time to a percentage, leveraging the rate of change aggregation.

Change-Id: I18fe0de6f74c785e674faceea0c48f44055818fe
This commit is contained in:
Lucian Petrut
2023-10-10 14:29:06 +03:00
parent 922478fbda
commit 00fea975e2
9 changed files with 74 additions and 31 deletions

View File

@@ -38,7 +38,7 @@ class GnocchiHelper(base.DataSourceBase):
host_inlet_temp='hardware.ipmi.node.temperature',
host_airflow='hardware.ipmi.node.airflow',
host_power='hardware.ipmi.node.power',
instance_cpu_usage='cpu_util',
instance_cpu_usage='cpu',
instance_ram_usage='memory.resident',
instance_ram_allocated='memory',
instance_l3_cache_usage='cpu_l3_cache',
@@ -93,6 +93,25 @@ class GnocchiHelper(base.DataSourceBase):
resource_id = resources[0]['id']
if meter_name == "instance_cpu_usage":
if resource_type != "instance":
LOG.warning("Unsupported resource type for metric "
"'instance_cpu_usage': ", resource_type)
return
# The "cpu_util" gauge (percentage) metric has been removed.
# We're going to obtain the same result by using the rate of change
# aggregate operation.
if aggregate not in ("mean", "rate:mean"):
LOG.warning("Unsupported aggregate for instance_cpu_usage "
"metric: %s. "
"Supported aggregates: mean, rate:mean ",
aggregate)
return
# TODO(lpetrut): consider supporting other aggregates.
aggregate = "rate:mean"
raw_kwargs = dict(
metric=meter,
start=start_time,
@@ -117,6 +136,17 @@ class GnocchiHelper(base.DataSourceBase):
# Airflow from hardware.ipmi.node.airflow is reported as
# 1/10 th of actual CFM
return_value *= 10
if meter_name == "instance_cpu_usage":
# "rate:mean" can return negative values for migrated vms.
return_value = max(0, return_value)
# We're converting the cumulative cpu time (ns) to cpu usage
# percentage.
vcpus = resource.vcpus
if not vcpus:
LOG.warning("instance vcpu count not set, assuming 1")
vcpus = 1
return_value *= 100 / (granularity * 10e+8) / vcpus
return return_value

View File

@@ -295,7 +295,7 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
self.threshold)
return self.solution
# choose the server with largest cpu_util
# choose the server with largest cpu usage
source_nodes = sorted(source_nodes,
reverse=True,
key=lambda x: (x[self._meter]))

View File

@@ -40,17 +40,25 @@ class TestGnocchiHelper(base.BaseTestCase):
self.addCleanup(stat_agg_patcher.stop)
def test_gnocchi_statistic_aggregation(self, mock_gnocchi):
vcpus = 2
mock_instance = mock.Mock(
id='16a86790-327a-45f9-bc82-45839f062fdc',
vcpus=vcpus)
gnocchi = mock.MagicMock()
# cpu time rate of change (ns)
mock_rate_measure = 360 * 10e+8 * vcpus * 5.5 / 100
expected_result = 5.5
expected_measures = [["2017-02-02T09:00:00.000000", 360, 5.5]]
expected_measures = [
["2017-02-02T09:00:00.000000", 360, mock_rate_measure]]
gnocchi.metric.get_measures.return_value = expected_measures
mock_gnocchi.return_value = gnocchi
helper = gnocchi_helper.GnocchiHelper()
result = helper.statistic_aggregation(
resource=mock.Mock(id='16a86790-327a-45f9-bc82-45839f062fdc'),
resource=mock_instance,
resource_type='instance',
meter_name='instance_cpu_usage',
period=300,
@@ -59,6 +67,14 @@ class TestGnocchiHelper(base.BaseTestCase):
)
self.assertEqual(expected_result, result)
gnocchi.metric.get_measures.assert_called_once_with(
metric="cpu",
start=mock.ANY,
stop=mock.ANY,
resource_id=mock_instance.uuid,
granularity=360,
aggregation="rate:mean")
def test_gnocchi_statistic_series(self, mock_gnocchi):
gnocchi = mock.MagicMock()
expected_result = {