Aggregate by label when querying instance cpu usage in prometheus

Currently, when the prometheus datasource queries the ceilometer_cpu
metric for instance cpu usage, it aggregates by instance and filters by
the label containing the instance uuid. While this works fine in real
scenarios, where a single metric is provided in a single instance, in
some cases, such as the CI jobs where metrics are directly injected, it
leads to incorrect metric calculation.

We applied a similar fix for the host metrics in [1] but we did not
implement it for instance cpu.

I am also converting the query formatting to the dict format to improve
understandability.

[1] https://review.opendev.org/c/openstack/watcher/+/946049

Closes-Bug: #2113936
Change-Id: I3038dec20612162c411fc77446e86a47e0354423
This commit is contained in:
Alfredo Moralejo
2025-06-11 14:40:23 +02:00
parent 15981117ee
commit 3860de0b1e
2 changed files with 10 additions and 8 deletions

View File

@@ -342,10 +342,12 @@ class PrometheusHelper(base.DataSourceBase):
)
vcpus = 1
query_args = (
"clamp_max((%s by (instance)(rate(%s{%s='%s'}[%ss]))/10e+8) "
"*(100/%s), 100)" %
(aggregate, meter, uuid_label_key, instance_label, period,
vcpus)
"clamp_max((%(agg)s by (%(label)s)"
"(rate(%(meter)s{%(label)s='%(label_value)s'}[%(period)ss]))"
"/10e+8) *(100/%(vcpus)s), 100)"
% {'label': uuid_label_key, 'label_value': instance_label,
'agg': aggregate, 'meter': meter, 'period': period,
'vcpus': vcpus}
)
else:
raise exception.InvalidParameter(

View File

@@ -242,7 +242,7 @@ class TestPrometheusHelper(base.BaseTestCase):
self.assertEqual(expected_cpu_usage, result_cpu)
self.assertIsInstance(result_cpu, float)
mock_prometheus_query.assert_called_once_with(
"clamp_max((avg by (instance)(rate("
"clamp_max((avg by (resource)(rate("
"ceilometer_cpu{resource='uuid-0'}[300s]))"
"/10e+8) *(100/2), 100)"
)
@@ -644,7 +644,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_instance_cpu_avg_agg(self):
expected_query = (
"clamp_max((avg by (instance)(rate("
"clamp_max((avg by (resource)(rate("
"ceilometer_cpu{resource='uuid-0'}[222s]))"
"/10e+8) *(100/2), 100)"
)
@@ -655,7 +655,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_instance_cpu_max_agg(self):
expected_query = (
"clamp_max((max by (instance)(rate("
"clamp_max((max by (resource)(rate("
"ceilometer_cpu{resource='uuid-0'}[555s]))"
"/10e+8) *(100/4), 100)"
)
@@ -699,7 +699,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_prometheus_query_custom_uuid_label(self, mock_prometheus_get):
cfg.CONF.prometheus_client.instance_uuid_label = 'custom_uuid_label'
expected_query = (
"clamp_max((max by (instance)"
"clamp_max((max by (custom_uuid_label)"
"(rate(ceilometer_cpu{custom_uuid_label='uuid-0'}[555s]))"
"/10e+8) *(100/4), 100)"
)