Merge "Aggregate by fqdn label instead of instance in host cpu metrics"

This commit is contained in:
Zuul
2025-04-08 17:37:10 +00:00
committed by Gerrit Code Review
2 changed files with 9 additions and 9 deletions

View File

@@ -264,8 +264,8 @@ class PrometheusHelper(base.DataSourceBase):
This function builds and returns the string query that will be sent This function builds and returns the string query that will be sent
to the Prometheus server /query endpoint. For host cpu usage we use: to the Prometheus server /query endpoint. For host cpu usage we use:
100 - (avg by (instance)(rate(node_cpu_seconds_total{mode='idle', 100 - (avg by (fqdn)(rate(node_cpu_seconds_total{mode='idle',
instance='some_host'}[300s])) * 100) fqdn='some_host'}[300s])) * 100)
so using prometheus rate function over the specified period, we average so using prometheus rate function over the specified period, we average
per instance (all cpus) idle time and then 'everything else' is cpu per instance (all cpus) idle time and then 'everything else' is cpu
@@ -307,7 +307,7 @@ class PrometheusHelper(base.DataSourceBase):
if meter == 'node_cpu_seconds_total': if meter == 'node_cpu_seconds_total':
query_args = ( query_args = (
"100 - (%(agg)s by (instance)(rate(%(meter)s" "100 - (%(agg)s by (%(label)s)(rate(%(meter)s"
"{mode='idle',%(label)s='%(label_value)s'}[%(period)ss])) " "{mode='idle',%(label)s='%(label_value)s'}[%(period)ss])) "
"* 100)" "* 100)"
% {'label': self.prometheus_fqdn_label, % {'label': self.prometheus_fqdn_label,
@@ -464,8 +464,8 @@ class PrometheusHelper(base.DataSourceBase):
This calculates the host cpu usage and returns it as a percentage This calculates the host cpu usage and returns it as a percentage
The calculation is made by using the cpu 'idle' time, per The calculation is made by using the cpu 'idle' time, per
instance (so all CPUs are included). For example the query looks like instance (so all CPUs are included). For example the query looks like
(100 - (avg by (instance)(rate(node_cpu_seconds_total (100 - (avg by (fqdn)(rate(node_cpu_seconds_total
{mode='idle',instance='localhost:9100'}[300s])) * 100)) {mode='idle',fqdn='compute1.example.com'}[300s])) * 100))
""" """
aggregate = self._invert_max_min_aggregate(aggregate) aggregate = self._invert_max_min_aggregate(aggregate)
cpu_usage = self.statistic_aggregation( cpu_usage = self.statistic_aggregation(

View File

@@ -146,7 +146,7 @@ class TestPrometheusHelper(base.BaseTestCase):
) )
self.assertEqual(expected_cpu_usage, result) self.assertEqual(expected_cpu_usage, result)
mock_prometheus_query.assert_called_once_with( mock_prometheus_query.assert_called_once_with(
"100 - (avg by (instance)(rate(node_cpu_seconds_total" "100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
"{mode='idle',fqdn='marios-env.controlplane.domain'}[300s]))" "{mode='idle',fqdn='marios-env.controlplane.domain'}[300s]))"
" * 100)") " * 100)")
@@ -575,7 +575,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_node_cpu_avg_agg(self): def test_build_prometheus_query_node_cpu_avg_agg(self):
expected_query = ( expected_query = (
"100 - (avg by (instance)(rate(node_cpu_seconds_total" "100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
"{mode='idle',fqdn='a_host'}[111s])) * 100)") "{mode='idle',fqdn='a_host'}[111s])) * 100)")
result = self.helper._build_prometheus_query( result = self.helper._build_prometheus_query(
'avg', 'node_cpu_seconds_total', 'a_host', '111') 'avg', 'node_cpu_seconds_total', 'a_host', '111')
@@ -583,7 +583,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_node_cpu_max_agg(self): def test_build_prometheus_query_node_cpu_max_agg(self):
expected_query = ( expected_query = (
"100 - (max by (instance)(rate(node_cpu_seconds_total" "100 - (max by (fqdn)(rate(node_cpu_seconds_total"
"{mode='idle',fqdn='b_host'}[444s])) * 100)") "{mode='idle',fqdn='b_host'}[444s])) * 100)")
result = self.helper._build_prometheus_query( result = self.helper._build_prometheus_query(
'max', 'node_cpu_seconds_total', 'b_host', '444') 'max', 'node_cpu_seconds_total', 'b_host', '444')
@@ -610,7 +610,7 @@ class TestPrometheusHelper(base.BaseTestCase):
def test_build_prometheus_query_node_cpu_avg_agg_custom_label(self): def test_build_prometheus_query_node_cpu_avg_agg_custom_label(self):
self.helper.prometheus_fqdn_label = 'custom_fqdn_label' self.helper.prometheus_fqdn_label = 'custom_fqdn_label'
expected_query = ( expected_query = (
"100 - (avg by (instance)(rate(node_cpu_seconds_total" "100 - (avg by (custom_fqdn_label)(rate(node_cpu_seconds_total"
"{mode='idle',custom_fqdn_label='a_host'}[111s])) * 100)") "{mode='idle',custom_fqdn_label='a_host'}[111s])) * 100)")
result = self.helper._build_prometheus_query( result = self.helper._build_prometheus_query(
'avg', 'node_cpu_seconds_total', 'a_host', '111') 'avg', 'node_cpu_seconds_total', 'a_host', '111')