Merge "Aggregate by fqdn label instead of instance in host cpu metrics"
This commit is contained in:
@@ -264,8 +264,8 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
This function builds and returns the string query that will be sent
|
This function builds and returns the string query that will be sent
|
||||||
to the Prometheus server /query endpoint. For host cpu usage we use:
|
to the Prometheus server /query endpoint. For host cpu usage we use:
|
||||||
|
|
||||||
100 - (avg by (instance)(rate(node_cpu_seconds_total{mode='idle',
|
100 - (avg by (fqdn)(rate(node_cpu_seconds_total{mode='idle',
|
||||||
instance='some_host'}[300s])) * 100)
|
fqdn='some_host'}[300s])) * 100)
|
||||||
|
|
||||||
so using prometheus rate function over the specified period, we average
|
so using prometheus rate function over the specified period, we average
|
||||||
per instance (all cpus) idle time and then 'everything else' is cpu
|
per instance (all cpus) idle time and then 'everything else' is cpu
|
||||||
@@ -307,7 +307,7 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
|
|
||||||
if meter == 'node_cpu_seconds_total':
|
if meter == 'node_cpu_seconds_total':
|
||||||
query_args = (
|
query_args = (
|
||||||
"100 - (%(agg)s by (instance)(rate(%(meter)s"
|
"100 - (%(agg)s by (%(label)s)(rate(%(meter)s"
|
||||||
"{mode='idle',%(label)s='%(label_value)s'}[%(period)ss])) "
|
"{mode='idle',%(label)s='%(label_value)s'}[%(period)ss])) "
|
||||||
"* 100)"
|
"* 100)"
|
||||||
% {'label': self.prometheus_fqdn_label,
|
% {'label': self.prometheus_fqdn_label,
|
||||||
@@ -464,8 +464,8 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
This calculates the host cpu usage and returns it as a percentage
|
This calculates the host cpu usage and returns it as a percentage
|
||||||
The calculation is made by using the cpu 'idle' time, per
|
The calculation is made by using the cpu 'idle' time, per
|
||||||
instance (so all CPUs are included). For example the query looks like
|
instance (so all CPUs are included). For example the query looks like
|
||||||
(100 - (avg by (instance)(rate(node_cpu_seconds_total
|
(100 - (avg by (fqdn)(rate(node_cpu_seconds_total
|
||||||
{mode='idle',instance='localhost:9100'}[300s])) * 100))
|
{mode='idle',fqdn='compute1.example.com'}[300s])) * 100))
|
||||||
"""
|
"""
|
||||||
aggregate = self._invert_max_min_aggregate(aggregate)
|
aggregate = self._invert_max_min_aggregate(aggregate)
|
||||||
cpu_usage = self.statistic_aggregation(
|
cpu_usage = self.statistic_aggregation(
|
||||||
|
|||||||
@@ -146,7 +146,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(expected_cpu_usage, result)
|
self.assertEqual(expected_cpu_usage, result)
|
||||||
mock_prometheus_query.assert_called_once_with(
|
mock_prometheus_query.assert_called_once_with(
|
||||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
|
||||||
"{mode='idle',fqdn='marios-env.controlplane.domain'}[300s]))"
|
"{mode='idle',fqdn='marios-env.controlplane.domain'}[300s]))"
|
||||||
" * 100)")
|
" * 100)")
|
||||||
|
|
||||||
@@ -575,7 +575,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
|
|
||||||
def test_build_prometheus_query_node_cpu_avg_agg(self):
|
def test_build_prometheus_query_node_cpu_avg_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
|
||||||
"{mode='idle',fqdn='a_host'}[111s])) * 100)")
|
"{mode='idle',fqdn='a_host'}[111s])) * 100)")
|
||||||
result = self.helper._build_prometheus_query(
|
result = self.helper._build_prometheus_query(
|
||||||
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
||||||
@@ -583,7 +583,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
|
|
||||||
def test_build_prometheus_query_node_cpu_max_agg(self):
|
def test_build_prometheus_query_node_cpu_max_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"100 - (max by (instance)(rate(node_cpu_seconds_total"
|
"100 - (max by (fqdn)(rate(node_cpu_seconds_total"
|
||||||
"{mode='idle',fqdn='b_host'}[444s])) * 100)")
|
"{mode='idle',fqdn='b_host'}[444s])) * 100)")
|
||||||
result = self.helper._build_prometheus_query(
|
result = self.helper._build_prometheus_query(
|
||||||
'max', 'node_cpu_seconds_total', 'b_host', '444')
|
'max', 'node_cpu_seconds_total', 'b_host', '444')
|
||||||
@@ -610,7 +610,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
def test_build_prometheus_query_node_cpu_avg_agg_custom_label(self):
|
def test_build_prometheus_query_node_cpu_avg_agg_custom_label(self):
|
||||||
self.helper.prometheus_fqdn_label = 'custom_fqdn_label'
|
self.helper.prometheus_fqdn_label = 'custom_fqdn_label'
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
"100 - (avg by (custom_fqdn_label)(rate(node_cpu_seconds_total"
|
||||||
"{mode='idle',custom_fqdn_label='a_host'}[111s])) * 100)")
|
"{mode='idle',custom_fqdn_label='a_host'}[111s])) * 100)")
|
||||||
result = self.helper._build_prometheus_query(
|
result = self.helper._build_prometheus_query(
|
||||||
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
||||||
|
|||||||
Reference in New Issue
Block a user