Merge "Use KiB as unit for host_ram_usage when using prometheus datasource" into stable/2025.1
This commit is contained in:
14
releasenotes/notes/bug-2113776-4bd314fb46623fbc.yaml
Normal file
14
releasenotes/notes/bug-2113776-4bd314fb46623fbc.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
When running an audit with the `workload_stabilization` strategy with
|
||||
`instance_ram_usage` metric in a deployment with prometheus datasource,
|
||||
the host metric for the ram usage was reported with an incorrect
|
||||
unit, which led to incorrect standard deviation and action plans due to the
|
||||
application of the wrong scale factor in the algorithm.
|
||||
|
||||
The host ram usage metric is now properly reported in KiB when using a
|
||||
prometheus datasource and the strategy `workload_stabilization` calculates
|
||||
the standard deviation properly.
|
||||
|
||||
For more details: https://launchpad.net/bugs/2113776
|
||||
@@ -178,7 +178,7 @@ class DataSourceBase(object):
|
||||
granularity=None):
|
||||
"""Get the ram usage for a host such as a compute_node
|
||||
|
||||
:return: ram usage as float in megabytes
|
||||
:return: ram usage as float in kibibytes
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@@ -276,7 +276,7 @@ class PrometheusHelper(base.DataSourceBase):
|
||||
(node_memory_MemTotal_bytes{instance='the_host'} -
|
||||
avg_over_time(
|
||||
node_memory_MemAvailable_bytes{instance='the_host'}[300s]))
|
||||
/ 1024 / 1024
|
||||
/ 1024
|
||||
|
||||
So we take total and subtract available memory to determine
|
||||
how much is in use. We use the prometheus xxx_over_time functions
|
||||
@@ -315,10 +315,11 @@ class PrometheusHelper(base.DataSourceBase):
|
||||
'meter': meter, 'period': period}
|
||||
)
|
||||
elif meter == 'node_memory_MemAvailable_bytes':
|
||||
# Prometheus metric is in bytes and we need to return KiB
|
||||
query_args = (
|
||||
"(node_memory_MemTotal_bytes{%(label)s='%(label_value)s'} "
|
||||
"- %(agg)s_over_time(%(meter)s{%(label)s='%(label_value)s'}"
|
||||
"[%(period)ss])) / 1024 / 1024"
|
||||
"[%(period)ss])) / 1024"
|
||||
% {'label': self.prometheus_fqdn_label,
|
||||
'label_value': instance_label, 'agg': aggregate,
|
||||
'meter': meter, 'period': period}
|
||||
|
||||
@@ -593,7 +593,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
||||
expected_query = (
|
||||
"(node_memory_MemTotal_bytes{fqdn='c_host'} - avg_over_time"
|
||||
"(node_memory_MemAvailable_bytes{fqdn='c_host'}[555s])) "
|
||||
"/ 1024 / 1024")
|
||||
"/ 1024")
|
||||
result = self.helper._build_prometheus_query(
|
||||
'avg', 'node_memory_MemAvailable_bytes', 'c_host', '555')
|
||||
self.assertEqual(result, expected_query)
|
||||
@@ -602,7 +602,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
||||
expected_query = (
|
||||
"(node_memory_MemTotal_bytes{fqdn='d_host'} - min_over_time"
|
||||
"(node_memory_MemAvailable_bytes{fqdn='d_host'}[222s])) "
|
||||
"/ 1024 / 1024")
|
||||
"/ 1024")
|
||||
result = self.helper._build_prometheus_query(
|
||||
'min', 'node_memory_MemAvailable_bytes', 'd_host', '222')
|
||||
self.assertEqual(result, expected_query)
|
||||
@@ -621,7 +621,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
||||
expected_query = (
|
||||
"(node_memory_MemTotal_bytes{custom_fqdn='d_host'} - min_over_time"
|
||||
"(node_memory_MemAvailable_bytes{custom_fqdn='d_host'}[222s])) "
|
||||
"/ 1024 / 1024")
|
||||
"/ 1024")
|
||||
result = self.helper._build_prometheus_query(
|
||||
'min', 'node_memory_MemAvailable_bytes', 'd_host', '222')
|
||||
self.assertEqual(result, expected_query)
|
||||
|
||||
@@ -212,11 +212,71 @@ class TestWorkloadStabilization(TestBaseStrategy):
|
||||
len(self.strategy.simulate_migrations(self.hosts_load_assert)))
|
||||
|
||||
def test_check_threshold(self):
|
||||
|
||||
# sd for 0.05, 0.05, 0.07, 0.07, 0.8
|
||||
test_cpu_sd = 0.296
|
||||
|
||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||
self.strategy.thresholds = {'instance_cpu_usage': 0.001,
|
||||
self.strategy.thresholds = {'instance_cpu_usage': 0.25,
|
||||
'instance_ram_usage': 0.2}
|
||||
|
||||
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||
self.assertTrue(self.strategy.check_threshold())
|
||||
self.assertEqual(
|
||||
round(self.strategy.sd_before_audit, 3),
|
||||
test_cpu_sd)
|
||||
|
||||
def test_check_threshold_cpu(self):
|
||||
|
||||
test_cpu_sd = 0.296
|
||||
|
||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||
self.strategy.metrics = ["instance_cpu_usage"]
|
||||
self.strategy.thresholds = {'instance_cpu_usage': 0.25}
|
||||
|
||||
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||
self.assertTrue(self.strategy.check_threshold())
|
||||
self.assertEqual(
|
||||
round(self.strategy.sd_before_audit, 3),
|
||||
test_cpu_sd)
|
||||
|
||||
def test_check_threshold_ram(self):
|
||||
|
||||
# sd for 4,5,7,8, 29 MB used in 132MB total memory hosts
|
||||
test_ram_sd = 0.071
|
||||
|
||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||
self.strategy.metrics = ["instance_ram_usage"]
|
||||
self.strategy.thresholds = {'instance_cpu_usage': 0.25,
|
||||
'instance_ram_usage': 0.05}
|
||||
|
||||
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||
self.assertTrue(self.strategy.check_threshold())
|
||||
self.assertEqual(
|
||||
round(self.strategy.sd_before_audit, 3),
|
||||
test_ram_sd)
|
||||
|
||||
def test_check_threshold_fail(self):
|
||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||
self.strategy.thresholds = {'instance_cpu_usage': 0.3,
|
||||
'instance_ram_usage': 0.2}
|
||||
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||
self.assertFalse(self.strategy.check_threshold())
|
||||
|
||||
def test_check_threshold_cpu_fail(self):
|
||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||
self.strategy.metrics = ["instance_cpu_usage"]
|
||||
self.strategy.thresholds = {'instance_cpu_usage': 0.4}
|
||||
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||
self.assertFalse(self.strategy.check_threshold())
|
||||
|
||||
def test_check_threshold_ram_fail(self):
|
||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||
self.strategy.metrics = ["instance_ram_usage"]
|
||||
self.strategy.thresholds = {'instance_cpu_usage': 0.25,
|
||||
'instance_ram_usage': 0.1}
|
||||
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||
self.assertFalse(self.strategy.check_threshold())
|
||||
|
||||
def test_execute_one_migration(self):
|
||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||
|
||||
Reference in New Issue
Block a user