From 8d61c1a2b4ab7158c00559ac0b5254cd2bbf5859 Mon Sep 17 00:00:00 2001 From: Alexander Chadin Date: Tue, 23 Jan 2018 11:19:57 +0300 Subject: [PATCH] Fix workload_stabilization unavailable nodes and instances This patch set excludes nodes and instances from auditing if appropriate metrics aren't available. Change-Id: I87c6c249e3962f45d082f92d7e6e0be04e101799 Closes-Bug: #1736982 (cherry picked from commit 701b258dc76cd989e902efc2b507642359d1bfd7) --- .../strategies/workload_stabilization.py | 19 +++++-- .../model/ceilometer_metrics.py | 9 +++- .../scenario_1_with_1_node_unavailable.xml | 50 +++++++++++++++++++ .../model/faker_cluster_state.py | 3 ++ .../decision_engine/model/gnocchi_metrics.py | 9 +++- .../strategies/test_workload_stabilization.py | 12 +++++ 6 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml diff --git a/watcher/decision_engine/strategy/strategies/workload_stabilization.py b/watcher/decision_engine/strategy/strategies/workload_stabilization.py index 7e3e96f40..64735b0da 100644 --- a/watcher/decision_engine/strategy/strategies/workload_stabilization.py +++ b/watcher/decision_engine/strategy/strategies/workload_stabilization.py @@ -252,7 +252,7 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): "No values returned by %(resource_id)s " "for %(metric_name)s" % dict( resource_id=instance.uuid, metric_name=meter)) - avg_meter = 0 + return if meter == 'cpu_util': avg_meter /= float(100) instance_load[meter] = avg_meter @@ -308,12 +308,10 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): ) if avg_meter is None: - if meter_name == 'hardware.memory.used': - avg_meter = node.memory - if meter_name == 'compute.node.cpu.percent': - avg_meter = 1 LOG.warning('No values returned by node %s for %s', node_id, meter_name) + del hosts_load[node_id] + break else: if meter_name == 'hardware.memory.used': avg_meter /= oslo_utils.units.Ki @@ -362,6 +360,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): migration_case = [] new_hosts = copy.deepcopy(hosts) instance_load = self.get_instance_load(instance) + if not instance_load: + return s_host_vcpus = new_hosts[src_node.uuid]['vcpus'] d_host_vcpus = new_hosts[dst_node.uuid]['vcpus'] for metric in self.metrics: @@ -408,6 +408,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): dst_node = self.compute_model.get_node_by_uuid(dst_host) sd_case = self.calculate_migration_case( hosts, instance, src_node, dst_node) + if sd_case is None: + break weighted_sd = self.calculate_weighted_sd(sd_case[:-1]) @@ -416,6 +418,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): 'host': dst_node.uuid, 'value': weighted_sd, 's_host': src_node.uuid, 'instance': instance.uuid} instance_host_map.append(min_sd_case) + if sd_case is None: + continue return sorted(instance_host_map, key=lambda x: x['value']) def check_threshold(self): @@ -424,7 +428,12 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): normalized_load = self.normalize_hosts_load(hosts_load) for metric in self.metrics: metric_sd = self.get_sd(normalized_load, metric) + LOG.info("Standard deviation for %s is %s." + % (metric, metric_sd)) if metric_sd > float(self.thresholds[metric]): + LOG.info("Standard deviation of %s exceeds" + " appropriate threshold %s." + % (metric, metric_sd)) return self.simulate_migrations(hosts_load) def add_migration(self, diff --git a/watcher/tests/decision_engine/model/ceilometer_metrics.py b/watcher/tests/decision_engine/model/ceilometer_metrics.py index 1103fb41f..6e76bded4 100644 --- a/watcher/tests/decision_engine/model/ceilometer_metrics.py +++ b/watcher/tests/decision_engine/model/ceilometer_metrics.py @@ -176,6 +176,8 @@ class FakeCeilometerMetrics(object): # node 3 mock['Node_6_hostname_6'] = 8 + # This node doesn't send metrics + mock['LOST_NODE_hostname_7'] = None mock['Node_19_hostname_19'] = 10 # node 4 mock['INSTANCE_7_hostname_7'] = 4 @@ -190,7 +192,10 @@ class FakeCeilometerMetrics(object): # mock[uuid] = random.randint(1, 4) mock[uuid] = 8 - return float(mock[str(uuid)]) + if mock[str(uuid)] is not None: + return float(mock[str(uuid)]) + else: + return mock[str(uuid)] @staticmethod def get_average_usage_instance_cpu_wb(uuid): @@ -255,6 +260,8 @@ class FakeCeilometerMetrics(object): # node 4 mock['INSTANCE_7'] = 4 + + mock['LOST_INSTANCE'] = None if uuid not in mock.keys(): # mock[uuid] = random.randint(1, 4) mock[uuid] = 8 diff --git a/watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml b/watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml new file mode 100644 index 000000000..ad95c2fda --- /dev/null +++ b/watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/watcher/tests/decision_engine/model/faker_cluster_state.py b/watcher/tests/decision_engine/model/faker_cluster_state.py index 18935447f..3ef9a67bb 100644 --- a/watcher/tests/decision_engine/model/faker_cluster_state.py +++ b/watcher/tests/decision_engine/model/faker_cluster_state.py @@ -114,6 +114,9 @@ class FakerModelCollector(base.BaseClusterDataModelCollector): def generate_scenario_1(self): return self.load_model('scenario_1.xml') + def generate_scenario_1_with_1_node_unavailable(self): + return self.load_model('scenario_1_with_1_node_unavailable.xml') + def generate_scenario_3_with_2_nodes(self): return self.load_model('scenario_3_with_2_nodes.xml') diff --git a/watcher/tests/decision_engine/model/gnocchi_metrics.py b/watcher/tests/decision_engine/model/gnocchi_metrics.py index 8c0a75e4b..1362e7d23 100644 --- a/watcher/tests/decision_engine/model/gnocchi_metrics.py +++ b/watcher/tests/decision_engine/model/gnocchi_metrics.py @@ -132,6 +132,8 @@ class FakeGnocchiMetrics(object): # node 3 mock['Node_6_hostname_6'] = 8 + # This node doesn't send metrics + mock['LOST_NODE_hostname_7'] = None mock['Node_19_hostname_19'] = 10 # node 4 mock['INSTANCE_7_hostname_7'] = 4 @@ -145,7 +147,10 @@ class FakeGnocchiMetrics(object): if uuid not in mock.keys(): mock[uuid] = 8 - return float(mock[str(uuid)]) + if mock[str(uuid)] is not None: + return float(mock[str(uuid)]) + else: + return mock[str(uuid)] @staticmethod def get_average_usage_instance_cpu(uuid): @@ -172,6 +177,8 @@ class FakeGnocchiMetrics(object): # node 4 mock['INSTANCE_7'] = 4 + + mock['LOST_INSTANCE'] = None if uuid not in mock.keys(): mock[uuid] = 8 diff --git a/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py b/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py index 8c9e656ed..9802f41d8 100644 --- a/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py +++ b/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py @@ -172,6 +172,12 @@ class TestWorkloadStabilization(base.TestCase): granularity=300, start_time=start_time, stop_time=stop_time, aggregation='mean') + def test_get_instance_load_with_no_metrics(self): + model = self.fake_cluster.generate_scenario_1_with_1_node_unavailable() + self.m_model.return_value = model + lost_instance = model.get_instance_by_uuid("LOST_INSTANCE") + self.assertIsNone(self.strategy.get_instance_load(lost_instance)) + def test_normalize_hosts_load(self): self.m_model.return_value = self.fake_cluster.generate_scenario_1() fake_hosts = {'Node_0': {'cpu_util': 0.07, 'memory.resident': 7}, @@ -196,6 +202,12 @@ class TestWorkloadStabilization(base.TestCase): self.assertEqual(self.strategy.get_hosts_load(), self.hosts_load_assert) + def test_get_hosts_load_with_node_missing(self): + self.m_model.return_value = \ + self.fake_cluster.generate_scenario_1_with_1_node_unavailable() + self.assertEqual(self.hosts_load_assert, + self.strategy.get_hosts_load()) + def test_get_sd(self): test_cpu_sd = 0.296 test_ram_sd = 9.3