From 701b258dc76cd989e902efc2b507642359d1bfd7 Mon Sep 17 00:00:00 2001 From: Alexander Chadin Date: Tue, 23 Jan 2018 11:19:57 +0300 Subject: [PATCH] Fix workload_stabilization unavailable nodes and instances This patch set excludes nodes and instances from auditing if appropriate metrics aren't available. Change-Id: I87c6c249e3962f45d082f92d7e6e0be04e101799 Closes-Bug: #1736982 --- .../strategies/workload_stabilization.py | 19 +++++-- .../model/ceilometer_metrics.py | 9 +++- .../scenario_1_with_1_node_unavailable.xml | 50 +++++++++++++++++++ .../model/faker_cluster_state.py | 3 ++ .../decision_engine/model/gnocchi_metrics.py | 9 +++- .../strategies/test_workload_stabilization.py | 12 +++++ 6 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml diff --git a/watcher/decision_engine/strategy/strategies/workload_stabilization.py b/watcher/decision_engine/strategy/strategies/workload_stabilization.py index bef5ba29d..0ae918387 100644 --- a/watcher/decision_engine/strategy/strategies/workload_stabilization.py +++ b/watcher/decision_engine/strategy/strategies/workload_stabilization.py @@ -202,7 +202,7 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): "No values returned by %(resource_id)s " "for %(metric_name)s" % dict( resource_id=instance.uuid, metric_name=meter)) - avg_meter = 0 + return if meter == 'cpu_util': avg_meter /= float(100) instance_load[meter] = avg_meter @@ -242,12 +242,10 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): self.periods['node'], 'mean', granularity=self.granularity) if avg_meter is None: - if meter_name == 'hardware.memory.used': - avg_meter = node.memory - if meter_name == 'compute.node.cpu.percent': - avg_meter = 1 LOG.warning('No values returned by node %s for %s', node_id, meter_name) + del hosts_load[node_id] + break else: if meter_name == 'hardware.memory.used': avg_meter /= oslo_utils.units.Ki @@ -296,6 +294,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): migration_case = [] new_hosts = copy.deepcopy(hosts) instance_load = self.get_instance_load(instance) + if not instance_load: + return s_host_vcpus = new_hosts[src_node.uuid]['vcpus'] d_host_vcpus = new_hosts[dst_node.uuid]['vcpus'] for metric in self.metrics: @@ -353,6 +353,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): dst_node = self.compute_model.get_node_by_uuid(dst_host) sd_case = self.calculate_migration_case( hosts, instance, src_node, dst_node) + if sd_case is None: + break weighted_sd = self.calculate_weighted_sd(sd_case[:-1]) @@ -361,6 +363,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): 'host': dst_node.uuid, 'value': weighted_sd, 's_host': src_node.uuid, 'instance': instance.uuid} instance_host_map.append(min_sd_case) + if sd_case is None: + continue return sorted(instance_host_map, key=lambda x: x['value']) def check_threshold(self): @@ -369,7 +373,12 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy): normalized_load = self.normalize_hosts_load(hosts_load) for metric in self.metrics: metric_sd = self.get_sd(normalized_load, metric) + LOG.info("Standard deviation for %s is %s." + % (metric, metric_sd)) if metric_sd > float(self.thresholds[metric]): + LOG.info("Standard deviation of %s exceeds" + " appropriate threshold %s." + % (metric, metric_sd)) return self.simulate_migrations(hosts_load) def add_migration(self, diff --git a/watcher/tests/decision_engine/model/ceilometer_metrics.py b/watcher/tests/decision_engine/model/ceilometer_metrics.py index a82b8eb36..ba754d533 100644 --- a/watcher/tests/decision_engine/model/ceilometer_metrics.py +++ b/watcher/tests/decision_engine/model/ceilometer_metrics.py @@ -183,6 +183,8 @@ class FakeCeilometerMetrics(object): # node 3 mock['Node_6_hostname_6'] = 8 + # This node doesn't send metrics + mock['LOST_NODE_hostname_7'] = None mock['Node_19_hostname_19'] = 10 # node 4 mock['INSTANCE_7_hostname_7'] = 4 @@ -197,7 +199,10 @@ class FakeCeilometerMetrics(object): # mock[uuid] = random.randint(1, 4) mock[uuid] = 8 - return float(mock[str(uuid)]) + if mock[str(uuid)] is not None: + return float(mock[str(uuid)]) + else: + return mock[str(uuid)] @staticmethod def get_average_usage_instance_cpu_wb(uuid): @@ -263,6 +268,8 @@ class FakeCeilometerMetrics(object): # node 4 mock['INSTANCE_7'] = 4 + + mock['LOST_INSTANCE'] = None if uuid not in mock.keys(): # mock[uuid] = random.randint(1, 4) mock[uuid] = 8 diff --git a/watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml b/watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml new file mode 100644 index 000000000..ad95c2fda --- /dev/null +++ b/watcher/tests/decision_engine/model/data/scenario_1_with_1_node_unavailable.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/watcher/tests/decision_engine/model/faker_cluster_state.py b/watcher/tests/decision_engine/model/faker_cluster_state.py index 6ae430af9..043d4e11b 100644 --- a/watcher/tests/decision_engine/model/faker_cluster_state.py +++ b/watcher/tests/decision_engine/model/faker_cluster_state.py @@ -118,6 +118,9 @@ class FakerModelCollector(base.BaseClusterDataModelCollector): def generate_scenario_1(self): return self.load_model('scenario_1.xml') + def generate_scenario_1_with_1_node_unavailable(self): + return self.load_model('scenario_1_with_1_node_unavailable.xml') + def generate_scenario_3_with_2_nodes(self): return self.load_model('scenario_3_with_2_nodes.xml') diff --git a/watcher/tests/decision_engine/model/gnocchi_metrics.py b/watcher/tests/decision_engine/model/gnocchi_metrics.py index 7d1ac78f9..be91bd216 100644 --- a/watcher/tests/decision_engine/model/gnocchi_metrics.py +++ b/watcher/tests/decision_engine/model/gnocchi_metrics.py @@ -175,6 +175,8 @@ class FakeGnocchiMetrics(object): # node 3 mock['Node_6_hostname_6'] = 8 + # This node doesn't send metrics + mock['LOST_NODE_hostname_7'] = None mock['Node_19_hostname_19'] = 10 # node 4 mock['INSTANCE_7_hostname_7'] = 4 @@ -188,7 +190,10 @@ class FakeGnocchiMetrics(object): if uuid not in mock.keys(): mock[uuid] = 8 - return float(mock[str(uuid)]) + if mock[str(uuid)] is not None: + return float(mock[str(uuid)]) + else: + return mock[str(uuid)] @staticmethod def get_average_usage_instance_cpu(*args, **kwargs): @@ -215,6 +220,8 @@ class FakeGnocchiMetrics(object): # node 4 mock['INSTANCE_7'] = 4 + + mock['LOST_INSTANCE'] = None if uuid not in mock.keys(): mock[uuid] = 8 diff --git a/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py b/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py index ac14b7b5a..521217935 100644 --- a/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py +++ b/watcher/tests/decision_engine/strategy/strategies/test_workload_stabilization.py @@ -120,6 +120,12 @@ class TestWorkloadStabilization(base.TestCase): self.assertEqual( instance_0_dict, self.strategy.get_instance_load(instance0)) + def test_get_instance_load_with_no_metrics(self): + model = self.fake_cluster.generate_scenario_1_with_1_node_unavailable() + self.m_model.return_value = model + lost_instance = model.get_instance_by_uuid("LOST_INSTANCE") + self.assertIsNone(self.strategy.get_instance_load(lost_instance)) + def test_normalize_hosts_load(self): self.m_model.return_value = self.fake_cluster.generate_scenario_1() fake_hosts = {'Node_0': {'cpu_util': 0.07, 'memory.resident': 7}, @@ -144,6 +150,12 @@ class TestWorkloadStabilization(base.TestCase): self.assertEqual(self.strategy.get_hosts_load(), self.hosts_load_assert) + def test_get_hosts_load_with_node_missing(self): + self.m_model.return_value = \ + self.fake_cluster.generate_scenario_1_with_1_node_unavailable() + self.assertEqual(self.hosts_load_assert, + self.strategy.get_hosts_load()) + def test_get_sd(self): test_cpu_sd = 0.296 test_ram_sd = 9.3