From 55537d254e2bf9d73477527bb20db8a974e6f62e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Fran=C3=A7oise?= Date: Fri, 16 Sep 2016 14:55:51 +0200 Subject: [PATCH] Fixed issue on compute nodes iteration In this changeset, I fixed the issue with the basic server consolidation strategy to now loop over all compute nodes as expected instead of stopping after the first one. Change-Id: If594f0df41e39dfb0ef8f0fce41822018490c4ec Closes-bug: #1548874 --- .../strategies/basic_consolidation.py | 119 ++++++++---------- .../model/data/scenario_8_with_4_nodes.xml | 16 +++ ..._9_with_3_active_plus_1_disabled_nodes.xml | 16 +++ .../model/faker_cluster_and_metrics.py | 7 ++ .../model/faker_cluster_state.py | 7 ++ .../strategies/test_basic_consolidation.py | 27 +++- 6 files changed, 123 insertions(+), 69 deletions(-) create mode 100644 watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml create mode 100644 watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml diff --git a/watcher/decision_engine/strategy/strategies/basic_consolidation.py b/watcher/decision_engine/strategy/strategies/basic_consolidation.py index d4a06fa17..fab766c4e 100644 --- a/watcher/decision_engine/strategy/strategies/basic_consolidation.py +++ b/watcher/decision_engine/strategy/strategies/basic_consolidation.py @@ -81,8 +81,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): self.number_of_released_nodes = 0 # set default value for the number of migrations self.number_of_migrations = 0 - # set default value for number of allowed migration attempts - self.migration_attempts = 0 # set default value for the efficacy self.efficacy = 100 @@ -94,21 +92,14 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): self.threshold_disk = 1 self.threshold_cores = 1 - # TODO(jed): target efficacy - self.target_efficacy = 60 - - # TODO(jed): weight - self.weight_cpu = 1 - self.weight_mem = 1 - self.weight_disk = 1 - - # TODO(jed): bound migration attempts (80 %) - self.bound_migration = 0.80 - @classmethod def get_name(cls): return "basic" + @property + def migration_attempts(self): + return self.input_parameters.get('migration_attempts', 0) + @classmethod def get_display_name(cls): return _("Basic offline consolidation") @@ -117,6 +108,22 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): def get_translatable_display_name(cls): return "Basic offline consolidation" + @classmethod + def get_schema(cls): + # Mandatory default setting for each element + return { + "properties": { + "migration_attempts": { + "description": "Maximum number of combinations to be " + "tried by the strategy while searching " + "for potential candidates. To remove the " + "limit, set it to 0 (by default)", + "type": "number", + "default": 0 + }, + }, + } + @property def ceilometer(self): if self._ceilometer is None: @@ -127,13 +134,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): def ceilometer(self, ceilometer): self._ceilometer = ceilometer - def compute_attempts(self, size_cluster): - """Upper bound of the number of migration - - :param size_cluster: The size of the cluster - """ - self.migration_attempts = size_cluster * self.bound_migration - def check_migration(self, source_node, destination_node, instance_to_migrate): """Check if the migration is possible @@ -199,16 +199,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): disk_capacity >= total_disk * self.threshold_disk and memory_capacity >= total_mem * self.threshold_mem) - def get_allowed_migration_attempts(self): - """Allowed migration - - Maximum allowed number of migrations this allows us to fix - the upper bound of the number of migrations. - - :return: - """ - return self.migration_attempts - def calculate_weight(self, compute_resource, total_cores_used, total_disk_used, total_memory_used): """Calculate weight of every resource @@ -331,8 +321,9 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): resource_id=resource_id, input_parameters=parameters) - def score_of_nodes(self, score): + def compute_score_of_nodes(self): """Calculate score of nodes based on load by VMs""" + score = [] for node in self.compute_model.get_all_compute_nodes().values(): count = self.compute_model.mapping.get_node_instances(node) if len(count) > 0: @@ -344,9 +335,9 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): score.append((node.uuid, result)) return score - def node_and_instance_score(self, sorted_score, score): + def node_and_instance_score(self, sorted_scores): """Get List of VMs from node""" - node_to_release = sorted_score[len(score) - 1][0] + node_to_release = sorted_scores[len(sorted_scores) - 1][0] instances_to_migrate = self.compute_model.mapping.get_node_instances( self.compute_model.get_node_by_uuid(node_to_release)) @@ -409,20 +400,14 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): if not self.compute_model: raise exception.ClusterStateNotDefined() + if len(self.compute_model.get_all_compute_nodes()) == 0: + raise exception.ClusterEmpty() + LOG.debug(self.compute_model.to_string()) def do_execute(self): - # todo(jed) clone model - self.efficacy = 100 unsuccessful_migration = 0 - first_migration = True - size_cluster = len(self.compute_model.get_all_compute_nodes()) - if size_cluster == 0: - raise exception.ClusterEmpty() - - self.compute_attempts(size_cluster) - for node_uuid, node in self.compute_model.get_all_compute_nodes( ).items(): node_instances = self.compute_model.mapping.get_node_instances( @@ -432,44 +417,44 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy): self.add_change_service_state( node_uuid, element.ServiceState.DISABLED.value) - while self.get_allowed_migration_attempts() >= unsuccessful_migration: - if not first_migration: - self.efficacy = self.calculate_migration_efficacy() - if self.efficacy < float(self.target_efficacy): - break - first_migration = False - score = [] - - score = self.score_of_nodes(score) - - # Sort compute nodes by Score decreasing - sorted_score = sorted(score, reverse=True, key=lambda x: (x[1])) - LOG.debug("Compute node(s) BFD %s", sorted_score) - - # Get Node to be released - if len(score) == 0: - LOG.warning(_LW( - "The workloads of the compute nodes" - " of the cluster is zero")) - break + scores = self.compute_score_of_nodes() + # Sort compute nodes by Score decreasing + sorted_scores = sorted(scores, reverse=True, key=lambda x: (x[1])) + LOG.debug("Compute node(s) BFD %s", sorted_scores) + # Get Node to be released + if len(scores) == 0: + LOG.warning(_LW( + "The workloads of the compute nodes" + " of the cluster is zero")) + return + while sorted_scores and ( + not self.migration_attempts or + self.migration_attempts >= unsuccessful_migration): node_to_release, instance_score = self.node_and_instance_score( - sorted_score, score) + sorted_scores) # Sort instances by Score sorted_instances = sorted( instance_score, reverse=True, key=lambda x: (x[1])) # BFD: Best Fit Decrease - LOG.debug("VM(s) BFD %s", sorted_instances) + LOG.debug("Instance(s) BFD %s", sorted_instances) migrations = self.calculate_num_migrations( - sorted_instances, node_to_release, sorted_score) + sorted_instances, node_to_release, sorted_scores) unsuccessful_migration = self.unsuccessful_migration_actualization( migrations, unsuccessful_migration) + + if not migrations: + # We don't have any possible migrations to perform on this node + # so we discard the node so we can try to migrate instances + # from the next one in the list + sorted_scores.pop() + infos = { - "number_of_migrations": self.number_of_migrations, - "number_of_nodes_released": self.number_of_released_nodes, + "released_compute_nodes_count": self.number_of_released_nodes, + "instance_migrations_count": self.number_of_migrations, "efficacy": self.efficacy } LOG.debug(infos) diff --git a/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml b/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml new file mode 100644 index 000000000..9abaf9d4b --- /dev/null +++ b/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml b/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml new file mode 100644 index 000000000..1c23eb07d --- /dev/null +++ b/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py b/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py index c551621f8..e59400235 100644 --- a/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py +++ b/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py @@ -70,6 +70,13 @@ class FakerModelCollector(base.BaseClusterDataModelCollector): """ return self.load_model('scenario_3_with_metrics.xml') + def generate_scenario_4(self): + """Simulates a cluster + + With 4 nodes and 6 instances spread on all nodes + """ + return self.load_model('scenario_4_with_metrics.xml') + class FakeCeilometerMetrics(object): def __init__(self, model): diff --git a/watcher/tests/decision_engine/model/faker_cluster_state.py b/watcher/tests/decision_engine/model/faker_cluster_state.py index af1484629..fb654c594 100644 --- a/watcher/tests/decision_engine/model/faker_cluster_state.py +++ b/watcher/tests/decision_engine/model/faker_cluster_state.py @@ -148,3 +148,10 @@ class FakerModelCollector(base.BaseClusterDataModelCollector): def generate_scenario_7_with_2_nodes(self): return self.load_model('scenario_7_with_2_nodes.xml') + + def generate_scenario_8_with_4_nodes(self): + return self.load_model('scenario_8_with_4_nodes.xml') + + def generate_scenario_9_with_3_active_plus_1_disabled_nodes(self): + return self.load_model( + 'scenario_9_with_3_active_plus_1_disabled_nodes.xml') diff --git a/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py b/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py index 1a1389a72..f75ad0ce2 100644 --- a/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py +++ b/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py @@ -184,14 +184,37 @@ class TestBasicConsolidation(base.TestCase): [action.get('action_type') for action in solution.actions]) expected_num_migrations = 1 - expected_power_state = 0 + expected_power_state = 1 num_migrations = actions_counter.get("migrate", 0) num_node_state_change = actions_counter.get( - "change_node_state", 0) + "change_nova_service_state", 0) self.assertEqual(expected_num_migrations, num_migrations) self.assertEqual(expected_power_state, num_node_state_change) + def test_basic_consolidation_execute_scenario_8_with_4_nodes(self): + model = self.fake_cluster.generate_scenario_8_with_4_nodes() + self.m_model.return_value = model + + solution = self.strategy.execute() + + actions_counter = collections.Counter( + [action.get('action_type') for action in solution.actions]) + + expected_num_migrations = 5 + expected_power_state = 3 + expected_global_efficacy = 60 + + num_migrations = actions_counter.get("migrate", 0) + num_node_state_change = actions_counter.get( + "change_nova_service_state", 0) + + global_efficacy_value = solution.global_efficacy.get("value", 0) + + self.assertEqual(expected_num_migrations, num_migrations) + self.assertEqual(expected_power_state, num_node_state_change) + self.assertEqual(expected_global_efficacy, global_efficacy_value) + def test_exception_stale_cdm(self): self.fake_cluster.set_cluster_data_model_as_stale() self.m_model.return_value = self.fake_cluster.cluster_data_model