Fixed issue on compute nodes iteration

In this changeset, I fixed the issue with the basic server consolidation strategy to now loop over all compute nodes as expected instead of stopping after the first one. Change-Id: If594f0df41e39dfb0ef8f0fce41822018490c4ec Closes-bug: #1548874
2016-09-16 14:55:51 +02:00
parent 1eb2b517ef
commit 4ad64c3d21
4 changed files with 109 additions and 69 deletions
--- a/watcher/decision_engine/strategy/strategies/basic_consolidation.py
+++ b/watcher/decision_engine/strategy/strategies/basic_consolidation.py
@@ -81,8 +81,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
        self.number_of_released_nodes = 0
        # set default value for the number of migrations
        self.number_of_migrations = 0
-        # set default value for number of allowed migration attempts
-        self.migration_attempts = 0

        # set default value for the efficacy
        self.efficacy = 100
@@ -94,21 +92,14 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
        self.threshold_disk = 1
        self.threshold_cores = 1

-        # TODO(jed): target efficacy
-        self.target_efficacy = 60
-
-        # TODO(jed): weight
-        self.weight_cpu = 1
-        self.weight_mem = 1
-        self.weight_disk = 1
-
-        # TODO(jed): bound migration attempts (80 %)
-        self.bound_migration = 0.80
-
    @classmethod
    def get_name(cls):
        return "basic"

+    @property
+    def migration_attempts(self):
+        return self.input_parameters.get('migration_attempts', 0)
+
    @classmethod
    def get_display_name(cls):
        return _("Basic offline consolidation")
@@ -117,6 +108,22 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
    def get_translatable_display_name(cls):
        return "Basic offline consolidation"

+    @classmethod
+    def get_schema(cls):
+        # Mandatory default setting for each element
+        return {
+            "properties": {
+                "migration_attempts": {
+                    "description": "Maximum number of combinations to be "
+                                   "tried by the strategy while searching "
+                                   "for potential candidates. To remove the "
+                                   "limit, set it to 0 (by default)",
+                    "type": "number",
+                    "default": 0
+                },
+            },
+        }
+
    @property
    def ceilometer(self):
        if self._ceilometer is None:
@@ -127,13 +134,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
    def ceilometer(self, ceilometer):
        self._ceilometer = ceilometer

-    def compute_attempts(self, size_cluster):
-        """Upper bound of the number of migration
-
-        :param size_cluster: The size of the cluster
-        """
-        self.migration_attempts = size_cluster * self.bound_migration
-
    def check_migration(self, source_node, destination_node,
                        instance_to_migrate):
        """Check if the migration is possible
@@ -199,16 +199,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                disk_capacity >= total_disk * self.threshold_disk and
                memory_capacity >= total_mem * self.threshold_mem)

-    def get_allowed_migration_attempts(self):
-        """Allowed migration
-
-        Maximum allowed number of migrations this allows us to fix
-        the upper bound of the number of migrations.
-
-        :return:
-        """
-        return self.migration_attempts
-
    def calculate_weight(self, compute_resource, total_cores_used,
                         total_disk_used, total_memory_used):
        """Calculate weight of every resource
@@ -331,8 +321,9 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                                 resource_id=resource_id,
                                 input_parameters=parameters)

-    def score_of_nodes(self, score):
+    def compute_score_of_nodes(self):
        """Calculate score of nodes based on load by VMs"""
+        score = []
        for node in self.compute_model.get_all_compute_nodes().values():
            count = self.compute_model.mapping.get_node_instances(node)
            if len(count) > 0:
@@ -344,9 +335,9 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                score.append((node.uuid, result))
        return score

-    def node_and_instance_score(self, sorted_score, score):
+    def node_and_instance_score(self, sorted_scores):
        """Get List of VMs from node"""
-        node_to_release = sorted_score[len(score) - 1][0]
+        node_to_release = sorted_scores[len(sorted_scores) - 1][0]
        instances_to_migrate = self.compute_model.mapping.get_node_instances(
            self.compute_model.get_node_by_uuid(node_to_release))

@@ -409,20 +400,14 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
        if not self.compute_model:
            raise exception.ClusterStateNotDefined()

+        if len(self.compute_model.get_all_compute_nodes()) == 0:
+            raise exception.ClusterEmpty()
+
        LOG.debug(self.compute_model.to_string())

    def do_execute(self):
-        # todo(jed) clone model
-        self.efficacy = 100
        unsuccessful_migration = 0

-        first_migration = True
-        size_cluster = len(self.compute_model.get_all_compute_nodes())
-        if size_cluster == 0:
-            raise exception.ClusterEmpty()
-
-        self.compute_attempts(size_cluster)
-
        for node_uuid, node in self.compute_model.get_all_compute_nodes(
        ).items():
            node_instances = self.compute_model.mapping.get_node_instances(
@@ -432,44 +417,44 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                    self.add_change_service_state(
                        node_uuid, element.ServiceState.DISABLED.value)

-        while self.get_allowed_migration_attempts() >= unsuccessful_migration:
-            if not first_migration:
-                self.efficacy = self.calculate_migration_efficacy()
-                if self.efficacy < float(self.target_efficacy):
-                    break
-            first_migration = False
-            score = []
-
-            score = self.score_of_nodes(score)
-
-            # Sort compute nodes by Score decreasing
-            sorted_score = sorted(score, reverse=True, key=lambda x: (x[1]))
-            LOG.debug("Compute node(s) BFD %s", sorted_score)
-
-            # Get Node to be released
-            if len(score) == 0:
-                LOG.warning(_LW(
-                    "The workloads of the compute nodes"
-                    " of the cluster is zero"))
-                break
+        scores = self.compute_score_of_nodes()
+        # Sort compute nodes by Score decreasing
+        sorted_scores = sorted(scores, reverse=True, key=lambda x: (x[1]))
+        LOG.debug("Compute node(s) BFD %s", sorted_scores)
+        # Get Node to be released
+        if len(scores) == 0:
+            LOG.warning(_LW(
+                "The workloads of the compute nodes"
+                " of the cluster is zero"))
+            return

+        while sorted_scores and (
+                not self.migration_attempts or
+                self.migration_attempts >= unsuccessful_migration):
            node_to_release, instance_score = self.node_and_instance_score(
-                sorted_score, score)
+                sorted_scores)

            # Sort instances by Score
            sorted_instances = sorted(
                instance_score, reverse=True, key=lambda x: (x[1]))
            # BFD: Best Fit Decrease
-            LOG.debug("VM(s) BFD %s", sorted_instances)
+            LOG.debug("Instance(s) BFD %s", sorted_instances)

            migrations = self.calculate_num_migrations(
-                sorted_instances, node_to_release, sorted_score)
+                sorted_instances, node_to_release, sorted_scores)

            unsuccessful_migration = self.unsuccessful_migration_actualization(
                migrations, unsuccessful_migration)
+
+            if not migrations:
+                # We don't have any possible migrations to perform on this node
+                # so we discard the node so we can try to migrate instances
+                # from the next one in the list
+                sorted_scores.pop()
+
        infos = {
-            "number_of_migrations": self.number_of_migrations,
-            "number_of_nodes_released": self.number_of_released_nodes,
+            "released_compute_nodes_count": self.number_of_released_nodes,
+            "instance_migrations_count": self.number_of_migrations,
            "efficacy": self.efficacy
        }
        LOG.debug(infos)
--- a/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml
+++ b/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml
@@ -0,0 +1,16 @@
+<ModelRoot>
+  <ComputeNode hostname="hostname_0" uuid="Node_0" id="0" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_0" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_1" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_2" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_1" uuid="Node_1" id="1" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_3" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_2" uuid="Node_2" id="2" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_4" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_3" uuid="Node_3" id="3" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_5" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+</ModelRoot>
--- a/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml
+++ b/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml
@@ -0,0 +1,16 @@
+<ModelRoot>
+  <ComputeNode hostname="hostname_0" uuid="Node_0" id="0" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_0" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_1" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_2" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_1" uuid="Node_1" id="1" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_3" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_2" uuid="Node_2" id="2" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_4" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_3" uuid="Node_3" id="3" state="up" human_id="" status="disabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_5" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+</ModelRoot>
--- a/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py
+++ b/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py
@@ -184,14 +184,37 @@ class TestBasicConsolidation(base.TestCase):
            [action.get('action_type') for action in solution.actions])

        expected_num_migrations = 1
-        expected_power_state = 0
+        expected_power_state = 1

        num_migrations = actions_counter.get("migrate", 0)
        num_node_state_change = actions_counter.get(
-            "change_node_state", 0)
+            "change_nova_service_state", 0)
        self.assertEqual(expected_num_migrations, num_migrations)
        self.assertEqual(expected_power_state, num_node_state_change)

+    def test_basic_consolidation_execute_scenario_8_with_4_nodes(self):
+        model = self.fake_cluster.generate_scenario_8_with_4_nodes()
+        self.m_model.return_value = model
+
+        solution = self.strategy.execute()
+
+        actions_counter = collections.Counter(
+            [action.get('action_type') for action in solution.actions])
+
+        expected_num_migrations = 5
+        expected_power_state = 3
+        expected_global_efficacy = 60
+
+        num_migrations = actions_counter.get("migrate", 0)
+        num_node_state_change = actions_counter.get(
+            "change_nova_service_state", 0)
+
+        global_efficacy_value = solution.global_efficacy.get("value", 0)
+
+        self.assertEqual(expected_num_migrations, num_migrations)
+        self.assertEqual(expected_power_state, num_node_state_change)
+        self.assertEqual(expected_global_efficacy, global_efficacy_value)
+
    def test_exception_stale_cdm(self):
        self.fake_cluster.set_cluster_data_model_as_stale()
        self.m_model.return_value = self.fake_cluster.cluster_data_model