From 55537d254e2bf9d73477527bb20db8a974e6f62e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vincent=20Fran=C3=A7oise?= <Vincent.FRANCOISE@b-com.com>
Date: Fri, 16 Sep 2016 14:55:51 +0200
Subject: [PATCH] Fixed issue on compute nodes iteration

In this changeset, I fixed the issue with the basic server
consolidation strategy to now loop over all compute nodes
as expected instead of stopping after the first one.

Change-Id: If594f0df41e39dfb0ef8f0fce41822018490c4ec
Closes-bug: #1548874
---
 .../strategies/basic_consolidation.py         | 119 ++++++++----------
 .../model/data/scenario_8_with_4_nodes.xml    |  16 +++
 ..._9_with_3_active_plus_1_disabled_nodes.xml |  16 +++
 .../model/faker_cluster_and_metrics.py        |   7 ++
 .../model/faker_cluster_state.py              |   7 ++
 .../strategies/test_basic_consolidation.py    |  27 +++-
 6 files changed, 123 insertions(+), 69 deletions(-)
 create mode 100644 watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml
 create mode 100644 watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml

diff --git a/watcher/decision_engine/strategy/strategies/basic_consolidation.py b/watcher/decision_engine/strategy/strategies/basic_consolidation.py
index d4a06fa17..fab766c4e 100644
--- a/watcher/decision_engine/strategy/strategies/basic_consolidation.py
+++ b/watcher/decision_engine/strategy/strategies/basic_consolidation.py
@@ -81,8 +81,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
         self.number_of_released_nodes = 0
         # set default value for the number of migrations
         self.number_of_migrations = 0
-        # set default value for number of allowed migration attempts
-        self.migration_attempts = 0
 
         # set default value for the efficacy
         self.efficacy = 100
@@ -94,21 +92,14 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
         self.threshold_disk = 1
         self.threshold_cores = 1
 
-        # TODO(jed): target efficacy
-        self.target_efficacy = 60
-
-        # TODO(jed): weight
-        self.weight_cpu = 1
-        self.weight_mem = 1
-        self.weight_disk = 1
-
-        # TODO(jed): bound migration attempts (80 %)
-        self.bound_migration = 0.80
-
     @classmethod
     def get_name(cls):
         return "basic"
 
+    @property
+    def migration_attempts(self):
+        return self.input_parameters.get('migration_attempts', 0)
+
     @classmethod
     def get_display_name(cls):
         return _("Basic offline consolidation")
@@ -117,6 +108,22 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
     def get_translatable_display_name(cls):
         return "Basic offline consolidation"
 
+    @classmethod
+    def get_schema(cls):
+        # Mandatory default setting for each element
+        return {
+            "properties": {
+                "migration_attempts": {
+                    "description": "Maximum number of combinations to be "
+                                   "tried by the strategy while searching "
+                                   "for potential candidates. To remove the "
+                                   "limit, set it to 0 (by default)",
+                    "type": "number",
+                    "default": 0
+                },
+            },
+        }
+
     @property
     def ceilometer(self):
         if self._ceilometer is None:
@@ -127,13 +134,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
     def ceilometer(self, ceilometer):
         self._ceilometer = ceilometer
 
-    def compute_attempts(self, size_cluster):
-        """Upper bound of the number of migration
-
-        :param size_cluster: The size of the cluster
-        """
-        self.migration_attempts = size_cluster * self.bound_migration
-
     def check_migration(self, source_node, destination_node,
                         instance_to_migrate):
         """Check if the migration is possible
@@ -199,16 +199,6 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                 disk_capacity >= total_disk * self.threshold_disk and
                 memory_capacity >= total_mem * self.threshold_mem)
 
-    def get_allowed_migration_attempts(self):
-        """Allowed migration
-
-        Maximum allowed number of migrations this allows us to fix
-        the upper bound of the number of migrations.
-
-        :return:
-        """
-        return self.migration_attempts
-
     def calculate_weight(self, compute_resource, total_cores_used,
                          total_disk_used, total_memory_used):
         """Calculate weight of every resource
@@ -331,8 +321,9 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                                  resource_id=resource_id,
                                  input_parameters=parameters)
 
-    def score_of_nodes(self, score):
+    def compute_score_of_nodes(self):
         """Calculate score of nodes based on load by VMs"""
+        score = []
         for node in self.compute_model.get_all_compute_nodes().values():
             count = self.compute_model.mapping.get_node_instances(node)
             if len(count) > 0:
@@ -344,9 +335,9 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                 score.append((node.uuid, result))
         return score
 
-    def node_and_instance_score(self, sorted_score, score):
+    def node_and_instance_score(self, sorted_scores):
         """Get List of VMs from node"""
-        node_to_release = sorted_score[len(score) - 1][0]
+        node_to_release = sorted_scores[len(sorted_scores) - 1][0]
         instances_to_migrate = self.compute_model.mapping.get_node_instances(
             self.compute_model.get_node_by_uuid(node_to_release))
 
@@ -409,20 +400,14 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
         if not self.compute_model:
             raise exception.ClusterStateNotDefined()
 
+        if len(self.compute_model.get_all_compute_nodes()) == 0:
+            raise exception.ClusterEmpty()
+
         LOG.debug(self.compute_model.to_string())
 
     def do_execute(self):
-        # todo(jed) clone model
-        self.efficacy = 100
         unsuccessful_migration = 0
 
-        first_migration = True
-        size_cluster = len(self.compute_model.get_all_compute_nodes())
-        if size_cluster == 0:
-            raise exception.ClusterEmpty()
-
-        self.compute_attempts(size_cluster)
-
         for node_uuid, node in self.compute_model.get_all_compute_nodes(
         ).items():
             node_instances = self.compute_model.mapping.get_node_instances(
@@ -432,44 +417,44 @@ class BasicConsolidation(base.ServerConsolidationBaseStrategy):
                     self.add_change_service_state(
                         node_uuid, element.ServiceState.DISABLED.value)
 
-        while self.get_allowed_migration_attempts() >= unsuccessful_migration:
-            if not first_migration:
-                self.efficacy = self.calculate_migration_efficacy()
-                if self.efficacy < float(self.target_efficacy):
-                    break
-            first_migration = False
-            score = []
-
-            score = self.score_of_nodes(score)
-
-            # Sort compute nodes by Score decreasing
-            sorted_score = sorted(score, reverse=True, key=lambda x: (x[1]))
-            LOG.debug("Compute node(s) BFD %s", sorted_score)
-
-            # Get Node to be released
-            if len(score) == 0:
-                LOG.warning(_LW(
-                    "The workloads of the compute nodes"
-                    " of the cluster is zero"))
-                break
+        scores = self.compute_score_of_nodes()
+        # Sort compute nodes by Score decreasing
+        sorted_scores = sorted(scores, reverse=True, key=lambda x: (x[1]))
+        LOG.debug("Compute node(s) BFD %s", sorted_scores)
+        # Get Node to be released
+        if len(scores) == 0:
+            LOG.warning(_LW(
+                "The workloads of the compute nodes"
+                " of the cluster is zero"))
+            return
 
+        while sorted_scores and (
+                not self.migration_attempts or
+                self.migration_attempts >= unsuccessful_migration):
             node_to_release, instance_score = self.node_and_instance_score(
-                sorted_score, score)
+                sorted_scores)
 
             # Sort instances by Score
             sorted_instances = sorted(
                 instance_score, reverse=True, key=lambda x: (x[1]))
             # BFD: Best Fit Decrease
-            LOG.debug("VM(s) BFD %s", sorted_instances)
+            LOG.debug("Instance(s) BFD %s", sorted_instances)
 
             migrations = self.calculate_num_migrations(
-                sorted_instances, node_to_release, sorted_score)
+                sorted_instances, node_to_release, sorted_scores)
 
             unsuccessful_migration = self.unsuccessful_migration_actualization(
                 migrations, unsuccessful_migration)
+
+            if not migrations:
+                # We don't have any possible migrations to perform on this node
+                # so we discard the node so we can try to migrate instances
+                # from the next one in the list
+                sorted_scores.pop()
+
         infos = {
-            "number_of_migrations": self.number_of_migrations,
-            "number_of_nodes_released": self.number_of_released_nodes,
+            "released_compute_nodes_count": self.number_of_released_nodes,
+            "instance_migrations_count": self.number_of_migrations,
             "efficacy": self.efficacy
         }
         LOG.debug(infos)
diff --git a/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml b/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml
new file mode 100644
index 000000000..9abaf9d4b
--- /dev/null
+++ b/watcher/tests/decision_engine/model/data/scenario_8_with_4_nodes.xml
@@ -0,0 +1,16 @@
+<ModelRoot>
+  <ComputeNode hostname="hostname_0" uuid="Node_0" id="0" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_0" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_1" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_2" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_1" uuid="Node_1" id="1" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_3" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_2" uuid="Node_2" id="2" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_4" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_3" uuid="Node_3" id="3" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_5" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+</ModelRoot>
diff --git a/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml b/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml
new file mode 100644
index 000000000..1c23eb07d
--- /dev/null
+++ b/watcher/tests/decision_engine/model/data/scenario_9_with_3_active_plus_1_disabled_nodes.xml
@@ -0,0 +1,16 @@
+<ModelRoot>
+  <ComputeNode hostname="hostname_0" uuid="Node_0" id="0" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_0" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_1" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_2" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_1" uuid="Node_1" id="1" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_3" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_2" uuid="Node_2" id="2" state="up" human_id="" status="enabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_4" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+  <ComputeNode hostname="hostname_3" uuid="Node_3" id="3" state="up" human_id="" status="disabled" ResourceType.cpu_cores="16" ResourceType.disk="250" ResourceType.disk_capacity="250" ResourceType.memory="64">
+    <Instance hostname="" human_id="" state="active" uuid="INSTANCE_5" ResourceType.cpu_cores="2" ResourceType.disk="20" ResourceType.disk_capacity="20" ResourceType.memory="2"/>
+  </ComputeNode>
+</ModelRoot>
diff --git a/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py b/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py
index c551621f8..e59400235 100644
--- a/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py
+++ b/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py
@@ -70,6 +70,13 @@ class FakerModelCollector(base.BaseClusterDataModelCollector):
         """
         return self.load_model('scenario_3_with_metrics.xml')
 
+    def generate_scenario_4(self):
+        """Simulates a cluster
+
+        With 4 nodes and 6 instances spread on all nodes
+        """
+        return self.load_model('scenario_4_with_metrics.xml')
+
 
 class FakeCeilometerMetrics(object):
     def __init__(self, model):
diff --git a/watcher/tests/decision_engine/model/faker_cluster_state.py b/watcher/tests/decision_engine/model/faker_cluster_state.py
index af1484629..fb654c594 100644
--- a/watcher/tests/decision_engine/model/faker_cluster_state.py
+++ b/watcher/tests/decision_engine/model/faker_cluster_state.py
@@ -148,3 +148,10 @@ class FakerModelCollector(base.BaseClusterDataModelCollector):
 
     def generate_scenario_7_with_2_nodes(self):
         return self.load_model('scenario_7_with_2_nodes.xml')
+
+    def generate_scenario_8_with_4_nodes(self):
+        return self.load_model('scenario_8_with_4_nodes.xml')
+
+    def generate_scenario_9_with_3_active_plus_1_disabled_nodes(self):
+        return self.load_model(
+            'scenario_9_with_3_active_plus_1_disabled_nodes.xml')
diff --git a/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py b/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py
index 1a1389a72..f75ad0ce2 100644
--- a/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py
+++ b/watcher/tests/decision_engine/strategy/strategies/test_basic_consolidation.py
@@ -184,14 +184,37 @@ class TestBasicConsolidation(base.TestCase):
             [action.get('action_type') for action in solution.actions])
 
         expected_num_migrations = 1
-        expected_power_state = 0
+        expected_power_state = 1
 
         num_migrations = actions_counter.get("migrate", 0)
         num_node_state_change = actions_counter.get(
-            "change_node_state", 0)
+            "change_nova_service_state", 0)
         self.assertEqual(expected_num_migrations, num_migrations)
         self.assertEqual(expected_power_state, num_node_state_change)
 
+    def test_basic_consolidation_execute_scenario_8_with_4_nodes(self):
+        model = self.fake_cluster.generate_scenario_8_with_4_nodes()
+        self.m_model.return_value = model
+
+        solution = self.strategy.execute()
+
+        actions_counter = collections.Counter(
+            [action.get('action_type') for action in solution.actions])
+
+        expected_num_migrations = 5
+        expected_power_state = 3
+        expected_global_efficacy = 60
+
+        num_migrations = actions_counter.get("migrate", 0)
+        num_node_state_change = actions_counter.get(
+            "change_nova_service_state", 0)
+
+        global_efficacy_value = solution.global_efficacy.get("value", 0)
+
+        self.assertEqual(expected_num_migrations, num_migrations)
+        self.assertEqual(expected_power_state, num_node_state_change)
+        self.assertEqual(expected_global_efficacy, global_efficacy_value)
+
     def test_exception_stale_cdm(self):
         self.fake_cluster.set_cluster_data_model_as_stale()
         self.m_model.return_value = self.fake_cluster.cluster_data_model