diff --git a/releasenotes/notes/donot-run-host-migration-strategy-on-disabled-hosts-24084a22d4c8f914.yaml b/releasenotes/notes/donot-run-host-migration-strategy-on-disabled-hosts-24084a22d4c8f914.yaml new file mode 100644 index 000000000..f2c91a385 --- /dev/null +++ b/releasenotes/notes/donot-run-host-migration-strategy-on-disabled-hosts-24084a22d4c8f914.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + The host maintenance strategy should migrate servers to the backup node if one is + specified, or otherwise rely on the nova scheduler. Previously it enabled hosts that + had been disabled with the watcher_disabled reason and migrated servers to those + nodes, which could impact customer workloads; compute nodes were disabled for a reason. + + The host maintenance strategy is now fixed to migrate servers only to the specified + backup node, or to rely on the nova scheduler if no backup node is provided. diff --git a/watcher/decision_engine/strategy/strategies/host_maintenance.py b/watcher/decision_engine/strategy/strategies/host_maintenance.py index ddf4335d8..da694612d 100644 --- a/watcher/decision_engine/strategy/strategies/host_maintenance.py +++ b/watcher/decision_engine/strategy/strategies/host_maintenance.py @@ -53,7 +53,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): INSTANCE_MIGRATION = "migrate" CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state" - REASON_FOR_DISABLE = 'watcher_disabled' def __init__(self, config, osc=None): super(HostMaintenance, self).__init__(config, osc) @@ -95,10 +94,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): cn.status == element.ServiceState.DISABLED.value and cn.disabled_reason == reason} - def get_disabled_compute_nodes(self): - return self.get_disabled_compute_nodes_with_reason( - self.REASON_FOR_DISABLE) - def get_instance_state_str(self, instance): """Get instance state in string format""" if isinstance(instance.state, str): @@ -215,8 +210,7 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): """safe maintain one compute node Migrate all instances of the 
maintenance_node intensively to the - backup host. If the user didn't give the backup host, it will - select one unused node to backup the maintaining node. + backup host. It calculates the resources of both the backup node and the maintaining node to evaluate the migrations from the maintaining node to the backup node. @@ -233,22 +227,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): self.host_migration(maintenance_node, backup_node) return True - # If the user didn't give the backup host, select one unused - # node with required capacity, then migrates all instances - # from maintaining node to it. - nodes = sorted( - self.get_disabled_compute_nodes().values(), - key=lambda x: self.get_node_capacity(x)['cpu']) - if maintenance_node in nodes: - nodes.remove(maintenance_node) - - for node in nodes: - if self.host_fits(maintenance_node, node): - self.enable_compute_node_if_disabled(node) - self.add_action_maintain_compute_node(maintenance_node) - self.host_migration(maintenance_node, node) - return True - - return False def try_maintain(self, maintenance_node): diff --git a/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py b/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py index f8080760c..d092c1501 100644 --- a/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py +++ b/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py @@ -167,12 +167,15 @@ class TestHostMaintenance(TestBaseStrategy): node_1 = model.get_node_by_uuid('Node_1') self.assertFalse(self.strategy.safe_maintain(node_0)) self.assertFalse(self.strategy.safe_maintain(node_1)) + # It will return true if a backup node is passed + self.assertTrue(self.strategy.safe_maintain(node_0, node_1)) model = self.fake_c_cluster.\ generate_scenario_1_with_all_nodes_disable() self.m_c_model.return_value = model node_0 = model.get_node_by_uuid('Node_0') - self.assertTrue(self.strategy.safe_maintain(node_0)) + # It will return false 
if there is no backup node + self.assertFalse(self.strategy.safe_maintain(node_0)) def test_try_maintain(self): model = self.fake_c_cluster.generate_scenario_1()