From 9dea55bd64faf66e4de4c3220d8581683db0bf7c Mon Sep 17 00:00:00 2001 From: "Chandan Kumar (raukadah)" Date: Wed, 7 May 2025 08:55:11 +0530 Subject: [PATCH] Drop code from Host maintenance strategy migrating instance to disabled hosts Currently host maintenance strategy also migrates instances from maintenance node to watcher_disabled compute nodes. watcher_disabled compute nodes might be disabled for some other purpose by a different strategy. If host maintenance uses those compute nodes for migration, it might affect customer workloads. Host maintenance strategy should never touch disabled hosts unless the user specifies a disabled host as the backup node. This change drops the logic for using disabled compute nodes for maintenance. Host maintenance is already using the nova scheduler for migrating the instances and will keep using the same. If there is no available node, the strategy will fail. Closes-Bug: #2109945 Change-Id: If9795fd06f684eb67d553405cebd8a30887c3997 Signed-off-by: Chandan Kumar (raukadah) --- ...gy-on-disabled-hosts-24084a22d4c8f914.yaml | 10 ++++++++ .../strategy/strategies/host_maintenance.py | 24 +------------------ .../strategies/test_host_maintenance.py | 5 +++- 3 files changed, 15 insertions(+), 24 deletions(-) create mode 100644 releasenotes/notes/donot-run-host-migration-strategy-on-disabled-hosts-24084a22d4c8f914.yaml diff --git a/releasenotes/notes/donot-run-host-migration-strategy-on-disabled-hosts-24084a22d4c8f914.yaml b/releasenotes/notes/donot-run-host-migration-strategy-on-disabled-hosts-24084a22d4c8f914.yaml new file mode 100644 index 000000000..f2c91a385 --- /dev/null +++ b/releasenotes/notes/donot-run-host-migration-strategy-on-disabled-hosts-24084a22d4c8f914.yaml @@ -0,0 +1,10 @@ +--- +fixes: + - | + Host maintenance strategy should migrate servers based on backup node if specified + or rely on nova scheduler. It was enabling disabled hosts with watcher_disabled + reason and migrating servers to those nodes. It can impact customer workload. 
Compute + nodes were disabled for a reason. + + Host maintenance strategy is fixed now to support migrating servers only on backup + node or rely on nova scheduler if no backup node is provided. diff --git a/watcher/decision_engine/strategy/strategies/host_maintenance.py b/watcher/decision_engine/strategy/strategies/host_maintenance.py index ddf4335d8..da694612d 100644 --- a/watcher/decision_engine/strategy/strategies/host_maintenance.py +++ b/watcher/decision_engine/strategy/strategies/host_maintenance.py @@ -53,7 +53,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): INSTANCE_MIGRATION = "migrate" CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state" - REASON_FOR_DISABLE = 'watcher_disabled' def __init__(self, config, osc=None): super(HostMaintenance, self).__init__(config, osc) @@ -95,10 +94,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): cn.status == element.ServiceState.DISABLED.value and cn.disabled_reason == reason} - def get_disabled_compute_nodes(self): - return self.get_disabled_compute_nodes_with_reason( - self.REASON_FOR_DISABLE) - def get_instance_state_str(self, instance): """Get instance state in string format""" if isinstance(instance.state, str): @@ -215,8 +210,7 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): """safe maintain one compute node Migrate all instances of the maintenance_node intensively to the - backup host. If the user didn't give the backup host, it will - select one unused node to backup the maintaining node. + backup host. It calculate the resource both of the backup node and maintaining node to evaluate the migrations from maintaining node to backup node. @@ -233,22 +227,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): self.host_migration(maintenance_node, backup_node) return True - # If the user didn't give the backup host, select one unused - # node with required capacity, then migrates all instances - # from maintaining node to it. 
- nodes = sorted( - self.get_disabled_compute_nodes().values(), - key=lambda x: self.get_node_capacity(x)['cpu']) - if maintenance_node in nodes: - nodes.remove(maintenance_node) - - for node in nodes: - if self.host_fits(maintenance_node, node): - self.enable_compute_node_if_disabled(node) - self.add_action_maintain_compute_node(maintenance_node) - self.host_migration(maintenance_node, node) - return True - - return False def try_maintain(self, maintenance_node): diff --git a/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py b/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py index f8080760c..d092c1501 100644 --- a/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py +++ b/watcher/tests/decision_engine/strategy/strategies/test_host_maintenance.py @@ -167,12 +167,15 @@ class TestHostMaintenance(TestBaseStrategy): node_1 = model.get_node_by_uuid('Node_1') self.assertFalse(self.strategy.safe_maintain(node_0)) self.assertFalse(self.strategy.safe_maintain(node_1)) + # It will return True if a backup node is passed + self.assertTrue(self.strategy.safe_maintain(node_0, node_1)) model = self.fake_c_cluster.\ generate_scenario_1_with_all_nodes_disable() self.m_c_model.return_value = model node_0 = model.get_node_by_uuid('Node_0') - self.assertTrue(self.strategy.safe_maintain(node_0)) + # It will return False if there is no backup node + self.assertFalse(self.strategy.safe_maintain(node_0)) def test_try_maintain(self): model = self.fake_c_cluster.generate_scenario_1()