Merge "Drop code from Host maintenance strategy migrating instance to disabled hosts"

This commit is contained in:
Zuul
2025-05-16 18:18:26 +00:00
committed by Gerrit Code Review
3 changed files with 15 additions and 24 deletions

View File

@@ -0,0 +1,10 @@
---
fixes:
- |
Host maintenance strategy should migrate servers based on backup node if specified
or rely on nova scheduler. It was enabling disabled hosts with watcher_disabled
reason and migrating servers to those nodes. It can impact customer workload. Compute
nodes were disabled for a reason.
Host maintenance strategy is fixed now to support migrating servers only on backup
node or rely on nova scheduler if no backup node is provided.

View File

@@ -53,7 +53,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
INSTANCE_MIGRATION = "migrate"
CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state"
REASON_FOR_DISABLE = 'watcher_disabled'
def __init__(self, config, osc=None):
super(HostMaintenance, self).__init__(config, osc)
@@ -95,10 +94,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
cn.status == element.ServiceState.DISABLED.value and
cn.disabled_reason == reason}
def get_disabled_compute_nodes(self):
return self.get_disabled_compute_nodes_with_reason(
self.REASON_FOR_DISABLE)
def get_instance_state_str(self, instance):
"""Get instance state in string format"""
if isinstance(instance.state, str):
@@ -215,8 +210,7 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
"""safe maintain one compute node
Migrate all instances of the maintenance_node intensively to the
backup host. If the user didn't give the backup host, it will
select one unused node to backup the maintaining node.
backup host.
It calculate the resource both of the backup node and maintaining
node to evaluate the migrations from maintaining node to backup node.
@@ -233,22 +227,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
self.host_migration(maintenance_node, backup_node)
return True
# If the user didn't give the backup host, select one unused
# node with required capacity, then migrates all instances
# from maintaining node to it.
nodes = sorted(
self.get_disabled_compute_nodes().values(),
key=lambda x: self.get_node_capacity(x)['cpu'])
if maintenance_node in nodes:
nodes.remove(maintenance_node)
for node in nodes:
if self.host_fits(maintenance_node, node):
self.enable_compute_node_if_disabled(node)
self.add_action_maintain_compute_node(maintenance_node)
self.host_migration(maintenance_node, node)
return True
return False
def try_maintain(self, maintenance_node):

View File

@@ -167,12 +167,15 @@ class TestHostMaintenance(TestBaseStrategy):
node_1 = model.get_node_by_uuid('Node_1')
self.assertFalse(self.strategy.safe_maintain(node_0))
self.assertFalse(self.strategy.safe_maintain(node_1))
# It will return true, if backup node is passed
self.assertTrue(self.strategy.safe_maintain(node_0, node_1))
model = self.fake_c_cluster.\
generate_scenario_1_with_all_nodes_disable()
self.m_c_model.return_value = model
node_0 = model.get_node_by_uuid('Node_0')
self.assertTrue(self.strategy.safe_maintain(node_0))
# It will return false, if there is no backup node
self.assertFalse(self.strategy.safe_maintain(node_0))
def test_try_maintain(self):
model = self.fake_c_cluster.generate_scenario_1()