Fix workload_stabilization handling of unavailable nodes and instances

This patch set excludes nodes and instances from the audit
when their required metrics aren't available.

Change-Id: I87c6c249e3962f45d082f92d7e6e0be04e101799
Closes-Bug: #1736982
This commit is contained in:
Alexander Chadin
2018-01-23 11:19:57 +03:00
parent a1c575bfc5
commit 701b258dc7
6 changed files with 95 additions and 7 deletions

View File

@@ -202,7 +202,7 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy):
"No values returned by %(resource_id)s "
"for %(metric_name)s" % dict(
resource_id=instance.uuid, metric_name=meter))
avg_meter = 0
return
if meter == 'cpu_util':
avg_meter /= float(100)
instance_load[meter] = avg_meter
@@ -242,12 +242,10 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy):
self.periods['node'], 'mean', granularity=self.granularity)
if avg_meter is None:
if meter_name == 'hardware.memory.used':
avg_meter = node.memory
if meter_name == 'compute.node.cpu.percent':
avg_meter = 1
LOG.warning('No values returned by node %s for %s',
node_id, meter_name)
del hosts_load[node_id]
break
else:
if meter_name == 'hardware.memory.used':
avg_meter /= oslo_utils.units.Ki
@@ -296,6 +294,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy):
migration_case = []
new_hosts = copy.deepcopy(hosts)
instance_load = self.get_instance_load(instance)
if not instance_load:
return
s_host_vcpus = new_hosts[src_node.uuid]['vcpus']
d_host_vcpus = new_hosts[dst_node.uuid]['vcpus']
for metric in self.metrics:
@@ -353,6 +353,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy):
dst_node = self.compute_model.get_node_by_uuid(dst_host)
sd_case = self.calculate_migration_case(
hosts, instance, src_node, dst_node)
if sd_case is None:
break
weighted_sd = self.calculate_weighted_sd(sd_case[:-1])
@@ -361,6 +363,8 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy):
'host': dst_node.uuid, 'value': weighted_sd,
's_host': src_node.uuid, 'instance': instance.uuid}
instance_host_map.append(min_sd_case)
if sd_case is None:
continue
return sorted(instance_host_map, key=lambda x: x['value'])
def check_threshold(self):
@@ -369,7 +373,12 @@ class WorkloadStabilization(base.WorkloadStabilizationBaseStrategy):
normalized_load = self.normalize_hosts_load(hosts_load)
for metric in self.metrics:
metric_sd = self.get_sd(normalized_load, metric)
LOG.info("Standard deviation for %s is %s."
% (metric, metric_sd))
if metric_sd > float(self.thresholds[metric]):
LOG.info("Standard deviation of %s exceeds"
" appropriate threshold %s."
% (metric, metric_sd))
return self.simulate_migrations(hosts_load)
def add_migration(self,

View File

@@ -183,6 +183,8 @@ class FakeCeilometerMetrics(object):
# node 3
mock['Node_6_hostname_6'] = 8
# This node doesn't send metrics
mock['LOST_NODE_hostname_7'] = None
mock['Node_19_hostname_19'] = 10
# node 4
mock['INSTANCE_7_hostname_7'] = 4
@@ -197,7 +199,10 @@ class FakeCeilometerMetrics(object):
# mock[uuid] = random.randint(1, 4)
mock[uuid] = 8
return float(mock[str(uuid)])
if mock[str(uuid)] is not None:
return float(mock[str(uuid)])
else:
return mock[str(uuid)]
@staticmethod
def get_average_usage_instance_cpu_wb(uuid):
@@ -263,6 +268,8 @@ class FakeCeilometerMetrics(object):
# node 4
mock['INSTANCE_7'] = 4
mock['LOST_INSTANCE'] = None
if uuid not in mock.keys():
# mock[uuid] = random.randint(1, 4)
mock[uuid] = 8

View File

@@ -0,0 +1,50 @@
<ModelRoot>
<ComputeNode human_id="" uuid="Node_0" status="enabled" state="up" id="0" hostname="hostname_0" vcpus="40" disk="250" disk_capacity="250" memory="132">
<Instance state="active" human_id="" uuid="INSTANCE_0" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_1" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
</ComputeNode>
<ComputeNode human_id="" uuid="Node_1" status="enabled" state="up" id="1" hostname="hostname_1" vcpus="40" disk="250" disk_capacity="250" memory="132">
<Instance state="active" human_id="" uuid="INSTANCE_2" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
</ComputeNode>
<ComputeNode human_id="" uuid="Node_2" status="enabled" state="up" id="2" hostname="hostname_2" vcpus="40" disk="250" disk_capacity="250" memory="132">
<Instance state="active" human_id="" uuid="INSTANCE_3" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_4" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_5" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
</ComputeNode>
<ComputeNode human_id="" uuid="Node_3" status="enabled" state="up" id="3" hostname="hostname_3" vcpus="40" disk="250" disk_capacity="250" memory="132">
<Instance state="active" human_id="" uuid="INSTANCE_6" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
</ComputeNode>
<ComputeNode human_id="" uuid="Node_4" status="enabled" state="up" id="4" hostname="hostname_4" vcpus="40" disk="250" disk_capacity="250" memory="132">
<Instance state="active" human_id="" uuid="INSTANCE_7" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
</ComputeNode>
<!-- Fixture node and instance without metrics: the fake Ceilometer and
Gnocchi metric helpers return None for 'LOST_NODE_hostname_7' and
'LOST_INSTANCE'.
NOTE(review): id="1" is also used by Node_1 above; confirm that ids do not
have to be unique in this model. -->
<ComputeNode human_id="" uuid="LOST_NODE" status="enabled" state="up" id="1" hostname="hostname_7" vcpus="40" disk="250" disk_capacity="250" memory="132">
<Instance state="active" human_id="" uuid="LOST_INSTANCE" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
</ComputeNode>
<Instance state="active" human_id="" uuid="INSTANCE_10" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_11" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_12" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_13" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_14" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_15" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_16" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_17" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_18" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_19" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_20" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_21" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_22" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_23" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_24" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_25" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_26" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_27" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_28" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_29" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_30" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_31" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_32" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_33" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_34" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_8" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
<Instance state="active" human_id="" uuid="INSTANCE_9" vcpus="10" disk="20" disk_capacity="20" memory="2" metadata='{"optimize": true,"top": "floor", "nested": {"x": "y"}}'/>
</ModelRoot>

View File

@@ -118,6 +118,9 @@ class FakerModelCollector(base.BaseClusterDataModelCollector):
def generate_scenario_1(self):
    """Load 'scenario_1.xml' through ``self.load_model`` and return it."""
    scenario_file = 'scenario_1.xml'
    return self.load_model(scenario_file)
def generate_scenario_1_with_1_node_unavailable(self):
    """Load 'scenario_1_with_1_node_unavailable.xml' via ``load_model``.

    :returns: whatever ``self.load_model`` returns for that file name
    """
    scenario_file = 'scenario_1_with_1_node_unavailable.xml'
    return self.load_model(scenario_file)
def generate_scenario_3_with_2_nodes(self):
    """Load 'scenario_3_with_2_nodes.xml' through ``self.load_model``."""
    scenario_file = 'scenario_3_with_2_nodes.xml'
    return self.load_model(scenario_file)

View File

@@ -175,6 +175,8 @@ class FakeGnocchiMetrics(object):
# node 3
mock['Node_6_hostname_6'] = 8
# This node doesn't send metrics
mock['LOST_NODE_hostname_7'] = None
mock['Node_19_hostname_19'] = 10
# node 4
mock['INSTANCE_7_hostname_7'] = 4
@@ -188,7 +190,10 @@ class FakeGnocchiMetrics(object):
if uuid not in mock.keys():
mock[uuid] = 8
return float(mock[str(uuid)])
if mock[str(uuid)] is not None:
return float(mock[str(uuid)])
else:
return mock[str(uuid)]
@staticmethod
def get_average_usage_instance_cpu(*args, **kwargs):
@@ -215,6 +220,8 @@ class FakeGnocchiMetrics(object):
# node 4
mock['INSTANCE_7'] = 4
mock['LOST_INSTANCE'] = None
if uuid not in mock.keys():
mock[uuid] = 8

View File

@@ -120,6 +120,12 @@ class TestWorkloadStabilization(base.TestCase):
self.assertEqual(
instance_0_dict, self.strategy.get_instance_load(instance0))
def test_get_instance_load_with_no_metrics(self):
    """get_instance_load() must return None for 'LOST_INSTANCE'.

    Uses the scenario_1 variant with one unavailable node as the model.
    """
    cluster_model = (
        self.fake_cluster.generate_scenario_1_with_1_node_unavailable())
    self.m_model.return_value = cluster_model
    instance_without_metrics = cluster_model.get_instance_by_uuid(
        "LOST_INSTANCE")
    reported_load = self.strategy.get_instance_load(
        instance_without_metrics)
    self.assertIsNone(reported_load)
def test_normalize_hosts_load(self):
self.m_model.return_value = self.fake_cluster.generate_scenario_1()
fake_hosts = {'Node_0': {'cpu_util': 0.07, 'memory.resident': 7},
@@ -144,6 +150,12 @@ class TestWorkloadStabilization(base.TestCase):
self.assertEqual(self.strategy.get_hosts_load(),
self.hosts_load_assert)
def test_get_hosts_load_with_node_missing(self):
    """get_hosts_load() must equal hosts_load_assert for this scenario.

    The model is the scenario_1 variant with one unavailable node.
    """
    model_with_lost_node = (
        self.fake_cluster.generate_scenario_1_with_1_node_unavailable())
    self.m_model.return_value = model_with_lost_node
    expected_load = self.hosts_load_assert
    actual_load = self.strategy.get_hosts_load()
    self.assertEqual(expected_load, actual_load)
def test_get_sd(self):
test_cpu_sd = 0.296
test_ram_sd = 9.3