Files
watcher/watcher/tests/decision_engine/model/faker_cluster_and_metrics.py
Lucian Petrut 424e9a76af vm workload consolidation: use actual host metrics
The "vm workload consolidation" strategy is summing up instance
usage in order to estimate host usage.

The problem is that some infrastructure services (e.g. OVS or Ceph
clients) may also use a significant amount of resources, which
would be ignored. This can impact Watcher's ability to detect
overloaded nodes and correctly rebalance the workload.

This commit will use the host metrics, if available. The proposed
implementation uses the maximum value between the host metric
and the sum of the instance metrics.

Note that we're holding a dict of host metric deltas in order to
account for planned migrations.

Change-Id: I82f474ee613f6c9a7c0a9d24a05cba41d2f68edb
2023-10-27 21:54:42 +03:00

191 lines
6.9 KiB
Python

# -*- encoding: utf-8 -*-
#
# Authors: Vojtech CIMA <cima@zhaw.ch>
# Bruno GRAZIOLI <gaea@zhaw.ch>
# Sean MURPHY <murp@zhaw.ch>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from unittest import mock
from watcher.decision_engine.model.collector import base
from watcher.decision_engine.model import model_root as modelroot
class FakerModelCollector(base.BaseClusterDataModelCollector):
    """Cluster data model collector backed by canned XML scenarios.

    Each ``generate_scenario_*`` method loads a model from an XML file
    stored in the ``data`` directory located next to this module.
    """

    def __init__(self, config=None, osc=None, audit_scope=None):
        """Initialise the collector.

        :param config: collector configuration; a ``mock.Mock()`` is
            substituted when ``None`` is given.
        :param osc: accepted but not used by this fake collector.
        :param audit_scope: accepted but not used by this fake collector.
        """
        if config is None:
            config = mock.Mock()
        super(FakerModelCollector, self).__init__(config)

    @property
    def notification_endpoints(self):
        """Return an empty list: this fake exposes no endpoints."""
        return []

    def get_audit_scope_handler(self, audit_scope):
        """Return ``None``: no audit scope handler is provided."""
        return None

    def execute(self):
        """Return the model produced by :meth:`generate_scenario_1`."""
        return self.generate_scenario_1()

    def load_data(self, filename):
        """Read a file from the ``data`` folder and return its raw bytes.

        :param filename: name of the file, relative to the ``data``
            directory that sits beside this module.
        :returns: the file content as read in binary mode.
        """
        cwd = os.path.abspath(os.path.dirname(__file__))
        data_folder = os.path.join(cwd, "data")

        with open(os.path.join(data_folder, filename), 'rb') as xml_file:
            xml_data = xml_file.read()

        return xml_data

    def load_model(self, filename):
        """Build a ``ModelRoot`` from the XML stored in ``filename``.

        :param filename: name of an XML file in the ``data`` folder.
        """
        return modelroot.ModelRoot.from_xml(self.load_data(filename))

    def generate_scenario_1(self):
        """Simulates cluster with 2 nodes and 2 instances using 1:1 mapping"""
        return self.load_model('scenario_1_with_metrics.xml')

    def generate_scenario_2(self):
        """Simulates a cluster

        With 4 nodes and 6 instances all mapped to a single node
        """
        return self.load_model('scenario_2_with_metrics.xml')

    def generate_scenario_3(self):
        """Simulates a cluster

        With 4 nodes and 6 instances all mapped to one node
        """
        return self.load_model('scenario_3_with_metrics.xml')

    def generate_scenario_4(self):
        """Simulates a cluster

        With 4 nodes and 6 instances spread on all nodes
        """
        return self.load_model('scenario_4_with_metrics.xml')
class FakeGnocchiMetrics(object):
    """Fake metrics source returning canned values for a cluster model.

    Instance-level values come from fixed lookup tables keyed by the
    instance uuid. Host-level values are derived from the instances that
    the supplied model maps onto the node.
    """

    # Maps a meter name to the name of the method serving it. Methods are
    # resolved through ``getattr`` at call time so that subclasses (or
    # patched attributes) are honoured.
    _METER_HANDLERS = {
        'host_cpu_usage': 'get_compute_node_cpu_util',
        'host_ram_usage': 'get_compute_node_ram_util',
        'instance_cpu_usage': 'get_instance_cpu_util',
        'instance_ram_usage': 'get_instance_ram_util',
        'instance_root_disk_size': 'get_instance_disk_root_size',
    }

    # Canned per-instance values, keyed by ``str(resource.uuid)``. They
    # are defined once here instead of being rebuilt on every call.
    _INSTANCE_CPU_UTIL = {
        'INSTANCE_0': 10,
        'INSTANCE_1': 30,
        'INSTANCE_2': 60,
        'INSTANCE_3': 20,
        'INSTANCE_4': 40,
        'INSTANCE_5': 50,
        'INSTANCE_6': 100,
        'INSTANCE_7': 100,
        'INSTANCE_8': 100,
        'INSTANCE_9': 100,
    }
    _INSTANCE_RAM_UTIL = {
        'INSTANCE_0': 1,
        'INSTANCE_1': 2,
        'INSTANCE_2': 4,
        'INSTANCE_3': 8,
        'INSTANCE_4': 3,
        'INSTANCE_5': 2,
        'INSTANCE_6': 1,
        'INSTANCE_7': 2,
        'INSTANCE_8': 4,
        'INSTANCE_9': 8,
    }
    _INSTANCE_DISK_ROOT_SIZE = {
        'INSTANCE_0': 10,
        'INSTANCE_1': 15,
        'INSTANCE_2': 30,
        'INSTANCE_3': 35,
        'INSTANCE_4': 20,
        'INSTANCE_5': 25,
        'INSTANCE_6': 25,
        'INSTANCE_7': 25,
        'INSTANCE_8': 25,
        'INSTANCE_9': 25,
    }

    def __init__(self, model):
        """Store the cluster model used to resolve nodes and instances.

        :param model: object providing ``get_node_by_uuid`` and
            ``get_node_instances``.
        """
        self.model = model

    def mock_get_statistics(self, resource=None, resource_type=None,
                            meter_name=None, period=300, aggregate='mean',
                            granularity=300):
        """Return the canned value for ``meter_name`` on ``resource``.

        :param resource: object exposing a ``uuid`` attribute.
        :param resource_type: accepted but not used.
        :param meter_name: one of ``host_cpu_usage``, ``host_ram_usage``,
            ``instance_cpu_usage``, ``instance_ram_usage`` or
            ``instance_root_disk_size``.
        :param period: forwarded unchanged to the metric helper.
        :param aggregate: forwarded unchanged to the metric helper.
        :param granularity: forwarded unchanged to the metric helper.
        :returns: the metric value, or ``None`` for any other meter name.
        """
        handler_name = self._METER_HANDLERS.get(meter_name)
        if handler_name is None:
            # Unknown meters yield no data rather than raising.
            return None
        handler = getattr(self, handler_name)
        return handler(resource, period, aggregate, granularity)

    def get_compute_node_cpu_util(self, resource, period,
                                  aggregate, granularity):
        """Calculates node utilization dynamically.

        Node CPU utilization is derived from the actual instance-node
        mappings provided within the cluster model: each instance
        contributes its canned CPU utilization weighted by its vCPU
        count, and the total is divided by the node's vCPU count.

        :param resource: node-like object exposing a ``uuid`` attribute.
        :param period: forwarded to :meth:`get_instance_cpu_util`.
        :param aggregate: forwarded to :meth:`get_instance_cpu_util`.
        :param granularity: forwarded to :meth:`get_instance_cpu_util`.
        :returns: relative node CPU utilization as a percentage.
        """
        node = self.model.get_node_by_uuid(resource.uuid)
        instances = self.model.get_node_instances(node)
        util_sum = 0.0
        for instance in instances:
            total_cpu_util = instance.vcpus * self.get_instance_cpu_util(
                instance, period, aggregate, granularity)
            # Convert the percentage into a number of busy vCPUs.
            util_sum += total_cpu_util / 100.0
        util_sum /= node.vcpus
        return util_sum * 100.0

    def get_compute_node_ram_util(self, resource, period, aggregate,
                                  granularity):
        """Return mock host RAM usage based on the allocated instances.

        The canned RAM values of the node's instances are summed and the
        total is divided by 1024.

        NOTE(review): the original comment describes the result as host
        RAM usage in KB; the unit of the per-instance values is not
        visible here, so the direction of the conversion should be
        confirmed against the consumer of this metric.

        :param resource: node-like object exposing a ``uuid`` attribute.
        :param period: forwarded to :meth:`get_instance_ram_util`.
        :param aggregate: forwarded to :meth:`get_instance_ram_util`.
        :param granularity: forwarded to :meth:`get_instance_ram_util`.
        """
        node = self.model.get_node_by_uuid(resource.uuid)
        instances = self.model.get_node_instances(node)
        util_sum = 0.0
        for instance in instances:
            util_sum += self.get_instance_ram_util(
                instance, period, aggregate, granularity)
        return util_sum / 1024

    @staticmethod
    def get_instance_cpu_util(resource, period, aggregate,
                              granularity):
        """Return the canned CPU utilization for an instance.

        :param resource: object exposing a ``uuid`` attribute.
        :raises KeyError: if the uuid is not one of the known instances.
        """
        return FakeGnocchiMetrics._INSTANCE_CPU_UTIL[str(resource.uuid)]

    @staticmethod
    def get_instance_ram_util(resource, period, aggregate,
                              granularity):
        """Return the canned RAM usage for an instance.

        :param resource: object exposing a ``uuid`` attribute.
        :raises KeyError: if the uuid is not one of the known instances.
        """
        return FakeGnocchiMetrics._INSTANCE_RAM_UTIL[str(resource.uuid)]

    @staticmethod
    def get_instance_disk_root_size(resource, period, aggregate,
                                    granularity):
        """Return the canned root disk size for an instance.

        :param resource: object exposing a ``uuid`` attribute.
        :raises KeyError: if the uuid is not one of the known instances.
        """
        return FakeGnocchiMetrics._INSTANCE_DISK_ROOT_SIZE[
            str(resource.uuid)]
# TODO(lpetrut): consider dropping Ceilometer support, it was deprecated
# in Ocata.
class FakeCeilometerMetrics(FakeGnocchiMetrics):
    """Ceilometer flavour of the fake metrics; adds nothing to its base."""