From 8eab47e107e76e8cace55abb9b884d40328b12f5 Mon Sep 17 00:00:00 2001 From: Nikolay Tatarinov Date: Fri, 3 Oct 2025 11:57:15 +0300 Subject: [PATCH] Added prometheus instance cpu and ram metrics --- .../datasources/prometheus_base.py | 16 ++++++++++++++-- .../strategy/strategies/host_maintenance.py | 10 ++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/watcher/decision_engine/datasources/prometheus_base.py b/watcher/decision_engine/datasources/prometheus_base.py index dc4cb92c8..4c39a6e45 100644 --- a/watcher/decision_engine/datasources/prometheus_base.py +++ b/watcher/decision_engine/datasources/prometheus_base.py @@ -40,8 +40,8 @@ class PrometheusBase(base.DataSourceBase): host_inlet_temp=None, host_airflow=None, host_power=None, - instance_cpu_usage='ceilometer_cpu', - instance_ram_usage='ceilometer_memory_usage', + instance_cpu_usage='libvirt_domain_info_cpu_time_seconds_total', + instance_ram_usage='libvirt_domain_info_memory_usage_bytes', instance_ram_allocated='instance.memory', instance_l3_cache_usage=None, instance_root_disk_size='instance.disk', @@ -311,6 +311,18 @@ class PrometheusBase(base.DataSourceBase): 'agg': aggregate, 'meter': meter, 'period': period, 'vcpus': vcpus} ) + elif meter == 'libvirt_domain_info_cpu_time_seconds_total': + query_args = ( + "(rate(%(meter)s{%(label)s='%(label_value)s'}[%(period)ss]))" + % {'label': uuid_label_key, 'label_value': instance_label, + 'meter': meter, 'period': period} + ) + elif meter == 'libvirt_domain_info_memory_usage_bytes': + query_args = ( + "%(agg)s_over_time(%(meter)s{%(label)s='%(label_value)s'}[%(period)ss]) / 1024" + % {'label': uuid_label_key, 'label_value': instance_label, + 'meter': meter, 'period': period, 'agg': aggregate} + ) else: raise exception.InvalidParameter( message=(_("Cannot process prometheus meter %s") % meter) diff --git a/watcher/decision_engine/strategy/strategies/host_maintenance.py b/watcher/decision_engine/strategy/strategies/host_maintenance.py index 
9c5926468..f8b905b51 100644 --- a/watcher/decision_engine/strategy/strategies/host_maintenance.py +++ b/watcher/decision_engine/strategy/strategies/host_maintenance.py @@ -89,6 +89,11 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): "will backup the maintenance node.", "type": "string", }, + "disabled_reason": { + "description": "Describe reason for moving host " + "into maintenance mode.", + "type": "string", + }, "disable_live_migration": { "description": "Disable live migration in maintenance. " "If True, active instances will be cold " @@ -295,6 +300,11 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy): maintenance_node = self.input_parameters.get('maintenance_node') backup_node = self.input_parameters.get('backup_node') + disabled_reason = self.input_parameters.get('disabled_reason') + + # Override the maintenance reason only when one was supplied. + if disabled_reason: + self.REASON_FOR_MAINTAINING = disabled_reason # if no VMs in the maintenance_node, just maintain the compute node src_node = self.compute_model.get_node_by_name(maintenance_node)