Compare commits

...

5 Commits

Author SHA1 Message Date
Zuul
090b695956 Merge "Handle deprecated "cpu_util" metric" into stable/2023.2 2025-02-04 07:54:48 +00:00
Lucian Petrut
3874ca82de Unblock the CI gate
The Nova collector json schema validation started [1][2] failing after
the jsonschema upper constraint was bumped from 4.17.3 to 4.19.1 [3].

The reason is that jsonschema v4.18.0a1 switched to a reference
resolving library [4], which treats the aggregate "id" as a jsonschema
id and expects it to be a string [5]. For this reason, we're now getting
AttributeError exceptions.

As a workaround, we'll rename the "id" ref element as "host_aggr_id".

Also, the watcher-tempest-multinode job is configured to use Focal,
which is no longer supported by Devstack [6]. That being considered,
we'll switch to Ubuntu Jammy (22.04).

While at it, we're disabling Cinder Backup, which isn't used while
testing Watched. It currently causes Devstack failures since it
uses the Swift backend by default, which is disabled.

[1] https://paste.opendev.org/raw/bjQ1uIdbDMnmA1UEhxLL/
[2] https://paste.opendev.org/raw/bNgxqulBwBLYB7tNhrU4/
[3] ab0dcbdda2
[4] https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1
[5] c23a5dc1c9/referencing/jsonschema.py (L54-L55C18)
[6] https://paste.openstack.org/raw/bSoSyXgbtmq6d9768HQn/

Change-Id: I300620c2ec4857b1e0d402a9b57a637f576eeb24
(cherry picked from commit 922478fbda)
2024-11-13 18:06:57 +00:00
Lucian Petrut
b5431b2e53 Handle deprecated "cpu_util" metric
The "cpu_util" metric has been deprecated a few years ago.
We'll obtain the same result by converting the cumulative cpu
time to a percentage, leveraging the rate of change aggregation.

Change-Id: I18fe0de6f74c785e674faceea0c48f44055818fe
(cherry picked from commit 00fea975e2)
2024-11-05 19:41:59 +00:00
OpenStack Release Bot
c3640ed64e Update TOX_CONSTRAINTS_FILE for stable/2023.2
Update the URL to the upper-constraints file to point to the redirect
rule on releases.openstack.org so that anyone working on this branch
will switch to the correct upper-constraints list automatically when
the requirements repository branches.

Until the requirements repository has as stable/2023.2 branch, tests will
continue to use the upper-constraints list on master.

Change-Id: I92ae6b2f247ac85ce8c5a6fe64d1894eab4e76e9
2023-09-14 01:24:42 +00:00
OpenStack Release Bot
1f8566a9ea Update .gitreview for stable/2023.2
Change-Id: I0f3d01f625b3ccbc43ebc22776ea1a6c66ca4616
2023-09-14 01:24:41 +00:00
13 changed files with 82 additions and 36 deletions

View File

@@ -2,3 +2,4 @@
host=review.opendev.org
port=29418
project=openstack/watcher.git
defaultbranch=stable/2023.2

View File

@@ -89,7 +89,7 @@
- job:
name: watcher-tempest-multinode
parent: watcher-tempest-functional
nodeset: openstack-two-node-focal
nodeset: openstack-two-node-jammy
roles:
- zuul: openstack/tempest
group-vars:
@@ -107,6 +107,7 @@
watcher-api: false
watcher-decision-engine: true
watcher-applier: false
c-bak: false
ceilometer: false
ceilometer-acompute: false
ceilometer-acentral: false

View File

@@ -372,7 +372,7 @@ You can configure and install Ceilometer by following the documentation below :
#. https://docs.openstack.org/ceilometer/latest
The built-in strategy 'basic_consolidation' provided by watcher requires
"**compute.node.cpu.percent**" and "**cpu_util**" measurements to be collected
"**compute.node.cpu.percent**" and "**cpu**" measurements to be collected
by Ceilometer.
The measurements available depend on the hypervisors that OpenStack manages on
the specific implementation.

View File

@@ -300,6 +300,6 @@ Using that you can now query the values for that specific metric:
.. code-block:: py
avg_meter = self.datasource_backend.statistic_aggregation(
instance.uuid, 'cpu_util', self.periods['instance'],
instance.uuid, 'instance_cpu_usage', self.periods['instance'],
self.granularity,
aggregation=self.aggregation_method['instance'])

View File

@@ -26,8 +26,7 @@ metric service name plugins comment
``compute_monitors`` option
to ``cpu.virt_driver`` in
the nova.conf.
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``cpu`` ceilometer_ none
============================ ============ ======= ===========================
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute

View File

@@ -22,8 +22,7 @@ The *vm_workload_consolidation* strategy requires the following metrics:
============================ ============ ======= =========================
metric service name plugins comment
============================ ============ ======= =========================
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``cpu`` ceilometer_ none
``memory.resident`` ceilometer_ none
``memory`` ceilometer_ none
``disk.root.size`` ceilometer_ none

View File

@@ -27,9 +27,8 @@ metric service name plugins comment
to ``cpu.virt_driver`` in the
nova.conf.
``hardware.memory.used`` ceilometer_ SNMP_
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``memory.resident`` ceilometer_ none
``cpu`` ceilometer_ none
``instance_ram_usage`` ceilometer_ none
============================ ============ ======= =============================
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
@@ -107,10 +106,10 @@ parameter type default Value description
period of all received ones.
==================== ====== ===================== =============================
.. |metrics| replace:: ["cpu_util", "memory.resident"]
.. |thresholds| replace:: {"cpu_util": 0.2, "memory.resident": 0.2}
.. |weights| replace:: {"cpu_util_weight": 1.0, "memory.resident_weight": 1.0}
.. |instance_metrics| replace:: {"cpu_util": "compute.node.cpu.percent", "memory.resident": "hardware.memory.used"}
.. |metrics| replace:: ["instance_cpu_usage", "instance_ram_usage"]
.. |thresholds| replace:: {"instance_cpu_usage": 0.2, "instance_ram_usage": 0.2}
.. |weights| replace:: {"instance_cpu_usage_weight": 1.0, "instance_ram_usage_weight": 1.0}
.. |instance_metrics| replace:: {"instance_cpu_usage": "compute.node.cpu.percent", "instance_ram_usage": "hardware.memory.used"}
.. |periods| replace:: {"instance": 720, "node": 600}
Efficacy Indicator
@@ -136,8 +135,8 @@ How to use it ?
at1 workload_balancing --strategy workload_stabilization
$ openstack optimize audit create -a at1 \
-p thresholds='{"memory.resident": 0.05}' \
-p metrics='["memory.resident"]'
-p thresholds='{"instance_ram_usage": 0.05}' \
-p metrics='["instance_ram_usage"]'
External Links
--------------

View File

@@ -24,8 +24,7 @@ The *workload_balance* strategy requires the following metrics:
======================= ============ ======= =========================
metric service name plugins comment
======================= ============ ======= =========================
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``cpu`` ceilometer_ none
``memory.resident`` ceilometer_ none
======================= ============ ======= =========================
@@ -65,15 +64,16 @@ Configuration
Strategy parameters are:
============== ====== ============= ====================================
============== ====== ==================== ====================================
parameter type default Value description
============== ====== ============= ====================================
``metrics`` String 'cpu_util' Workload balance base on cpu or ram
utilization. choice: ['cpu_util',
'memory.resident']
============== ====== ==================== ====================================
``metrics`` String 'instance_cpu_usage' Workload balance base on cpu or ram
utilization. Choices:
['instance_cpu_usage',
'instance_ram_usage']
``threshold`` Number 25.0 Workload threshold for migration
``period`` Number 300 Aggregate time period of ceilometer
============== ====== ============= ====================================
============== ====== ==================== ====================================
Efficacy Indicator
------------------
@@ -95,7 +95,7 @@ How to use it ?
at1 workload_balancing --strategy workload_balance
$ openstack optimize audit create -a at1 -p threshold=26.0 \
-p period=310 -p metrics=cpu_util
-p period=310 -p metrics=instance_cpu_usage
External Links
--------------

View File

@@ -8,7 +8,7 @@ basepython = python3
usedevelop = True
allowlist_externals = find
rm
install_command = pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master} {opts} {packages}
install_command = pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/2023.2} {opts} {packages}
setenv =
VIRTUAL_ENV={envdir}
deps =

View File

@@ -38,7 +38,7 @@ class GnocchiHelper(base.DataSourceBase):
host_inlet_temp='hardware.ipmi.node.temperature',
host_airflow='hardware.ipmi.node.airflow',
host_power='hardware.ipmi.node.power',
instance_cpu_usage='cpu_util',
instance_cpu_usage='cpu',
instance_ram_usage='memory.resident',
instance_ram_allocated='memory',
instance_l3_cache_usage='cpu_l3_cache',
@@ -93,6 +93,25 @@ class GnocchiHelper(base.DataSourceBase):
resource_id = resources[0]['id']
if meter_name == "instance_cpu_usage":
if resource_type != "instance":
LOG.warning("Unsupported resource type for metric "
"'instance_cpu_usage': ", resource_type)
return
# The "cpu_util" gauge (percentage) metric has been removed.
# We're going to obtain the same result by using the rate of change
# aggregate operation.
if aggregate not in ("mean", "rate:mean"):
LOG.warning("Unsupported aggregate for instance_cpu_usage "
"metric: %s. "
"Supported aggregates: mean, rate:mean ",
aggregate)
return
# TODO(lpetrut): consider supporting other aggregates.
aggregate = "rate:mean"
raw_kwargs = dict(
metric=meter,
start=start_time,
@@ -117,6 +136,17 @@ class GnocchiHelper(base.DataSourceBase):
# Airflow from hardware.ipmi.node.airflow is reported as
# 1/10 th of actual CFM
return_value *= 10
if meter_name == "instance_cpu_usage":
# "rate:mean" can return negative values for migrated vms.
return_value = max(0, return_value)
# We're converting the cumulative cpu time (ns) to cpu usage
# percentage.
vcpus = resource.vcpus
if not vcpus:
LOG.warning("instance vcpu count not set, assuming 1")
vcpus = 1
return_value *= 100 / (granularity * 10e+8) / vcpus
return return_value

View File

@@ -48,7 +48,7 @@ class NovaClusterDataModelCollector(base.BaseClusterDataModelCollector):
"type": "array",
"items": {
"anyOf": [
{"$ref": HOST_AGGREGATES + "id"},
{"$ref": HOST_AGGREGATES + "host_aggr_id"},
{"$ref": HOST_AGGREGATES + "name"},
]
}
@@ -98,7 +98,8 @@ class NovaClusterDataModelCollector(base.BaseClusterDataModelCollector):
"type": "array",
"items": {
"anyOf": [
{"$ref": HOST_AGGREGATES + "id"},
{"$ref":
HOST_AGGREGATES + "host_aggr_id"},
{"$ref": HOST_AGGREGATES + "name"},
]
}
@@ -129,7 +130,7 @@ class NovaClusterDataModelCollector(base.BaseClusterDataModelCollector):
"additionalProperties": False
},
"host_aggregates": {
"id": {
"host_aggr_id": {
"properties": {
"id": {
"oneOf": [

View File

@@ -295,7 +295,7 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
self.threshold)
return self.solution
# choose the server with largest cpu_util
# choose the server with largest cpu usage
source_nodes = sorted(source_nodes,
reverse=True,
key=lambda x: (x[self._meter]))

View File

@@ -40,17 +40,25 @@ class TestGnocchiHelper(base.BaseTestCase):
self.addCleanup(stat_agg_patcher.stop)
def test_gnocchi_statistic_aggregation(self, mock_gnocchi):
vcpus = 2
mock_instance = mock.Mock(
id='16a86790-327a-45f9-bc82-45839f062fdc',
vcpus=vcpus)
gnocchi = mock.MagicMock()
# cpu time rate of change (ns)
mock_rate_measure = 360 * 10e+8 * vcpus * 5.5 / 100
expected_result = 5.5
expected_measures = [["2017-02-02T09:00:00.000000", 360, 5.5]]
expected_measures = [
["2017-02-02T09:00:00.000000", 360, mock_rate_measure]]
gnocchi.metric.get_measures.return_value = expected_measures
mock_gnocchi.return_value = gnocchi
helper = gnocchi_helper.GnocchiHelper()
result = helper.statistic_aggregation(
resource=mock.Mock(id='16a86790-327a-45f9-bc82-45839f062fdc'),
resource=mock_instance,
resource_type='instance',
meter_name='instance_cpu_usage',
period=300,
@@ -59,6 +67,14 @@ class TestGnocchiHelper(base.BaseTestCase):
)
self.assertEqual(expected_result, result)
gnocchi.metric.get_measures.assert_called_once_with(
metric="cpu",
start=mock.ANY,
stop=mock.ANY,
resource_id=mock_instance.uuid,
granularity=360,
aggregation="rate:mean")
def test_gnocchi_statistic_series(self, mock_gnocchi):
gnocchi = mock.MagicMock()
expected_result = {