Compare commits

..

15 Commits

Author SHA1 Message Date
Zuul
090b695956 Merge "Handle deprecated "cpu_util" metric" into stable/2023.2 2025-02-04 07:54:48 +00:00
Lucian Petrut
3874ca82de Unblock the CI gate
The Nova collector json schema validation started [1][2] failing after
the jsonschema upper constraint was bumped from 4.17.3 to 4.19.1 [3].

The reason is that jsonschema v4.18.0a1 switched to a reference
resolving library [4], which treats the aggregate "id" as a jsonschema
id and expects it to be a string [5]. For this reason, we're now getting
AttributeError exceptions.

As a workaround, we'll rename the "id" ref element as "host_aggr_id".

Also, the watcher-tempest-multinode job is configured to use Focal,
which is no longer supported by Devstack [6]. That being considered,
we'll switch to Ubuntu Jammy (22.04).

While at it, we're disabling Cinder Backup, which isn't used while
testing Watched. It currently causes Devstack failures since it
uses the Swift backend by default, which is disabled.

[1] https://paste.opendev.org/raw/bjQ1uIdbDMnmA1UEhxLL/
[2] https://paste.opendev.org/raw/bNgxqulBwBLYB7tNhrU4/
[3] ab0dcbdda2
[4] https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1
[5] c23a5dc1c9/referencing/jsonschema.py (L54-L55C18)
[6] https://paste.openstack.org/raw/bSoSyXgbtmq6d9768HQn/

Change-Id: I300620c2ec4857b1e0d402a9b57a637f576eeb24
(cherry picked from commit 922478fbda)
2024-11-13 18:06:57 +00:00
Lucian Petrut
b5431b2e53 Handle deprecated "cpu_util" metric
The "cpu_util" metric has been deprecated a few years ago.
We'll obtain the same result by converting the cumulative cpu
time to a percentage, leveraging the rate of change aggregation.

Change-Id: I18fe0de6f74c785e674faceea0c48f44055818fe
(cherry picked from commit 00fea975e2)
2024-11-05 19:41:59 +00:00
OpenStack Release Bot
c3640ed64e Update TOX_CONSTRAINTS_FILE for stable/2023.2
Update the URL to the upper-constraints file to point to the redirect
rule on releases.openstack.org so that anyone working on this branch
will switch to the correct upper-constraints list automatically when
the requirements repository branches.

Until the requirements repository has as stable/2023.2 branch, tests will
continue to use the upper-constraints list on master.

Change-Id: I92ae6b2f247ac85ce8c5a6fe64d1894eab4e76e9
2023-09-14 01:24:42 +00:00
OpenStack Release Bot
1f8566a9ea Update .gitreview for stable/2023.2
Change-Id: I0f3d01f625b3ccbc43ebc22776ea1a6c66ca4616
2023-09-14 01:24:41 +00:00
Zuul
1e11c490a7 Merge "Add timeout option for Grafana request" 2023-08-29 11:21:46 +00:00
Zuul
8a7a8db661 Merge "Imported Translations from Zanata" 2023-08-28 06:21:40 +00:00
BubaVV
0610070e59 Add timeout option for Grafana request
Implemented config option to setup Grafana API request timeout

Change-Id: I8cbf8ce22f199fe22c0b162ba1f419169881f193
2023-08-23 17:46:19 +03:00
OpenStack Proposal Bot
a0997a0423 Imported Translations from Zanata
For more information about this automatic import see:
https://docs.openstack.org/i18n/latest/reviewing-translation-import.html

Change-Id: I37201577bd8d9c53db8ce6700f47d911359da6d2
2023-08-14 04:24:29 +00:00
chenker
4ea3eada3e Fix watcher comment
Change-Id: I4512cf1032e08934886d5e3ca858b3e05c3da76c
2023-08-13 00:00:12 +00:00
Zuul
cd1c0f3054 Merge "Imported Translations from Zanata" 2023-03-08 07:04:33 +00:00
OpenStack Proposal Bot
684350977d Imported Translations from Zanata
For more information about this automatic import see:
https://docs.openstack.org/i18n/latest/reviewing-translation-import.html

Change-Id: I4ee251e6d37a1b955c22dc6fdc04c1a08c9ae9b8
2023-03-02 03:28:31 +00:00
OpenStack Release Bot
d28630b759 Update master for stable/2023.1
Add file to the reno documentation build to show release notes for
stable/2023.1.

Use pbr instruction to increment the minor version number
automatically so that master versions are higher than the versions on
stable/2023.1.

Sem-Ver: feature
Change-Id: Ia585893e7fef42e9991a2b81f604d1ff28c0a5ad
2023-02-28 13:31:08 +00:00
Zuul
f7fbaf46a2 Merge "Use new get_rpc_client API from oslo.messaging" 2023-02-09 01:25:15 +00:00
Tobias Urdin
e36b77ad6d Use new get_rpc_client API from oslo.messaging
Use the new API that is consistent with
the existing API instead of instantiating the client
class directly.

This was introduced in release 14.1.0 here [1] and
added into oslo.messaging here [2]

[1] https://review.opendev.org/c/openstack/requirements/+/869340
[2] https://review.opendev.org/c/openstack/oslo.messaging/+/862419

Change-Id: I43c399a0c68473e40b8b71e9617c8334a439e675
2023-01-19 20:50:26 +00:00
21 changed files with 110 additions and 46 deletions

View File

@@ -2,3 +2,4 @@
host=review.opendev.org
port=29418
project=openstack/watcher.git
defaultbranch=stable/2023.2

View File

@@ -89,7 +89,7 @@
- job:
name: watcher-tempest-multinode
parent: watcher-tempest-functional
nodeset: openstack-two-node-focal
nodeset: openstack-two-node-jammy
roles:
- zuul: openstack/tempest
group-vars:
@@ -107,6 +107,7 @@
watcher-api: false
watcher-decision-engine: true
watcher-applier: false
c-bak: false
ceilometer: false
ceilometer-acompute: false
ceilometer-acentral: false

View File

@@ -372,7 +372,7 @@ You can configure and install Ceilometer by following the documentation below :
#. https://docs.openstack.org/ceilometer/latest
The built-in strategy 'basic_consolidation' provided by watcher requires
"**compute.node.cpu.percent**" and "**cpu_util**" measurements to be collected
"**compute.node.cpu.percent**" and "**cpu**" measurements to be collected
by Ceilometer.
The measurements available depend on the hypervisors that OpenStack manages on
the specific implementation.

View File

@@ -300,6 +300,6 @@ Using that you can now query the values for that specific metric:
.. code-block:: py
avg_meter = self.datasource_backend.statistic_aggregation(
instance.uuid, 'cpu_util', self.periods['instance'],
instance.uuid, 'instance_cpu_usage', self.periods['instance'],
self.granularity,
aggregation=self.aggregation_method['instance'])

View File

@@ -26,8 +26,7 @@ metric service name plugins comment
``compute_monitors`` option
to ``cpu.virt_driver`` in
the nova.conf.
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``cpu`` ceilometer_ none
============================ ============ ======= ===========================
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute

View File

@@ -22,8 +22,7 @@ The *vm_workload_consolidation* strategy requires the following metrics:
============================ ============ ======= =========================
metric service name plugins comment
============================ ============ ======= =========================
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``cpu`` ceilometer_ none
``memory.resident`` ceilometer_ none
``memory`` ceilometer_ none
``disk.root.size`` ceilometer_ none

View File

@@ -27,9 +27,8 @@ metric service name plugins comment
to ``cpu.virt_driver`` in the
nova.conf.
``hardware.memory.used`` ceilometer_ SNMP_
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``memory.resident`` ceilometer_ none
``cpu`` ceilometer_ none
``instance_ram_usage`` ceilometer_ none
============================ ============ ======= =============================
.. _ceilometer: https://docs.openstack.org/ceilometer/latest/admin/telemetry-measurements.html#openstack-compute
@@ -107,10 +106,10 @@ parameter type default Value description
period of all received ones.
==================== ====== ===================== =============================
.. |metrics| replace:: ["cpu_util", "memory.resident"]
.. |thresholds| replace:: {"cpu_util": 0.2, "memory.resident": 0.2}
.. |weights| replace:: {"cpu_util_weight": 1.0, "memory.resident_weight": 1.0}
.. |instance_metrics| replace:: {"cpu_util": "compute.node.cpu.percent", "memory.resident": "hardware.memory.used"}
.. |metrics| replace:: ["instance_cpu_usage", "instance_ram_usage"]
.. |thresholds| replace:: {"instance_cpu_usage": 0.2, "instance_ram_usage": 0.2}
.. |weights| replace:: {"instance_cpu_usage_weight": 1.0, "instance_ram_usage_weight": 1.0}
.. |instance_metrics| replace:: {"instance_cpu_usage": "compute.node.cpu.percent", "instance_ram_usage": "hardware.memory.used"}
.. |periods| replace:: {"instance": 720, "node": 600}
Efficacy Indicator
@@ -136,8 +135,8 @@ How to use it ?
at1 workload_balancing --strategy workload_stabilization
$ openstack optimize audit create -a at1 \
-p thresholds='{"memory.resident": 0.05}' \
-p metrics='["memory.resident"]'
-p thresholds='{"instance_ram_usage": 0.05}' \
-p metrics='["instance_ram_usage"]'
External Links
--------------

View File

@@ -24,8 +24,7 @@ The *workload_balance* strategy requires the following metrics:
======================= ============ ======= =========================
metric service name plugins comment
======================= ============ ======= =========================
``cpu_util`` ceilometer_ none cpu_util has been removed
since Stein.
``cpu`` ceilometer_ none
``memory.resident`` ceilometer_ none
======================= ============ ======= =========================
@@ -65,15 +64,16 @@ Configuration
Strategy parameters are:
============== ====== ============= ====================================
parameter type default Value description
============== ====== ============= ====================================
``metrics`` String 'cpu_util' Workload balance base on cpu or ram
utilization. choice: ['cpu_util',
'memory.resident']
``threshold`` Number 25.0 Workload threshold for migration
``period`` Number 300 Aggregate time period of ceilometer
============== ====== ============= ====================================
============== ====== ==================== ====================================
parameter type default Value description
============== ====== ==================== ====================================
``metrics`` String 'instance_cpu_usage' Workload balance base on cpu or ram
utilization. Choices:
['instance_cpu_usage',
'instance_ram_usage']
``threshold`` Number 25.0 Workload threshold for migration
``period`` Number 300 Aggregate time period of ceilometer
============== ====== ==================== ====================================
Efficacy Indicator
------------------
@@ -95,7 +95,7 @@ How to use it ?
at1 workload_balancing --strategy workload_balance
$ openstack optimize audit create -a at1 -p threshold=26.0 \
-p period=310 -p metrics=cpu_util
-p period=310 -p metrics=instance_cpu_usage
External Links
--------------

View File

@@ -0,0 +1,6 @@
===========================
2023.1 Series Release Notes
===========================
.. release-notes::
:branch: stable/2023.1

View File

@@ -21,6 +21,7 @@ Contents:
:maxdepth: 1
unreleased
2023.1
zed
yoga
xena

View File

@@ -2,15 +2,16 @@
# Andi Chandler <andi@gowling.com>, 2018. #zanata
# Andi Chandler <andi@gowling.com>, 2020. #zanata
# Andi Chandler <andi@gowling.com>, 2022. #zanata
# Andi Chandler <andi@gowling.com>, 2023. #zanata
msgid ""
msgstr ""
"Project-Id-Version: python-watcher\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2022-08-29 03:02+0000\n"
"POT-Creation-Date: 2023-08-14 03:05+0000\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"PO-Revision-Date: 2022-05-31 08:39+0000\n"
"PO-Revision-Date: 2023-06-21 07:54+0000\n"
"Last-Translator: Andi Chandler <andi@gowling.com>\n"
"Language-Team: English (United Kingdom)\n"
"Language: en_GB\n"
@@ -59,6 +60,9 @@ msgstr "1.9.0"
msgid "2.0.0"
msgstr "2.0.0"
msgid "2023.1 Series Release Notes"
msgstr "2023.1 Series Release Notes"
msgid "3.0.0"
msgstr "3.0.0"
@@ -969,6 +973,9 @@ msgstr "Xena Series Release Notes"
msgid "Yoga Series Release Notes"
msgstr "Yoga Series Release Notes"
msgid "Zed Series Release Notes"
msgstr "Zed Series Release Notes"
msgid "``[watcher_datasources] datasources = gnocchi,monasca,ceilometer``"
msgstr "``[watcher_datasources] datasources = gnocchi,monasca,ceilometer``"

View File

@@ -17,7 +17,7 @@ oslo.context>=2.21.0 # Apache-2.0
oslo.db>=4.44.0 # Apache-2.0
oslo.i18n>=3.20.0 # Apache-2.0
oslo.log>=3.37.0 # Apache-2.0
oslo.messaging>=8.1.2 # Apache-2.0
oslo.messaging>=14.1.0 # Apache-2.0
oslo.policy>=3.6.0 # Apache-2.0
oslo.reports>=1.27.0 # Apache-2.0
oslo.serialization>=2.25.0 # Apache-2.0

View File

@@ -8,7 +8,7 @@ basepython = python3
usedevelop = True
allowlist_externals = find
rm
install_command = pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master} {opts} {packages}
install_command = pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/2023.2} {opts} {packages}
setenv =
VIRTUAL_ENV={envdir}
deps =
@@ -30,7 +30,7 @@ passenv =
commands =
doc8 doc/source/ CONTRIBUTING.rst HACKING.rst README.rst
flake8
bandit -r watcher -x watcher/tests/* -n5 -ll -s B320
#bandit -r watcher -x watcher/tests/* -n5 -ll -s B320
[testenv:venv]
setenv = PYTHONHASHSEED=0

View File

@@ -121,7 +121,7 @@ class RequestContextSerializer(messaging.Serializer):
def get_client(target, version_cap=None, serializer=None):
assert TRANSPORT is not None
serializer = RequestContextSerializer(serializer)
return messaging.RPCClient(
return messaging.get_rpc_client(
TRANSPORT,
target,
version_cap=version_cap,

View File

@@ -134,7 +134,13 @@ GRAFANA_CLIENT_OPTS = [
"InfluxDB this will be the retention period. "
"These queries will need to be constructed using tools "
"such as Postman. Example: SELECT cpu FROM {4}."
"cpu_percent WHERE host == '{1}' AND time > now()-{2}s")]
"cpu_percent WHERE host == '{1}' AND time > now()-{2}s"),
cfg.IntOpt('http_timeout',
min=0,
default=60,
mutable=True,
help='Timeout for Grafana request')
]
def register_opts(conf):

View File

@@ -38,7 +38,7 @@ class GnocchiHelper(base.DataSourceBase):
host_inlet_temp='hardware.ipmi.node.temperature',
host_airflow='hardware.ipmi.node.airflow',
host_power='hardware.ipmi.node.power',
instance_cpu_usage='cpu_util',
instance_cpu_usage='cpu',
instance_ram_usage='memory.resident',
instance_ram_allocated='memory',
instance_l3_cache_usage='cpu_l3_cache',
@@ -93,6 +93,25 @@ class GnocchiHelper(base.DataSourceBase):
resource_id = resources[0]['id']
if meter_name == "instance_cpu_usage":
if resource_type != "instance":
LOG.warning("Unsupported resource type for metric "
"'instance_cpu_usage': ", resource_type)
return
# The "cpu_util" gauge (percentage) metric has been removed.
# We're going to obtain the same result by using the rate of change
# aggregate operation.
if aggregate not in ("mean", "rate:mean"):
LOG.warning("Unsupported aggregate for instance_cpu_usage "
"metric: %s. "
"Supported aggregates: mean, rate:mean ",
aggregate)
return
# TODO(lpetrut): consider supporting other aggregates.
aggregate = "rate:mean"
raw_kwargs = dict(
metric=meter,
start=start_time,
@@ -117,6 +136,17 @@ class GnocchiHelper(base.DataSourceBase):
# Airflow from hardware.ipmi.node.airflow is reported as
# 1/10 th of actual CFM
return_value *= 10
if meter_name == "instance_cpu_usage":
# "rate:mean" can return negative values for migrated vms.
return_value = max(0, return_value)
# We're converting the cumulative cpu time (ns) to cpu usage
# percentage.
vcpus = resource.vcpus
if not vcpus:
LOG.warning("instance vcpu count not set, assuming 1")
vcpus = 1
return_value *= 100 / (granularity * 10e+8) / vcpus
return return_value

View File

@@ -138,7 +138,8 @@ class GrafanaHelper(base.DataSourceBase):
raise exception.DataSourceNotAvailable(self.NAME)
resp = requests.get(self._base_url + str(project_id) + '/query',
params=params, headers=self._headers)
params=params, headers=self._headers,
timeout=CONF.grafana_client.http_timeout)
if resp.status_code == HTTPStatus.OK:
return resp
elif resp.status_code == HTTPStatus.BAD_REQUEST:

View File

@@ -48,7 +48,7 @@ class NovaClusterDataModelCollector(base.BaseClusterDataModelCollector):
"type": "array",
"items": {
"anyOf": [
{"$ref": HOST_AGGREGATES + "id"},
{"$ref": HOST_AGGREGATES + "host_aggr_id"},
{"$ref": HOST_AGGREGATES + "name"},
]
}
@@ -98,7 +98,8 @@ class NovaClusterDataModelCollector(base.BaseClusterDataModelCollector):
"type": "array",
"items": {
"anyOf": [
{"$ref": HOST_AGGREGATES + "id"},
{"$ref":
HOST_AGGREGATES + "host_aggr_id"},
{"$ref": HOST_AGGREGATES + "name"},
]
}
@@ -129,7 +130,7 @@ class NovaClusterDataModelCollector(base.BaseClusterDataModelCollector):
"additionalProperties": False
},
"host_aggregates": {
"id": {
"host_aggr_id": {
"properties": {
"id": {
"oneOf": [

View File

@@ -252,9 +252,6 @@ class BaseStrategy(loadable.Loadable, metaclass=abc.ABCMeta):
if not self.compute_model:
raise exception.ClusterStateNotDefined()
if self.compute_model.stale:
raise exception.ClusterStateStale()
LOG.debug(self.compute_model.to_string())
def execute(self, audit=None):

View File

@@ -295,7 +295,7 @@ class WorkloadBalance(base.WorkloadStabilizationBaseStrategy):
self.threshold)
return self.solution
# choose the server with largest cpu_util
# choose the server with largest cpu usage
source_nodes = sorted(source_nodes,
reverse=True,
key=lambda x: (x[self._meter]))

View File

@@ -40,17 +40,25 @@ class TestGnocchiHelper(base.BaseTestCase):
self.addCleanup(stat_agg_patcher.stop)
def test_gnocchi_statistic_aggregation(self, mock_gnocchi):
vcpus = 2
mock_instance = mock.Mock(
id='16a86790-327a-45f9-bc82-45839f062fdc',
vcpus=vcpus)
gnocchi = mock.MagicMock()
# cpu time rate of change (ns)
mock_rate_measure = 360 * 10e+8 * vcpus * 5.5 / 100
expected_result = 5.5
expected_measures = [["2017-02-02T09:00:00.000000", 360, 5.5]]
expected_measures = [
["2017-02-02T09:00:00.000000", 360, mock_rate_measure]]
gnocchi.metric.get_measures.return_value = expected_measures
mock_gnocchi.return_value = gnocchi
helper = gnocchi_helper.GnocchiHelper()
result = helper.statistic_aggregation(
resource=mock.Mock(id='16a86790-327a-45f9-bc82-45839f062fdc'),
resource=mock_instance,
resource_type='instance',
meter_name='instance_cpu_usage',
period=300,
@@ -59,6 +67,14 @@ class TestGnocchiHelper(base.BaseTestCase):
)
self.assertEqual(expected_result, result)
gnocchi.metric.get_measures.assert_called_once_with(
metric="cpu",
start=mock.ANY,
stop=mock.ANY,
resource_id=mock_instance.uuid,
granularity=360,
aggregation="rate:mean")
def test_gnocchi_statistic_series(self, mock_gnocchi):
gnocchi = mock.MagicMock()
expected_result = {