Compare commits
28 Commits
master
...
stable/202
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
024815af71 | ||
|
|
ffec800f59 | ||
|
|
defd3953d8 | ||
|
|
de75a2a5b2 | ||
|
|
7c15812d68 | ||
|
|
c47f6fb66c | ||
|
|
a4ece6f084 | ||
|
|
758acdfb99 | ||
|
|
b65cfc283a | ||
|
|
f5a21ba43e | ||
|
|
ba417b38bf | ||
|
|
38622442f2 | ||
|
|
c7fde92411 | ||
|
|
e5b5ff5d56 | ||
|
|
3a923dbf16 | ||
|
|
fb85b27ae3 | ||
|
|
53872f9af2 | ||
|
|
c0ebb8ddb3 | ||
|
|
1d7f163651 | ||
|
|
c6ceaacf27 | ||
|
|
f4bfb10525 | ||
|
|
8a99d4c5c1 | ||
|
|
ce9f0b4c1e | ||
|
|
e385ece629 | ||
|
|
c6505ad06f | ||
|
|
64f70b948d | ||
|
|
68c9ce65d2 | ||
|
|
5fa0926528 |
@@ -2,3 +2,4 @@
|
|||||||
host=review.opendev.org
|
host=review.opendev.org
|
||||||
port=29418
|
port=29418
|
||||||
project=openstack/watcher.git
|
project=openstack/watcher.git
|
||||||
|
defaultbranch=stable/2025.1
|
||||||
|
|||||||
17
.zuul.yaml
17
.zuul.yaml
@@ -34,6 +34,7 @@
|
|||||||
vars:
|
vars:
|
||||||
tempest_concurrency: 1
|
tempest_concurrency: 1
|
||||||
tempest_test_regex: watcher_tempest_plugin.tests.scenario.test_execute_strategies
|
tempest_test_regex: watcher_tempest_plugin.tests.scenario.test_execute_strategies
|
||||||
|
tempest_exclude_regex: .*\[.*\breal_load\b.*\].*
|
||||||
|
|
||||||
- job:
|
- job:
|
||||||
name: watcher-tempest-multinode
|
name: watcher-tempest-multinode
|
||||||
@@ -213,10 +214,12 @@
|
|||||||
CEILOMETER_BACKENDS: "sg-core"
|
CEILOMETER_BACKENDS: "sg-core"
|
||||||
CEILOMETER_PIPELINE_INTERVAL: 15
|
CEILOMETER_PIPELINE_INTERVAL: 15
|
||||||
CEILOMETER_ALARM_THRESHOLD: 6000000000
|
CEILOMETER_ALARM_THRESHOLD: 6000000000
|
||||||
NODE_EXPORTER_ENABLE: false
|
|
||||||
PROMETHEUS_ENABLE: false
|
|
||||||
PROMETHEUS_SERVICE_SCRAPE_TARGETS: "sg-core,node-exporter"
|
|
||||||
PROMETHEUS_CONFIG_FILE: "/home/zuul/prometheus.yml"
|
PROMETHEUS_CONFIG_FILE: "/home/zuul/prometheus.yml"
|
||||||
|
# Disable sg_core prometheus config copy
|
||||||
|
PROMETHEUS_ENABLE: false
|
||||||
|
# PROMETHEUS_CONFIG_FILE var conflicts with sg_core var
|
||||||
|
# to avoid issue, set PROMETHEUS_CONF_DIR
|
||||||
|
PROMETHEUS_CONF_DIR: "/home/zuul"
|
||||||
devstack_local_conf:
|
devstack_local_conf:
|
||||||
post-config:
|
post-config:
|
||||||
$WATCHER_CONF:
|
$WATCHER_CONF:
|
||||||
@@ -253,6 +256,7 @@
|
|||||||
tempest_plugins:
|
tempest_plugins:
|
||||||
- watcher-tempest-plugin
|
- watcher-tempest-plugin
|
||||||
tempest_test_regex: watcher_tempest_plugin.tests.scenario.test_execute_strategies
|
tempest_test_regex: watcher_tempest_plugin.tests.scenario.test_execute_strategies
|
||||||
|
tempest_exclude_regex: .*\[.*\breal_load\b.*\].*
|
||||||
tempest_concurrency: 1
|
tempest_concurrency: 1
|
||||||
tox_envlist: all
|
tox_envlist: all
|
||||||
zuul_copy_output:
|
zuul_copy_output:
|
||||||
@@ -261,7 +265,6 @@
|
|||||||
subnode:
|
subnode:
|
||||||
devstack_plugins:
|
devstack_plugins:
|
||||||
ceilometer: https://opendev.org/openstack/ceilometer
|
ceilometer: https://opendev.org/openstack/ceilometer
|
||||||
sg-core: https://github.com/openstack-k8s-operators/sg-core
|
|
||||||
devstack-plugin-prometheus: https://opendev.org/openstack/devstack-plugin-prometheus
|
devstack-plugin-prometheus: https://opendev.org/openstack/devstack-plugin-prometheus
|
||||||
devstack_services:
|
devstack_services:
|
||||||
ceilometer-acompute: true
|
ceilometer-acompute: true
|
||||||
@@ -271,9 +274,9 @@
|
|||||||
devstack_localrc:
|
devstack_localrc:
|
||||||
CEILOMETER_BACKEND: "none"
|
CEILOMETER_BACKEND: "none"
|
||||||
CEILOMETER_BACKENDS: "none"
|
CEILOMETER_BACKENDS: "none"
|
||||||
# sg_core related var
|
# avoid collecting real host cpu metric since tests
|
||||||
NODE_EXPORTER_ENABLE: false
|
# will inject fake metrics when needed
|
||||||
PROMETHEUS_ENABLE: false
|
NODE_EXPORTER_COLLECTOR_EXCLUDE: "cpu"
|
||||||
devstack_local_conf:
|
devstack_local_conf:
|
||||||
post-config:
|
post-config:
|
||||||
$WATCHER_CONF:
|
$WATCHER_CONF:
|
||||||
|
|||||||
@@ -55,11 +55,7 @@ else
|
|||||||
WATCHER_BIN_DIR=$(get_python_exec_prefix)
|
WATCHER_BIN_DIR=$(get_python_exec_prefix)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# There are 2 modes, which is "uwsgi" which runs with an apache
|
WATCHER_UWSGI=watcher.wsgi.api:application
|
||||||
# proxy uwsgi in front of it, or "mod_wsgi", which runs in
|
|
||||||
# apache. mod_wsgi is deprecated, don't use it.
|
|
||||||
WATCHER_USE_WSGI_MODE=${WATCHER_USE_WSGI_MODE:-$WSGI_MODE}
|
|
||||||
WATCHER_UWSGI=$WATCHER_BIN_DIR/watcher-api-wsgi
|
|
||||||
WATCHER_UWSGI_CONF=$WATCHER_CONF_DIR/watcher-uwsgi.ini
|
WATCHER_UWSGI_CONF=$WATCHER_CONF_DIR/watcher-uwsgi.ini
|
||||||
|
|
||||||
if is_suse; then
|
if is_suse; then
|
||||||
@@ -73,11 +69,7 @@ WATCHER_SERVICE_PORT=${WATCHER_SERVICE_PORT:-9322}
|
|||||||
WATCHER_SERVICE_PORT_INT=${WATCHER_SERVICE_PORT_INT:-19322}
|
WATCHER_SERVICE_PORT_INT=${WATCHER_SERVICE_PORT_INT:-19322}
|
||||||
WATCHER_SERVICE_PROTOCOL=${WATCHER_SERVICE_PROTOCOL:-$SERVICE_PROTOCOL}
|
WATCHER_SERVICE_PROTOCOL=${WATCHER_SERVICE_PROTOCOL:-$SERVICE_PROTOCOL}
|
||||||
|
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "uwsgi" ]]; then
|
WATCHER_API_URL="$WATCHER_SERVICE_PROTOCOL://$WATCHER_SERVICE_HOST/infra-optim"
|
||||||
WATCHER_API_URL="$WATCHER_SERVICE_PROTOCOL://$WATCHER_SERVICE_HOST/infra-optim"
|
|
||||||
else
|
|
||||||
WATCHER_API_URL="$WATCHER_SERVICE_PROTOCOL://$WATCHER_SERVICE_HOST:$WATCHER_SERVICE_PORT"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Entry Points
|
# Entry Points
|
||||||
# ------------
|
# ------------
|
||||||
@@ -101,11 +93,7 @@ function _cleanup_watcher_apache_wsgi {
|
|||||||
# runs that a clean run would need to clean up
|
# runs that a clean run would need to clean up
|
||||||
function cleanup_watcher {
|
function cleanup_watcher {
|
||||||
sudo rm -rf $WATCHER_STATE_PATH
|
sudo rm -rf $WATCHER_STATE_PATH
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "uwsgi" ]]; then
|
|
||||||
remove_uwsgi_config "$WATCHER_UWSGI_CONF" "$WATCHER_UWSGI"
|
remove_uwsgi_config "$WATCHER_UWSGI_CONF" "$WATCHER_UWSGI"
|
||||||
else
|
|
||||||
_cleanup_watcher_apache_wsgi
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# configure_watcher() - Set config files, create data dirs, etc
|
# configure_watcher() - Set config files, create data dirs, etc
|
||||||
@@ -154,31 +142,6 @@ function create_watcher_accounts {
|
|||||||
"$WATCHER_API_URL"
|
"$WATCHER_API_URL"
|
||||||
}
|
}
|
||||||
|
|
||||||
# _config_watcher_apache_wsgi() - Set WSGI config files of watcher
|
|
||||||
function _config_watcher_apache_wsgi {
|
|
||||||
local watcher_apache_conf
|
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "mod_wsgi" ]]; then
|
|
||||||
local service_port=$WATCHER_SERVICE_PORT
|
|
||||||
if is_service_enabled tls-proxy; then
|
|
||||||
service_port=$WATCHER_SERVICE_PORT_INT
|
|
||||||
service_protocol="http"
|
|
||||||
fi
|
|
||||||
sudo mkdir -p $WATCHER_WSGI_DIR
|
|
||||||
sudo cp $WATCHER_DIR/watcher/api/app.wsgi $WATCHER_WSGI_DIR/app.wsgi
|
|
||||||
watcher_apache_conf=$(apache_site_config_for watcher-api)
|
|
||||||
sudo cp $WATCHER_DEVSTACK_FILES_DIR/apache-watcher-api.template $watcher_apache_conf
|
|
||||||
sudo sed -e "
|
|
||||||
s|%WATCHER_SERVICE_PORT%|$service_port|g;
|
|
||||||
s|%WATCHER_WSGI_DIR%|$WATCHER_WSGI_DIR|g;
|
|
||||||
s|%USER%|$STACK_USER|g;
|
|
||||||
s|%APIWORKERS%|$API_WORKERS|g;
|
|
||||||
s|%APACHE_NAME%|$APACHE_NAME|g;
|
|
||||||
" -i $watcher_apache_conf
|
|
||||||
enable_apache_site watcher-api
|
|
||||||
fi
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
# create_watcher_conf() - Create a new watcher.conf file
|
# create_watcher_conf() - Create a new watcher.conf file
|
||||||
function create_watcher_conf {
|
function create_watcher_conf {
|
||||||
# (Re)create ``watcher.conf``
|
# (Re)create ``watcher.conf``
|
||||||
@@ -196,11 +159,6 @@ function create_watcher_conf {
|
|||||||
iniset $WATCHER_CONF api host "$(ipv6_unquote $WATCHER_SERVICE_HOST)"
|
iniset $WATCHER_CONF api host "$(ipv6_unquote $WATCHER_SERVICE_HOST)"
|
||||||
iniset $WATCHER_CONF api port "$WATCHER_SERVICE_PORT_INT"
|
iniset $WATCHER_CONF api port "$WATCHER_SERVICE_PORT_INT"
|
||||||
# iniset $WATCHER_CONF api enable_ssl_api "True"
|
# iniset $WATCHER_CONF api enable_ssl_api "True"
|
||||||
else
|
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "mod_wsgi" ]]; then
|
|
||||||
iniset $WATCHER_CONF api host "$(ipv6_unquote $WATCHER_SERVICE_HOST)"
|
|
||||||
iniset $WATCHER_CONF api port "$WATCHER_SERVICE_PORT"
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
iniset $WATCHER_CONF oslo_policy policy_file $WATCHER_POLICY_YAML
|
iniset $WATCHER_CONF oslo_policy policy_file $WATCHER_POLICY_YAML
|
||||||
@@ -228,12 +186,8 @@ function create_watcher_conf {
|
|||||||
# Format logging
|
# Format logging
|
||||||
setup_logging $WATCHER_CONF
|
setup_logging $WATCHER_CONF
|
||||||
|
|
||||||
#config apache files
|
write_uwsgi_config "$WATCHER_UWSGI_CONF" "$WATCHER_UWSGI" "/infra-optim" "" "watcher-api"
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "uwsgi" ]]; then
|
|
||||||
write_uwsgi_config "$WATCHER_UWSGI_CONF" "$WATCHER_UWSGI" "/infra-optim"
|
|
||||||
else
|
|
||||||
_config_watcher_apache_wsgi
|
|
||||||
fi
|
|
||||||
# Register SSL certificates if provided
|
# Register SSL certificates if provided
|
||||||
if is_ssl_enabled_service watcher; then
|
if is_ssl_enabled_service watcher; then
|
||||||
ensure_certificates WATCHER
|
ensure_certificates WATCHER
|
||||||
@@ -273,9 +227,6 @@ function install_watcherclient {
|
|||||||
function install_watcher {
|
function install_watcher {
|
||||||
git_clone $WATCHER_REPO $WATCHER_DIR $WATCHER_BRANCH
|
git_clone $WATCHER_REPO $WATCHER_DIR $WATCHER_BRANCH
|
||||||
setup_develop $WATCHER_DIR
|
setup_develop $WATCHER_DIR
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "mod_wsgi" ]]; then
|
|
||||||
install_apache_wsgi
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# start_watcher_api() - Start the API process ahead of other things
|
# start_watcher_api() - Start the API process ahead of other things
|
||||||
@@ -289,19 +240,10 @@ function start_watcher_api {
|
|||||||
service_port=$WATCHER_SERVICE_PORT_INT
|
service_port=$WATCHER_SERVICE_PORT_INT
|
||||||
service_protocol="http"
|
service_protocol="http"
|
||||||
fi
|
fi
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "uwsgi" ]]; then
|
|
||||||
run_process "watcher-api" "$(which uwsgi) --procname-prefix watcher-api --ini $WATCHER_UWSGI_CONF"
|
run_process "watcher-api" "$(which uwsgi) --procname-prefix watcher-api --ini $WATCHER_UWSGI_CONF"
|
||||||
watcher_url=$service_protocol://$SERVICE_HOST/infra-optim
|
watcher_url=$service_protocol://$SERVICE_HOST/infra-optim
|
||||||
else
|
# TODO(sean-k-mooney): we should probably check that we can hit
|
||||||
watcher_url=$service_protocol://$SERVICE_HOST:$service_port
|
# the microversion endpoint and get a valid response.
|
||||||
enable_apache_site watcher-api
|
|
||||||
restart_apache_server
|
|
||||||
# Start proxies if enabled
|
|
||||||
if is_service_enabled tls-proxy; then
|
|
||||||
start_tls_proxy watcher '*' $WATCHER_SERVICE_PORT $WATCHER_SERVICE_HOST $WATCHER_SERVICE_PORT_INT
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Waiting for watcher-api to start..."
|
echo "Waiting for watcher-api to start..."
|
||||||
if ! wait_for_service $SERVICE_TIMEOUT $watcher_url; then
|
if ! wait_for_service $SERVICE_TIMEOUT $watcher_url; then
|
||||||
die $LINENO "watcher-api did not start"
|
die $LINENO "watcher-api did not start"
|
||||||
@@ -319,17 +261,25 @@ function start_watcher {
|
|||||||
|
|
||||||
# stop_watcher() - Stop running processes (non-screen)
|
# stop_watcher() - Stop running processes (non-screen)
|
||||||
function stop_watcher {
|
function stop_watcher {
|
||||||
if [[ "$WATCHER_USE_WSGI_MODE" == "uwsgi" ]]; then
|
|
||||||
stop_process watcher-api
|
stop_process watcher-api
|
||||||
else
|
|
||||||
disable_apache_site watcher-api
|
|
||||||
restart_apache_server
|
|
||||||
fi
|
|
||||||
for serv in watcher-decision-engine watcher-applier; do
|
for serv in watcher-decision-engine watcher-applier; do
|
||||||
stop_process $serv
|
stop_process $serv
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# configure_tempest_for_watcher() - Configure Tempest for watcher
|
||||||
|
function configure_tempest_for_watcher {
|
||||||
|
# Set default microversion for watcher-tempest-plugin
|
||||||
|
# Please make sure to update this when the microversion is updated, otherwise
|
||||||
|
# new tests may be skipped.
|
||||||
|
TEMPEST_WATCHER_MIN_MICROVERSION=${TEMPEST_WATCHER_MIN_MICROVERSION:-"1.0"}
|
||||||
|
TEMPEST_WATCHER_MAX_MICROVERSION=${TEMPEST_WATCHER_MAX_MICROVERSION:-"1.4"}
|
||||||
|
|
||||||
|
# Set microversion options in tempest.conf
|
||||||
|
iniset $TEMPEST_CONFIG optimize min_microversion $TEMPEST_WATCHER_MIN_MICROVERSION
|
||||||
|
iniset $TEMPEST_CONFIG optimize max_microversion $TEMPEST_WATCHER_MAX_MICROVERSION
|
||||||
|
}
|
||||||
|
|
||||||
# Restore xtrace
|
# Restore xtrace
|
||||||
$_XTRACE_WATCHER
|
$_XTRACE_WATCHER
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,9 @@ if is_service_enabled watcher-api watcher-decision-engine watcher-applier; then
|
|||||||
# Start the watcher components
|
# Start the watcher components
|
||||||
echo_summary "Starting watcher"
|
echo_summary "Starting watcher"
|
||||||
start_watcher
|
start_watcher
|
||||||
|
elif [[ "$1" == "stack" && "$2" == "test-config" ]]; then
|
||||||
|
echo_summary "Configuring tempest for watcher"
|
||||||
|
configure_tempest_for_watcher
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$1" == "unstack" ]]; then
|
if [[ "$1" == "unstack" ]]; then
|
||||||
|
|||||||
@@ -29,19 +29,19 @@ This default can be overridden when a deployer uses a different label to
|
|||||||
identify the exporter host (for example ``hostname`` or ``host``, or any other
|
identify the exporter host (for example ``hostname`` or ``host``, or any other
|
||||||
label, as long as it identifies the host).
|
label, as long as it identifies the host).
|
||||||
|
|
||||||
Internally this label is used in creating a ``fqdn_instance_map``, mapping
|
Internally this label is used in creating ``fqdn_instance_labels``, containing
|
||||||
the fqdn with the Prometheus instance label associated with each exporter.
|
the list of values assigned to the label in the Prometheus targets.
|
||||||
The keys of the resulting fqdn_instance_map are expected to match the
|
The elements of the resulting fqdn_instance_labels are expected to match the
|
||||||
``ComputeNode.hostname`` used in the Watcher decision engine cluster model.
|
``ComputeNode.hostname`` used in the Watcher decision engine cluster model.
|
||||||
An example ``fqdn_instance_map`` is the following:
|
An example ``fqdn_instance_labels`` is the following:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block::
|
||||||
|
|
||||||
{
|
[
|
||||||
'ena.controlplane.domain': '10.1.2.1:9100',
|
'ena.controlplane.domain',
|
||||||
'dio.controlplane.domain': '10.1.2.2:9100',
|
'dio.controlplane.domain',
|
||||||
'tria.controlplane.domain': '10.1.2.3:9100'
|
'tria.controlplane.domain',
|
||||||
}
|
]
|
||||||
|
|
||||||
For instance metrics, it is required that Prometheus contains a label
|
For instance metrics, it is required that Prometheus contains a label
|
||||||
with the uuid of the OpenStack instance in each relevant metric. By default,
|
with the uuid of the OpenStack instance in each relevant metric. By default,
|
||||||
|
|||||||
3
pyproject.toml
Normal file
3
pyproject.toml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["pbr>=6.0.0", "setuptools>=64.0.0"]
|
||||||
|
build-backend = "pbr.build"
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
A new module, ``watcher.wsgi``, has been added as a place to gather WSGI
|
||||||
|
``application`` objects. This is intended to ease deployment by providing
|
||||||
|
a consistent location for these objects. For example, if using uWSGI then
|
||||||
|
instead of:
|
||||||
|
|
||||||
|
.. code-block:: ini
|
||||||
|
|
||||||
|
[uwsgi]
|
||||||
|
wsgi-file = /bin/watcher-api-wsgi
|
||||||
|
|
||||||
|
You can now use:
|
||||||
|
|
||||||
|
.. code-block:: ini
|
||||||
|
|
||||||
|
[uwsgi]
|
||||||
|
module = watcher.wsgi.api:application
|
||||||
|
|
||||||
|
This also simplifies deployment with other WSGI servers that expect module
|
||||||
|
paths such as gunicorn.
|
||||||
|
deprecations:
|
||||||
|
- |
|
||||||
|
The watcher-api-wsgi console script is deprecated for removal
|
||||||
|
in a future release. This artifact is generated using a setup-tools
|
||||||
|
extension that is provided by PBR which is also deprecated.
|
||||||
|
Due to the changes in Python packaging this custom extension
|
||||||
|
is planned to be removed from all OpenStack projects in a future
|
||||||
|
PBR release in favor of module based wsgi applications entry points.
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
When using prometheus datasource and more than one target has the same value
|
||||||
|
for the ``fqdn_label``, the driver used the wrong instance label to query for host
|
||||||
|
metrics. The ``instance`` label is no longer used in the queries but the ``fqdn_label``
|
||||||
|
which identifies all the metrics for a specific compute node.
|
||||||
|
see Bug 2103451: https://bugs.launchpad.net/watcher/+bug/2103451 for more info.
|
||||||
10
releasenotes/notes/bug-2110947.yaml
Normal file
10
releasenotes/notes/bug-2110947.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Previously, when users attempted to create a new audit without providing
|
||||||
|
a name and a goal or an audit template, the API returned error 500 and an
|
||||||
|
incorrect error message was displayed.
|
||||||
|
|
||||||
|
Now, Watcher displays a helpful message and returns HTTP error 400.
|
||||||
|
|
||||||
|
For more info see: https://bugs.launchpad.net/watcher/+bug/2110947
|
||||||
47
releasenotes/notes/bug-2112187-763bae283e0b736d.yaml
Normal file
47
releasenotes/notes/bug-2112187-763bae283e0b736d.yaml
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
---
|
||||||
|
security:
|
||||||
|
- |
|
||||||
|
Watcher no longer forges requests on behalf of a tenant when
|
||||||
|
swapping volumes. Prior to this release watcher had 2 implementations
|
||||||
|
of moving a volume, it could use cinders volume migrate api or its own
|
||||||
|
internal implementation that directly calls nova volume attachment update
|
||||||
|
api. The former is safe and the recommended way to move volumes between
|
||||||
|
cinder storage backends; the internal implementation was insecure, fragile
|
||||||
|
due to a lack of error handling and capable of deleting user data.
|
||||||
|
|
||||||
|
Insecure: the internal volume migration operation created a new keystone
|
||||||
|
user with a weak name and password and added it to the tenants project
|
||||||
|
with the admin role. It then used that user to forge requests on behalf
|
||||||
|
of the tenant with admin rights to swap the volume. If the applier was
|
||||||
|
restarted during the execution of this operation it would never be cleaned
|
||||||
|
up.
|
||||||
|
|
||||||
|
Fragile: the error handling was minimal, the swap volume api is async
|
||||||
|
so watcher has to poll for completion, there was no support to resume
|
||||||
|
that if interrupted or the timeout was exceeded.
|
||||||
|
|
||||||
|
Data-loss: while the internal polling logic returned success or failure
|
||||||
|
watcher did not check the result, once the function returned it
|
||||||
|
unconditionally deleted the source volume. For larger volumes this
|
||||||
|
could result in irretrievable data loss.
|
||||||
|
|
||||||
|
Finally if a volume was swapped using the internal workflow it put
|
||||||
|
the nova instance in an out of sync state. If the VM was live migrated
|
||||||
|
after the swap volume completed successfully prior to a hard reboot
|
||||||
|
then the migration would fail or succeed and break tenant isolation.
|
||||||
|
|
||||||
|
see: https://bugs.launchpad.net/nova/+bug/2112187 for details.
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
All code related to creating keystone user and granting roles has been
|
||||||
|
removed. The internal swap volume implementation has been removed and
|
||||||
|
replaced by cinders volume migrate api. Note as part of this change
|
||||||
|
Watcher will no longer attempt volume migrations or retypes if the
|
||||||
|
instance is in the `Verify Resize` task state. This resolves several
|
||||||
|
issues related to volume migration in the zone migration and
|
||||||
|
Storage capacity balance strategies. While efforts have been made
|
||||||
|
to maintain backward compatibility these changes are required to
|
||||||
|
address a security weakness in watcher's prior approach.
|
||||||
|
|
||||||
|
see: https://bugs.launchpad.net/nova/+bug/2112187 for more context.
|
||||||
|
|
||||||
14
releasenotes/notes/bug-2113776-4bd314fb46623fbc.yaml
Normal file
14
releasenotes/notes/bug-2113776-4bd314fb46623fbc.yaml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
When running an audit with the `workload_stabilization` strategy with
|
||||||
|
`instance_ram_usage` metric in a deployment with prometheus datasource,
|
||||||
|
the host metric for the ram usage was wrongly reported with the incorrect
|
||||||
|
unit which led to incorrect standard deviation and action plans due to the
|
||||||
|
application of the wrong scale factor in the algorithm.
|
||||||
|
|
||||||
|
The host ram usage metric is now properly reported in KB when using a
|
||||||
|
prometheus datasource and the strategy `workload_stabilization` calculates
|
||||||
|
the standard deviation properly.
|
||||||
|
|
||||||
|
For more details: https://launchpad.net/bugs/2113776
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Host maintenance strategy should migrate servers based on backup node if specified
|
||||||
|
or rely on nova scheduler. It was enabling disabled hosts with watcher_disabled
|
||||||
|
reason and migrating servers to those nodes. It can impact customer workload. Compute
|
||||||
|
nodes were disabled for a reason.
|
||||||
|
|
||||||
|
Host maintenance strategy is fixed now to support migrating servers only on backup
|
||||||
|
node or rely on nova scheduler if no backup node is provided.
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Previously, if an action failed in an action plan, the state of the
|
||||||
|
action plan was reported as SUCCEEDED if the execution of the action has
|
||||||
|
finished regardless of the outcome.
|
||||||
|
|
||||||
|
Watcher will now reflect the actual state of all the actions in the plan
|
||||||
|
after the execution has finished. If any action has status FAILED, it
|
||||||
|
will set the state of the action plan as FAILED. This is the expected
|
||||||
|
behavior according to Watcher documentation.
|
||||||
|
|
||||||
|
For more info see: https://bugs.launchpad.net/watcher/+bug/2106407
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
`Bug #2110538 <https://bugs.launchpad.net/watcher/+bug/2110538>`_:
|
||||||
|
Corrected the HTTP error code returned when watcher users try to create
|
||||||
|
audits with invalid parameters. The API now correctly returns a 400 Bad
|
||||||
|
Request error.
|
||||||
@@ -6,6 +6,7 @@ description_file =
|
|||||||
author = OpenStack
|
author = OpenStack
|
||||||
author_email = openstack-discuss@lists.openstack.org
|
author_email = openstack-discuss@lists.openstack.org
|
||||||
home_page = https://docs.openstack.org/watcher/latest/
|
home_page = https://docs.openstack.org/watcher/latest/
|
||||||
|
# TODO(sean-k-mooney): bump to >= 3.10 before m3.
|
||||||
python_requires = >=3.9
|
python_requires = >=3.9
|
||||||
classifier =
|
classifier =
|
||||||
Environment :: OpenStack
|
Environment :: OpenStack
|
||||||
@@ -17,7 +18,6 @@ classifier =
|
|||||||
Programming Language :: Python :: Implementation :: CPython
|
Programming Language :: Python :: Implementation :: CPython
|
||||||
Programming Language :: Python :: 3 :: Only
|
Programming Language :: Python :: 3 :: Only
|
||||||
Programming Language :: Python :: 3
|
Programming Language :: Python :: 3
|
||||||
Programming Language :: Python :: 3.9
|
|
||||||
Programming Language :: Python :: 3.10
|
Programming Language :: Python :: 3.10
|
||||||
Programming Language :: Python :: 3.11
|
Programming Language :: Python :: 3.11
|
||||||
Programming Language :: Python :: 3.12
|
Programming Language :: Python :: 3.12
|
||||||
|
|||||||
13
tox.ini
13
tox.ini
@@ -8,7 +8,7 @@ basepython = python3
|
|||||||
usedevelop = True
|
usedevelop = True
|
||||||
allowlist_externals = find
|
allowlist_externals = find
|
||||||
rm
|
rm
|
||||||
install_command = pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master} {opts} {packages}
|
install_command = pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/2025.1} {opts} {packages}
|
||||||
setenv =
|
setenv =
|
||||||
VIRTUAL_ENV={envdir}
|
VIRTUAL_ENV={envdir}
|
||||||
OS_STDOUT_CAPTURE=1
|
OS_STDOUT_CAPTURE=1
|
||||||
@@ -106,8 +106,10 @@ commands =
|
|||||||
make -C doc/build/pdf
|
make -C doc/build/pdf
|
||||||
|
|
||||||
[testenv:releasenotes]
|
[testenv:releasenotes]
|
||||||
deps = -r{toxinidir}/doc/requirements.txt
|
deps = {[testenv:docs]deps}
|
||||||
commands = sphinx-build -a -W -E -d releasenotes/build/doctrees --keep-going -b html releasenotes/source releasenotes/build/html
|
commands =
|
||||||
|
rm -rf releasenotes/build
|
||||||
|
sphinx-build -W --keep-going -b html -j auto releasenotes/source releasenotes/build/html
|
||||||
|
|
||||||
[testenv:bandit]
|
[testenv:bandit]
|
||||||
skip_install = true
|
skip_install = true
|
||||||
@@ -146,8 +148,3 @@ extension =
|
|||||||
N342 = checks:no_redundant_import_alias
|
N342 = checks:no_redundant_import_alias
|
||||||
N366 = checks:import_stock_mock
|
N366 = checks:import_stock_mock
|
||||||
paths = ./watcher/hacking
|
paths = ./watcher/hacking
|
||||||
|
|
||||||
[doc8]
|
|
||||||
extension=.rst
|
|
||||||
# todo: stop ignoring doc/source/man when https://bugs.launchpad.net/doc8/+bug/1502391 is fixed
|
|
||||||
ignore-path=doc/source/image_src,doc/source/man,doc/source/api
|
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ import datetime
|
|||||||
from dateutil import tz
|
from dateutil import tz
|
||||||
|
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
|
import jsonschema
|
||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
from oslo_utils import timeutils
|
from oslo_utils import timeutils
|
||||||
import pecan
|
import pecan
|
||||||
@@ -114,6 +115,11 @@ class AuditPostType(wtypes.Base):
|
|||||||
if self.audit_type not in audit_type_values:
|
if self.audit_type not in audit_type_values:
|
||||||
raise exception.AuditTypeNotFound(audit_type=self.audit_type)
|
raise exception.AuditTypeNotFound(audit_type=self.audit_type)
|
||||||
|
|
||||||
|
if not self.audit_template_uuid and not self.goal:
|
||||||
|
message = _(
|
||||||
|
'A valid goal or audit_template_id must be provided')
|
||||||
|
raise exception.Invalid(message)
|
||||||
|
|
||||||
if (self.audit_type == objects.audit.AuditType.ONESHOT.value and
|
if (self.audit_type == objects.audit.AuditType.ONESHOT.value and
|
||||||
self.interval not in (wtypes.Unset, None)):
|
self.interval not in (wtypes.Unset, None)):
|
||||||
raise exception.AuditIntervalNotAllowed(audit_type=self.audit_type)
|
raise exception.AuditIntervalNotAllowed(audit_type=self.audit_type)
|
||||||
@@ -612,11 +618,6 @@ class AuditsController(rest.RestController):
|
|||||||
if self.from_audits:
|
if self.from_audits:
|
||||||
raise exception.OperationNotPermitted
|
raise exception.OperationNotPermitted
|
||||||
|
|
||||||
if not audit._goal_uuid:
|
|
||||||
raise exception.Invalid(
|
|
||||||
message=_('A valid goal_id or audit_template_id '
|
|
||||||
'must be provided'))
|
|
||||||
|
|
||||||
strategy_uuid = audit.strategy_uuid
|
strategy_uuid = audit.strategy_uuid
|
||||||
no_schema = True
|
no_schema = True
|
||||||
if strategy_uuid is not None:
|
if strategy_uuid is not None:
|
||||||
@@ -627,8 +628,12 @@ class AuditsController(rest.RestController):
|
|||||||
if schema:
|
if schema:
|
||||||
# validate input parameter with default value feedback
|
# validate input parameter with default value feedback
|
||||||
no_schema = False
|
no_schema = False
|
||||||
utils.StrictDefaultValidatingDraft4Validator(schema).validate(
|
try:
|
||||||
audit.parameters)
|
utils.StrictDefaultValidatingDraft4Validator(
|
||||||
|
schema).validate(audit.parameters)
|
||||||
|
except jsonschema.exceptions.ValidationError as e:
|
||||||
|
raise exception.Invalid(
|
||||||
|
_('Invalid parameters for strategy: %s') % e)
|
||||||
|
|
||||||
if no_schema and audit.parameters:
|
if no_schema and audit.parameters:
|
||||||
raise exception.Invalid(_('Specify parameters but no predefined '
|
raise exception.Invalid(_('Specify parameters but no predefined '
|
||||||
|
|||||||
@@ -56,12 +56,30 @@ class DefaultActionPlanHandler(base.BaseActionPlanHandler):
|
|||||||
applier = default.DefaultApplier(self.ctx, self.service)
|
applier = default.DefaultApplier(self.ctx, self.service)
|
||||||
applier.execute(self.action_plan_uuid)
|
applier.execute(self.action_plan_uuid)
|
||||||
|
|
||||||
action_plan.state = objects.action_plan.State.SUCCEEDED
|
# If any action has failed the action plan should be FAILED
|
||||||
|
# Define default values for successful execution
|
||||||
|
ap_state = objects.action_plan.State.SUCCEEDED
|
||||||
|
notification_kwargs = {
|
||||||
|
'phase': fields.NotificationPhase.END
|
||||||
|
}
|
||||||
|
|
||||||
|
failed_filter = {'action_plan_uuid': self.action_plan_uuid,
|
||||||
|
'state': objects.action.State.FAILED}
|
||||||
|
failed_actions = objects.Action.list(
|
||||||
|
self.ctx, filters=failed_filter, eager=True)
|
||||||
|
if failed_actions:
|
||||||
|
ap_state = objects.action_plan.State.FAILED
|
||||||
|
notification_kwargs = {
|
||||||
|
'phase': fields.NotificationPhase.ERROR,
|
||||||
|
'priority': fields.NotificationPriority.ERROR
|
||||||
|
}
|
||||||
|
|
||||||
|
action_plan.state = ap_state
|
||||||
action_plan.save()
|
action_plan.save()
|
||||||
notifications.action_plan.send_action_notification(
|
notifications.action_plan.send_action_notification(
|
||||||
self.ctx, action_plan,
|
self.ctx, action_plan,
|
||||||
action=fields.NotificationAction.EXECUTION,
|
action=fields.NotificationAction.EXECUTION,
|
||||||
phase=fields.NotificationPhase.END)
|
**notification_kwargs)
|
||||||
|
|
||||||
except exception.ActionPlanCancelled as e:
|
except exception.ActionPlanCancelled as e:
|
||||||
LOG.exception(e)
|
LOG.exception(e)
|
||||||
|
|||||||
@@ -17,14 +17,11 @@ import jsonschema
|
|||||||
|
|
||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
|
|
||||||
from cinderclient import client as cinder_client
|
|
||||||
from watcher._i18n import _
|
from watcher._i18n import _
|
||||||
from watcher.applier.actions import base
|
from watcher.applier.actions import base
|
||||||
from watcher.common import cinder_helper
|
from watcher.common import cinder_helper
|
||||||
from watcher.common import exception
|
from watcher.common import exception
|
||||||
from watcher.common import keystone_helper
|
|
||||||
from watcher.common import nova_helper
|
from watcher.common import nova_helper
|
||||||
from watcher.common import utils
|
|
||||||
from watcher import conf
|
from watcher import conf
|
||||||
|
|
||||||
CONF = conf.CONF
|
CONF = conf.CONF
|
||||||
@@ -70,8 +67,6 @@ class VolumeMigrate(base.BaseAction):
|
|||||||
|
|
||||||
def __init__(self, config, osc=None):
|
def __init__(self, config, osc=None):
|
||||||
super(VolumeMigrate, self).__init__(config)
|
super(VolumeMigrate, self).__init__(config)
|
||||||
self.temp_username = utils.random_string(10)
|
|
||||||
self.temp_password = utils.random_string(10)
|
|
||||||
self.cinder_util = cinder_helper.CinderHelper(osc=self.osc)
|
self.cinder_util = cinder_helper.CinderHelper(osc=self.osc)
|
||||||
self.nova_util = nova_helper.NovaHelper(osc=self.osc)
|
self.nova_util = nova_helper.NovaHelper(osc=self.osc)
|
||||||
|
|
||||||
@@ -134,83 +129,42 @@ class VolumeMigrate(base.BaseAction):
|
|||||||
|
|
||||||
def _can_swap(self, volume):
|
def _can_swap(self, volume):
|
||||||
"""Judge volume can be swapped"""
|
"""Judge volume can be swapped"""
|
||||||
|
# TODO(sean-k-mooney): rename this to _can_migrate and update
|
||||||
|
# tests to reflect that.
|
||||||
|
|
||||||
|
# cinder volume migration can migrate volumes that are not
|
||||||
|
# attached to instances or nova can migrate the data for cinder
|
||||||
|
# if the volume is in-use. If the volume has no attachments
|
||||||
|
# allow cinder to decide if it can be migrated.
|
||||||
if not volume.attachments:
|
if not volume.attachments:
|
||||||
return False
|
LOG.debug(f"volume: {volume.id} has no attachments")
|
||||||
instance_id = volume.attachments[0]['server_id']
|
|
||||||
instance_status = self.nova_util.find_instance(instance_id).status
|
|
||||||
|
|
||||||
if (volume.status == 'in-use' and
|
|
||||||
instance_status in ('ACTIVE', 'PAUSED', 'RESIZED')):
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
# since it has attachments we need to validate nova's constraints
|
||||||
|
instance_id = volume.attachments[0]['server_id']
|
||||||
def _create_user(self, volume, user):
|
instance_status = self.nova_util.find_instance(instance_id).status
|
||||||
"""Create user with volume attribute and user information"""
|
LOG.debug(
|
||||||
keystone_util = keystone_helper.KeystoneHelper(osc=self.osc)
|
f"volume: {volume.id} is attached to instance: {instance_id} "
|
||||||
project_id = getattr(volume, 'os-vol-tenant-attr:tenant_id')
|
f"in instance status: {instance_status}")
|
||||||
user['project'] = project_id
|
# NOTE(sean-k-mooney): This used to allow RESIZED which
|
||||||
user['domain'] = keystone_util.get_project(project_id).domain_id
|
# is the resize_verify task state, that is not an acceptable time
|
||||||
user['roles'] = ['admin']
|
# to migrate volumes, if nova does not block this in the API
|
||||||
return keystone_util.create_user(user)
|
# today that is probably a bug. PAUSED is also questionable but
|
||||||
|
# it should generally be safe.
|
||||||
def _get_cinder_client(self, session):
|
return (volume.status == 'in-use' and
|
||||||
"""Get cinder client by session"""
|
instance_status in ('ACTIVE', 'PAUSED'))
|
||||||
return cinder_client.Client(
|
|
||||||
CONF.cinder_client.api_version,
|
|
||||||
session=session,
|
|
||||||
endpoint_type=CONF.cinder_client.endpoint_type)
|
|
||||||
|
|
||||||
def _swap_volume(self, volume, dest_type):
|
|
||||||
"""Swap volume to dest_type
|
|
||||||
|
|
||||||
Limitation note: only for compute libvirt driver
|
|
||||||
"""
|
|
||||||
if not dest_type:
|
|
||||||
raise exception.Invalid(
|
|
||||||
message=(_("destination type is required when "
|
|
||||||
"migration type is swap")))
|
|
||||||
|
|
||||||
|
def _migrate(self, volume_id, dest_node, dest_type):
|
||||||
|
try:
|
||||||
|
volume = self.cinder_util.get_volume(volume_id)
|
||||||
|
# for backward compatibility map swap to migrate.
|
||||||
|
if self.migration_type in (self.SWAP, self.MIGRATE):
|
||||||
if not self._can_swap(volume):
|
if not self._can_swap(volume):
|
||||||
raise exception.Invalid(
|
raise exception.Invalid(
|
||||||
message=(_("Invalid state for swapping volume")))
|
message=(_("Invalid state for swapping volume")))
|
||||||
|
return self.cinder_util.migrate(volume, dest_node)
|
||||||
user_info = {
|
|
||||||
'name': self.temp_username,
|
|
||||||
'password': self.temp_password}
|
|
||||||
user = self._create_user(volume, user_info)
|
|
||||||
keystone_util = keystone_helper.KeystoneHelper(osc=self.osc)
|
|
||||||
try:
|
|
||||||
session = keystone_util.create_session(
|
|
||||||
user.id, self.temp_password)
|
|
||||||
temp_cinder = self._get_cinder_client(session)
|
|
||||||
|
|
||||||
# swap volume
|
|
||||||
new_volume = self.cinder_util.create_volume(
|
|
||||||
temp_cinder, volume, dest_type)
|
|
||||||
self.nova_util.swap_volume(volume, new_volume)
|
|
||||||
|
|
||||||
# delete old volume
|
|
||||||
self.cinder_util.delete_volume(volume)
|
|
||||||
|
|
||||||
finally:
|
|
||||||
keystone_util.delete_user(user)
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
def _migrate(self, volume_id, dest_node, dest_type):
|
|
||||||
|
|
||||||
try:
|
|
||||||
volume = self.cinder_util.get_volume(volume_id)
|
|
||||||
if self.migration_type == self.SWAP:
|
|
||||||
if dest_node:
|
|
||||||
LOG.warning("dest_node is ignored")
|
|
||||||
return self._swap_volume(volume, dest_type)
|
|
||||||
elif self.migration_type == self.RETYPE:
|
elif self.migration_type == self.RETYPE:
|
||||||
return self.cinder_util.retype(volume, dest_type)
|
return self.cinder_util.retype(volume, dest_type)
|
||||||
elif self.migration_type == self.MIGRATE:
|
|
||||||
return self.cinder_util.migrate(volume, dest_node)
|
|
||||||
else:
|
else:
|
||||||
raise exception.Invalid(
|
raise exception.Invalid(
|
||||||
message=(_("Migration of type '%(migration_type)s' is not "
|
message=(_("Migration of type '%(migration_type)s' is not "
|
||||||
|
|||||||
@@ -15,8 +15,6 @@
|
|||||||
from oslo_log import log
|
from oslo_log import log
|
||||||
|
|
||||||
from keystoneauth1.exceptions import http as ks_exceptions
|
from keystoneauth1.exceptions import http as ks_exceptions
|
||||||
from keystoneauth1 import loading
|
|
||||||
from keystoneauth1 import session
|
|
||||||
from watcher._i18n import _
|
from watcher._i18n import _
|
||||||
from watcher.common import clients
|
from watcher.common import clients
|
||||||
from watcher.common import exception
|
from watcher.common import exception
|
||||||
@@ -90,35 +88,3 @@ class KeystoneHelper(object):
|
|||||||
message=(_("Domain name seems ambiguous: %s") %
|
message=(_("Domain name seems ambiguous: %s") %
|
||||||
name_or_id))
|
name_or_id))
|
||||||
return domains[0]
|
return domains[0]
|
||||||
|
|
||||||
def create_session(self, user_id, password):
|
|
||||||
user = self.get_user(user_id)
|
|
||||||
loader = loading.get_plugin_loader('password')
|
|
||||||
auth = loader.load_from_options(
|
|
||||||
auth_url=CONF.watcher_clients_auth.auth_url,
|
|
||||||
password=password,
|
|
||||||
user_id=user_id,
|
|
||||||
project_id=user.default_project_id)
|
|
||||||
return session.Session(auth=auth)
|
|
||||||
|
|
||||||
def create_user(self, user):
|
|
||||||
project = self.get_project(user['project'])
|
|
||||||
domain = self.get_domain(user['domain'])
|
|
||||||
_user = self.keystone.users.create(
|
|
||||||
user['name'],
|
|
||||||
password=user['password'],
|
|
||||||
domain=domain,
|
|
||||||
project=project,
|
|
||||||
)
|
|
||||||
for role in user['roles']:
|
|
||||||
role = self.get_role(role)
|
|
||||||
self.keystone.roles.grant(
|
|
||||||
role.id, user=_user.id, project=project.id)
|
|
||||||
return _user
|
|
||||||
|
|
||||||
def delete_user(self, user):
|
|
||||||
try:
|
|
||||||
user = self.get_user(user)
|
|
||||||
self.keystone.users.delete(user)
|
|
||||||
except exception.Invalid:
|
|
||||||
pass
|
|
||||||
|
|||||||
@@ -19,9 +19,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import datetime
|
import datetime
|
||||||
import inspect
|
import inspect
|
||||||
import random
|
|
||||||
import re
|
import re
|
||||||
import string
|
|
||||||
|
|
||||||
from croniter import croniter
|
from croniter import croniter
|
||||||
import eventlet
|
import eventlet
|
||||||
@@ -160,14 +158,10 @@ def extend_with_strict_schema(validator_class):
|
|||||||
StrictDefaultValidatingDraft4Validator = extend_with_default(
|
StrictDefaultValidatingDraft4Validator = extend_with_default(
|
||||||
extend_with_strict_schema(validators.Draft4Validator))
|
extend_with_strict_schema(validators.Draft4Validator))
|
||||||
|
|
||||||
|
|
||||||
Draft4Validator = validators.Draft4Validator
|
Draft4Validator = validators.Draft4Validator
|
||||||
|
|
||||||
|
|
||||||
def random_string(n):
|
|
||||||
return ''.join([random.choice(
|
|
||||||
string.ascii_letters + string.digits) for i in range(n)])
|
|
||||||
|
|
||||||
|
|
||||||
# Some clients (e.g. MAAS) use asyncio, which isn't compatible with Eventlet.
|
# Some clients (e.g. MAAS) use asyncio, which isn't compatible with Eventlet.
|
||||||
# As a workaround, we're delegating such calls to a native thread.
|
# As a workaround, we're delegating such calls to a native thread.
|
||||||
def async_compat_call(f, *args, **kwargs):
|
def async_compat_call(f, *args, **kwargs):
|
||||||
|
|||||||
@@ -178,7 +178,7 @@ class DataSourceBase(object):
|
|||||||
granularity=None):
|
granularity=None):
|
||||||
"""Get the ram usage for a host such as a compute_node
|
"""Get the ram usage for a host such as a compute_node
|
||||||
|
|
||||||
:return: ram usage as float in megabytes
|
:return: ram usage as float in kibibytes
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -51,10 +51,10 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
|
|
||||||
The prometheus helper uses the PrometheusAPIClient provided by
|
The prometheus helper uses the PrometheusAPIClient provided by
|
||||||
python-observabilityclient.
|
python-observabilityclient.
|
||||||
The prometheus_fqdn_instance_map maps the fqdn of each node to the
|
The prometheus_fqdn_labels contains a list of the values contained in
|
||||||
Prometheus instance label added to all metrics on that node. When
|
the fqdn_label in the Prometheus instance. When making queries to
|
||||||
making queries to Prometheus we use the instance label to specify
|
Prometheus we use the fqdn_label to specify the node for which
|
||||||
the node for which metrics are to be retrieved.
|
metrics are to be retrieved.
|
||||||
host, port and fqdn_label come from watcher_client
|
host, port and fqdn_label come from watcher_client
|
||||||
config. The prometheus_fqdn_label allows override of the required label
|
config. The prometheus_fqdn_label allows override of the required label
|
||||||
in Prometheus scrape configs that specifies each target's fqdn.
|
in Prometheus scrape configs that specifies each target's fqdn.
|
||||||
@@ -63,8 +63,8 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
self.prometheus_fqdn_label = (
|
self.prometheus_fqdn_label = (
|
||||||
CONF.prometheus_client.fqdn_label
|
CONF.prometheus_client.fqdn_label
|
||||||
)
|
)
|
||||||
self.prometheus_fqdn_instance_map = (
|
self.prometheus_fqdn_labels = (
|
||||||
self._build_prometheus_fqdn_instance_map()
|
self._build_prometheus_fqdn_labels()
|
||||||
)
|
)
|
||||||
self.prometheus_host_instance_map = (
|
self.prometheus_host_instance_map = (
|
||||||
self._build_prometheus_host_instance_map()
|
self._build_prometheus_host_instance_map()
|
||||||
@@ -136,73 +136,71 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
|
|
||||||
return the_client
|
return the_client
|
||||||
|
|
||||||
def _build_prometheus_fqdn_instance_map(self):
|
def _build_prometheus_fqdn_labels(self):
|
||||||
"""Build the fqdn<-->instance_label mapping needed for queries
|
"""Build the list of fqdn_label values to be used in host queries
|
||||||
|
|
||||||
Watcher knows nodes by their hostname. In Prometheus however the
|
Watcher knows nodes by their hostname. In Prometheus however the
|
||||||
scrape targets (also known as 'instances') are specified by I.P.
|
scrape targets (also known as 'instances') are specified by I.P.
|
||||||
(or hostname) and port number. This function creates a mapping between
|
(or hostname) and port number and fqdn is stored in a custom 'fqdn'
|
||||||
the fully qualified domain name of each node and the corresponding
|
label added to Prometheus scrape_configs. Operators can use a
|
||||||
instance label used in the scrape config. This relies on a custom
|
different custom label instead by setting the prometheus_fqdn_label
|
||||||
'fqdn' label added to Prometheus scrape_configs. Operators can use
|
|
||||||
a different custom label instead by setting the prometheus_fqdn_label
|
|
||||||
config option under the prometheus_client section of watcher config.
|
config option under the prometheus_client section of watcher config.
|
||||||
The built prometheus_fqdn_instance_map is used to match watcher
|
The built prometheus_fqdn_labels is created with the full list
|
||||||
node.hostname if watcher stores fqdn and otherwise the
|
of values of the prometheus_fqdn_label label in Prometheus. This will
|
||||||
host_instance_map is used instead.
|
be used to create a map of hostname<-->fqdn and to identify if a target
|
||||||
:return a dict mapping fqdn to instance label. For example:
|
exists in prometheus for the compute nodes before sending the query.
|
||||||
{'marios-env-again.controlplane.domain': '10.1.2.3:9100'}
|
:return a set of values of the fqdn label. For example:
|
||||||
|
{'foo.example.com', 'bar.example.com'}
|
||||||
|
{'foo', 'bar'}
|
||||||
"""
|
"""
|
||||||
prometheus_targets = self.prometheus._get(
|
prometheus_targets = self.prometheus._get(
|
||||||
"targets?state=active")['data']['activeTargets']
|
"targets?state=active")['data']['activeTargets']
|
||||||
# >>> prometheus_targets[0]['labels']
|
# >>> prometheus_targets[0]['labels']
|
||||||
# {'fqdn': 'marios-env-again.controlplane.domain',
|
# {'fqdn': 'marios-env-again.controlplane.domain',
|
||||||
# 'instance': 'localhost:9100', 'job': 'node'}
|
# 'instance': 'localhost:9100', 'job': 'node'}
|
||||||
fqdn_instance_map = {
|
fqdn_instance_labels = set()
|
||||||
fqdn: instance for (fqdn, instance) in (
|
for target in prometheus_targets:
|
||||||
(target['labels'].get(self.prometheus_fqdn_label),
|
if target.get('labels', {}).get(self.prometheus_fqdn_label):
|
||||||
target['labels'].get('instance'))
|
fqdn_instance_labels.add(
|
||||||
for target in prometheus_targets
|
target['labels'].get(self.prometheus_fqdn_label))
|
||||||
if target.get('labels', {}).get(self.prometheus_fqdn_label)
|
|
||||||
)
|
if not fqdn_instance_labels:
|
||||||
}
|
|
||||||
if not fqdn_instance_map:
|
|
||||||
LOG.error(
|
LOG.error(
|
||||||
"Could not create fqdn instance map from Prometheus "
|
"Could not create fqdn labels list from Prometheus "
|
||||||
"targets config. Prometheus returned the following: %s",
|
"targets config. Prometheus returned the following: %s",
|
||||||
prometheus_targets
|
prometheus_targets
|
||||||
)
|
)
|
||||||
return {}
|
return set()
|
||||||
return fqdn_instance_map
|
return fqdn_instance_labels
|
||||||
|
|
||||||
def _build_prometheus_host_instance_map(self):
|
def _build_prometheus_host_instance_map(self):
|
||||||
"""Build the hostname<-->instance_label mapping needed for queries
|
"""Build the hostname<-->instance_label mapping needed for queries
|
||||||
|
|
||||||
The prometheus_fqdn_instance_map has the fully qualified domain name
|
The prometheus_fqdn_labels has the fully qualified domain name
|
||||||
for hosts. This will create a duplicate map containing only the host
|
for hosts. This will create a duplicate map containing only the host
|
||||||
name part. Depending on the watcher node.hostname either the
|
name part. Depending on the watcher node.hostname either the
|
||||||
fqdn_instance_map or the host_instance_map will be used to resolve
|
fqdn_instance_labels or the host_instance_map will be used to resolve
|
||||||
the correct prometheus instance label for queries. In the event the
|
the correct prometheus fqdn_label for queries. In the event the
|
||||||
fqdn_instance_map keys are not valid fqdn (for example it contains
|
fqdn_instance_labels elements are not valid fqdn (for example it has
|
||||||
hostnames, not fqdn) the host_instance_map cannot be created and
|
hostnames, not fqdn) the host_instance_map cannot be created and
|
||||||
an empty dictionary is returned with a warning logged.
|
an empty dictionary is returned with a warning logged.
|
||||||
:return a dict mapping hostname to instance label. For example:
|
:return a dict mapping hostname to fqdn label value. For example:
|
||||||
{'marios-env-again': 'localhost:9100'}
|
{'foo': 'foo.example.com', 'bar': 'bar.example.com'}
|
||||||
"""
|
"""
|
||||||
if not self.prometheus_fqdn_instance_map:
|
if not self.prometheus_fqdn_labels:
|
||||||
LOG.error("Cannot build host_instance_map without "
|
LOG.error("Cannot build host_instance_map without "
|
||||||
"fqdn_instance_map")
|
"fqdn_instance_labels")
|
||||||
return {}
|
return {}
|
||||||
host_instance_map = {
|
host_instance_map = {
|
||||||
host: instance for (host, instance) in (
|
host: fqdn for (host, fqdn) in (
|
||||||
(fqdn.split('.')[0], inst)
|
(fqdn.split('.')[0], fqdn)
|
||||||
for fqdn, inst in self.prometheus_fqdn_instance_map.items()
|
for fqdn in self.prometheus_fqdn_labels
|
||||||
if '.' in fqdn
|
if '.' in fqdn
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
if not host_instance_map:
|
if not host_instance_map:
|
||||||
LOG.warning("Creating empty host instance map. Are the keys "
|
LOG.warning("Creating empty host instance map. Are the keys "
|
||||||
"in prometheus_fqdn_instance_map valid fqdn?")
|
"in prometheus_fqdn_labels valid fqdn?")
|
||||||
return {}
|
return {}
|
||||||
return host_instance_map
|
return host_instance_map
|
||||||
|
|
||||||
@@ -210,23 +208,25 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
"""Resolve the prometheus instance label to use in queries
|
"""Resolve the prometheus instance label to use in queries
|
||||||
|
|
||||||
Given the watcher node.hostname, resolve the prometheus instance
|
Given the watcher node.hostname, resolve the prometheus instance
|
||||||
label for use in queries, first trying the fqdn_instance_map and
|
label for use in queries, first trying the fqdn_instance_labels and
|
||||||
then the host_instance_map (watcher.node_name can be fqdn or hostname).
|
then the host_instance_map (watcher.node_name can be fqdn or hostname).
|
||||||
If the name is not resolved after the first attempt, rebuild the fqdn
|
If the name is not resolved after the first attempt, rebuild the fqdn
|
||||||
and host instance maps and try again. This allows for new hosts added
|
and host instance maps and try again. This allows for new hosts added
|
||||||
after the initialisation of the fqdn_instance_map.
|
after the initialisation of the fqdn_instance_labels.
|
||||||
:param node_name: the watcher node.hostname
|
:param node_name: the watcher node.hostname
|
||||||
:return String for the prometheus instance label and None if not found
|
:return String for the prometheus instance label and None if not found
|
||||||
"""
|
"""
|
||||||
def _query_maps(node):
|
def _query_maps(node):
|
||||||
return self.prometheus_fqdn_instance_map.get(
|
if node in self.prometheus_fqdn_labels:
|
||||||
node, self.prometheus_host_instance_map.get(node, None))
|
return node
|
||||||
|
else:
|
||||||
|
return self.prometheus_host_instance_map.get(node, None)
|
||||||
|
|
||||||
instance_label = _query_maps(node_name)
|
instance_label = _query_maps(node_name)
|
||||||
# refresh the fqdn and host instance maps and retry
|
# refresh the fqdn and host instance maps and retry
|
||||||
if not instance_label:
|
if not instance_label:
|
||||||
self.prometheus_fqdn_instance_map = (
|
self.prometheus_fqdn_labels = (
|
||||||
self._build_prometheus_fqdn_instance_map()
|
self._build_prometheus_fqdn_labels()
|
||||||
)
|
)
|
||||||
self.prometheus_host_instance_map = (
|
self.prometheus_host_instance_map = (
|
||||||
self._build_prometheus_host_instance_map()
|
self._build_prometheus_host_instance_map()
|
||||||
@@ -264,8 +264,8 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
This function builds and returns the string query that will be sent
|
This function builds and returns the string query that will be sent
|
||||||
to the Prometheus server /query endpoint. For host cpu usage we use:
|
to the Prometheus server /query endpoint. For host cpu usage we use:
|
||||||
|
|
||||||
100 - (avg by (instance)(rate(node_cpu_seconds_total{mode='idle',
|
100 - (avg by (fqdn)(rate(node_cpu_seconds_total{mode='idle',
|
||||||
instance='some_host'}[300s])) * 100)
|
fqdn='some_host'}[300s])) * 100)
|
||||||
|
|
||||||
so using prometheus rate function over the specified period, we average
|
so using prometheus rate function over the specified period, we average
|
||||||
per instance (all cpus) idle time and then 'everything else' is cpu
|
per instance (all cpus) idle time and then 'everything else' is cpu
|
||||||
@@ -276,7 +276,7 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
(node_memory_MemTotal_bytes{instance='the_host'} -
|
(node_memory_MemTotal_bytes{instance='the_host'} -
|
||||||
avg_over_time(
|
avg_over_time(
|
||||||
node_memory_MemAvailable_bytes{instance='the_host'}[300s]))
|
node_memory_MemAvailable_bytes{instance='the_host'}[300s]))
|
||||||
/ 1024 / 1024
|
/ 1024
|
||||||
|
|
||||||
So we take total and subtract available memory to determine
|
So we take total and subtract available memory to determine
|
||||||
how much is in use. We use the prometheus xxx_over_time functions
|
how much is in use. We use the prometheus xxx_over_time functions
|
||||||
@@ -307,17 +307,22 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
|
|
||||||
if meter == 'node_cpu_seconds_total':
|
if meter == 'node_cpu_seconds_total':
|
||||||
query_args = (
|
query_args = (
|
||||||
"100 - (%s by (instance)(rate(%s"
|
"100 - (%(agg)s by (%(label)s)(rate(%(meter)s"
|
||||||
"{mode='idle',instance='%s'}[%ss])) * 100)" %
|
"{mode='idle',%(label)s='%(label_value)s'}[%(period)ss])) "
|
||||||
(aggregate, meter, instance_label, period)
|
"* 100)"
|
||||||
|
% {'label': self.prometheus_fqdn_label,
|
||||||
|
'label_value': instance_label, 'agg': aggregate,
|
||||||
|
'meter': meter, 'period': period}
|
||||||
)
|
)
|
||||||
elif meter == 'node_memory_MemAvailable_bytes':
|
elif meter == 'node_memory_MemAvailable_bytes':
|
||||||
|
# Prometheus metric is in B and we need to return KB
|
||||||
query_args = (
|
query_args = (
|
||||||
"(node_memory_MemTotal_bytes{instance='%s'} "
|
"(node_memory_MemTotal_bytes{%(label)s='%(label_value)s'} "
|
||||||
"- %s_over_time(%s{instance='%s'}[%ss])) "
|
"- %(agg)s_over_time(%(meter)s{%(label)s='%(label_value)s'}"
|
||||||
"/ 1024 / 1024" %
|
"[%(period)ss])) / 1024"
|
||||||
(instance_label, aggregate, meter,
|
% {'label': self.prometheus_fqdn_label,
|
||||||
instance_label, period)
|
'label_value': instance_label, 'agg': aggregate,
|
||||||
|
'meter': meter, 'period': period}
|
||||||
)
|
)
|
||||||
elif meter == 'ceilometer_memory_usage':
|
elif meter == 'ceilometer_memory_usage':
|
||||||
query_args = (
|
query_args = (
|
||||||
@@ -338,10 +343,12 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
)
|
)
|
||||||
vcpus = 1
|
vcpus = 1
|
||||||
query_args = (
|
query_args = (
|
||||||
"clamp_max((%s by (instance)(rate(%s{%s='%s'}[%ss]))/10e+8) "
|
"clamp_max((%(agg)s by (%(label)s)"
|
||||||
"*(100/%s), 100)" %
|
"(rate(%(meter)s{%(label)s='%(label_value)s'}[%(period)ss]))"
|
||||||
(aggregate, meter, uuid_label_key, instance_label, period,
|
"/10e+8) *(100/%(vcpus)s), 100)"
|
||||||
vcpus)
|
% {'label': uuid_label_key, 'label_value': instance_label,
|
||||||
|
'agg': aggregate, 'meter': meter, 'period': period,
|
||||||
|
'vcpus': vcpus}
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise exception.InvalidParameter(
|
raise exception.InvalidParameter(
|
||||||
@@ -460,8 +467,8 @@ class PrometheusHelper(base.DataSourceBase):
|
|||||||
This calculates the host cpu usage and returns it as a percentage
|
This calculates the host cpu usage and returns it as a percentage
|
||||||
The calculation is made by using the cpu 'idle' time, per
|
The calculation is made by using the cpu 'idle' time, per
|
||||||
instance (so all CPUs are included). For example the query looks like
|
instance (so all CPUs are included). For example the query looks like
|
||||||
(100 - (avg by (instance)(rate(node_cpu_seconds_total
|
(100 - (avg by (fqdn)(rate(node_cpu_seconds_total
|
||||||
{mode='idle',instance='localhost:9100'}[300s])) * 100))
|
{mode='idle',fqdn='compute1.example.com'}[300s])) * 100))
|
||||||
"""
|
"""
|
||||||
aggregate = self._invert_max_min_aggregate(aggregate)
|
aggregate = self._invert_max_min_aggregate(aggregate)
|
||||||
cpu_usage = self.statistic_aggregation(
|
cpu_usage = self.statistic_aggregation(
|
||||||
|
|||||||
@@ -53,7 +53,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
|
|||||||
|
|
||||||
INSTANCE_MIGRATION = "migrate"
|
INSTANCE_MIGRATION = "migrate"
|
||||||
CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state"
|
CHANGE_NOVA_SERVICE_STATE = "change_nova_service_state"
|
||||||
REASON_FOR_DISABLE = 'watcher_disabled'
|
|
||||||
|
|
||||||
def __init__(self, config, osc=None):
|
def __init__(self, config, osc=None):
|
||||||
super(HostMaintenance, self).__init__(config, osc)
|
super(HostMaintenance, self).__init__(config, osc)
|
||||||
@@ -95,10 +94,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
|
|||||||
cn.status == element.ServiceState.DISABLED.value and
|
cn.status == element.ServiceState.DISABLED.value and
|
||||||
cn.disabled_reason == reason}
|
cn.disabled_reason == reason}
|
||||||
|
|
||||||
def get_disabled_compute_nodes(self):
|
|
||||||
return self.get_disabled_compute_nodes_with_reason(
|
|
||||||
self.REASON_FOR_DISABLE)
|
|
||||||
|
|
||||||
def get_instance_state_str(self, instance):
|
def get_instance_state_str(self, instance):
|
||||||
"""Get instance state in string format"""
|
"""Get instance state in string format"""
|
||||||
if isinstance(instance.state, str):
|
if isinstance(instance.state, str):
|
||||||
@@ -195,7 +190,7 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
|
|||||||
'source_node': src_node.uuid,
|
'source_node': src_node.uuid,
|
||||||
'resource_name': instance.name}
|
'resource_name': instance.name}
|
||||||
if des_node:
|
if des_node:
|
||||||
params['destination_node'] = des_node.uuid
|
params['destination_node'] = des_node.hostname
|
||||||
self.solution.add_action(action_type=self.INSTANCE_MIGRATION,
|
self.solution.add_action(action_type=self.INSTANCE_MIGRATION,
|
||||||
resource_id=instance.uuid,
|
resource_id=instance.uuid,
|
||||||
input_parameters=params)
|
input_parameters=params)
|
||||||
@@ -215,8 +210,7 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
|
|||||||
"""safe maintain one compute node
|
"""safe maintain one compute node
|
||||||
|
|
||||||
Migrate all instances of the maintenance_node intensively to the
|
Migrate all instances of the maintenance_node intensively to the
|
||||||
backup host. If the user didn't give the backup host, it will
|
backup host.
|
||||||
select one unused node to backup the maintaining node.
|
|
||||||
|
|
||||||
It calculates the resources of both the backup node and maintaining
|
It calculates the resources of both the backup node and maintaining
|
||||||
node to evaluate the migrations from maintaining node to backup node.
|
node to evaluate the migrations from maintaining node to backup node.
|
||||||
@@ -233,22 +227,6 @@ class HostMaintenance(base.HostMaintenanceBaseStrategy):
|
|||||||
self.host_migration(maintenance_node, backup_node)
|
self.host_migration(maintenance_node, backup_node)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# If the user didn't give the backup host, select one unused
|
|
||||||
# node with required capacity, then migrates all instances
|
|
||||||
# from maintaining node to it.
|
|
||||||
nodes = sorted(
|
|
||||||
self.get_disabled_compute_nodes().values(),
|
|
||||||
key=lambda x: self.get_node_capacity(x)['cpu'])
|
|
||||||
if maintenance_node in nodes:
|
|
||||||
nodes.remove(maintenance_node)
|
|
||||||
|
|
||||||
for node in nodes:
|
|
||||||
if self.host_fits(maintenance_node, node):
|
|
||||||
self.enable_compute_node_if_disabled(node)
|
|
||||||
self.add_action_maintain_compute_node(maintenance_node)
|
|
||||||
self.host_migration(maintenance_node, node)
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def try_maintain(self, maintenance_node):
|
def try_maintain(self, maintenance_node):
|
||||||
|
|||||||
@@ -57,8 +57,19 @@ class ZoneMigration(base.ZoneMigrationBaseStrategy):
|
|||||||
self.planned_cold_count = 0
|
self.planned_cold_count = 0
|
||||||
self.volume_count = 0
|
self.volume_count = 0
|
||||||
self.planned_volume_count = 0
|
self.planned_volume_count = 0
|
||||||
self.volume_update_count = 0
|
|
||||||
self.planned_volume_update_count = 0
|
# TODO(sean-k-mooney) This is backward compatibility
|
||||||
|
# for calling the swap code paths. Swap is now an alias
|
||||||
|
# for migrate, we should clean this up in a future
|
||||||
|
# cycle.
|
||||||
|
@property
|
||||||
|
def volume_update_count(self):
|
||||||
|
return self.volume_count
|
||||||
|
|
||||||
|
# same as above clean up later.
|
||||||
|
@property
|
||||||
|
def planned_volume_update_count(self):
|
||||||
|
return self.planned_volume_count
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_name(cls):
|
def get_name(cls):
|
||||||
@@ -312,8 +323,8 @@ class ZoneMigration(base.ZoneMigrationBaseStrategy):
|
|||||||
planned_cold_migrate_instance_count=self.planned_cold_count,
|
planned_cold_migrate_instance_count=self.planned_cold_count,
|
||||||
volume_migrate_count=self.volume_count,
|
volume_migrate_count=self.volume_count,
|
||||||
planned_volume_migrate_count=self.planned_volume_count,
|
planned_volume_migrate_count=self.planned_volume_count,
|
||||||
volume_update_count=self.volume_update_count,
|
volume_update_count=self.volume_count,
|
||||||
planned_volume_update_count=self.planned_volume_update_count
|
planned_volume_update_count=self.planned_volume_count
|
||||||
)
|
)
|
||||||
|
|
||||||
def set_migration_count(self, targets):
|
def set_migration_count(self, targets):
|
||||||
@@ -328,10 +339,7 @@ class ZoneMigration(base.ZoneMigrationBaseStrategy):
|
|||||||
elif self.is_cold(instance):
|
elif self.is_cold(instance):
|
||||||
self.cold_count += 1
|
self.cold_count += 1
|
||||||
for volume in targets.get('volume', []):
|
for volume in targets.get('volume', []):
|
||||||
if self.is_available(volume):
|
|
||||||
self.volume_count += 1
|
self.volume_count += 1
|
||||||
elif self.is_in_use(volume):
|
|
||||||
self.volume_update_count += 1
|
|
||||||
|
|
||||||
def is_live(self, instance):
|
def is_live(self, instance):
|
||||||
status = getattr(instance, 'status')
|
status = getattr(instance, 'status')
|
||||||
@@ -404,13 +412,10 @@ class ZoneMigration(base.ZoneMigrationBaseStrategy):
|
|||||||
LOG.debug(src_type)
|
LOG.debug(src_type)
|
||||||
LOG.debug("%s %s", dst_pool, dst_type)
|
LOG.debug("%s %s", dst_pool, dst_type)
|
||||||
|
|
||||||
if self.is_available(volume):
|
|
||||||
if src_type == dst_type:
|
if src_type == dst_type:
|
||||||
self._volume_migrate(volume, dst_pool)
|
self._volume_migrate(volume, dst_pool)
|
||||||
else:
|
else:
|
||||||
self._volume_retype(volume, dst_type)
|
self._volume_retype(volume, dst_type)
|
||||||
elif self.is_in_use(volume):
|
|
||||||
self._volume_update(volume, dst_type)
|
|
||||||
|
|
||||||
# if with_attached_volume is True, migrate attaching instances
|
# if with_attached_volume is True, migrate attaching instances
|
||||||
if self.with_attached_volume:
|
if self.with_attached_volume:
|
||||||
@@ -464,16 +469,6 @@ class ZoneMigration(base.ZoneMigrationBaseStrategy):
|
|||||||
input_parameters=parameters)
|
input_parameters=parameters)
|
||||||
self.planned_cold_count += 1
|
self.planned_cold_count += 1
|
||||||
|
|
||||||
def _volume_update(self, volume, dst_type):
|
|
||||||
parameters = {"migration_type": "swap",
|
|
||||||
"destination_type": dst_type,
|
|
||||||
"resource_name": volume.name}
|
|
||||||
self.solution.add_action(
|
|
||||||
action_type="volume_migrate",
|
|
||||||
resource_id=volume.id,
|
|
||||||
input_parameters=parameters)
|
|
||||||
self.planned_volume_update_count += 1
|
|
||||||
|
|
||||||
def _volume_migrate(self, volume, dst_pool):
|
def _volume_migrate(self, volume, dst_pool):
|
||||||
parameters = {"migration_type": "migrate",
|
parameters = {"migration_type": "migrate",
|
||||||
"destination_node": dst_pool,
|
"destination_node": dst_pool,
|
||||||
|
|||||||
@@ -827,17 +827,41 @@ class TestPost(api_base.FunctionalTest):
|
|||||||
self.assertIn(expected_error_msg, response.json['error_message'])
|
self.assertIn(expected_error_msg, response.json['error_message'])
|
||||||
assert not mock_trigger_audit.called
|
assert not mock_trigger_audit.called
|
||||||
|
|
||||||
|
@mock.patch.object(deapi.DecisionEngineAPI, 'trigger_audit')
|
||||||
|
def test_create_audit_with_missing_parameter(
|
||||||
|
self, mock_trigger_audit):
|
||||||
|
mock_trigger_audit.return_value = mock.ANY
|
||||||
|
audit_template = self.prepare_audit_template_strategy_with_parameter()
|
||||||
|
|
||||||
|
audit_dict = api_utils.audit_post_data(
|
||||||
|
parameters={})
|
||||||
|
|
||||||
|
audit_dict['audit_template_uuid'] = audit_template['uuid']
|
||||||
|
del_keys = ['uuid', 'goal_id', 'strategy_id', 'state', 'interval',
|
||||||
|
'scope', 'next_run_time', 'hostname']
|
||||||
|
for k in del_keys:
|
||||||
|
del audit_dict[k]
|
||||||
|
|
||||||
|
response = self.post_json('/audits', audit_dict, expect_errors=True)
|
||||||
|
self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_int)
|
||||||
|
self.assertEqual("application/json", response.content_type)
|
||||||
|
expected_error_msg = (
|
||||||
|
"Invalid parameters for strategy: 'fake1' is a required property")
|
||||||
|
self.assertTrue(response.json['error_message'])
|
||||||
|
self.assertIn(expected_error_msg, response.json['error_message'])
|
||||||
|
assert not mock_trigger_audit.called
|
||||||
|
|
||||||
def prepare_audit_template_strategy_with_parameter(self):
|
def prepare_audit_template_strategy_with_parameter(self):
|
||||||
fake_spec = {
|
fake_spec = {
|
||||||
"properties": {
|
"properties": {
|
||||||
"fake1": {
|
"fake1": {
|
||||||
"description": "number parameter example",
|
"description": "number parameter example",
|
||||||
"type": "number",
|
"type": "number",
|
||||||
"default": 3.2,
|
|
||||||
"minimum": 1.0,
|
"minimum": 1.0,
|
||||||
"maximum": 10.2,
|
"maximum": 10.2,
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
'required': ['fake1']
|
||||||
}
|
}
|
||||||
template_uuid = 'e74c40e0-d825-11e2-a28f-0800200c9a67'
|
template_uuid = 'e74c40e0-d825-11e2-a28f-0800200c9a67'
|
||||||
strategy_uuid = 'e74c40e0-d825-11e2-a28f-0800200c9a68'
|
strategy_uuid = 'e74c40e0-d825-11e2-a28f-0800200c9a68'
|
||||||
@@ -987,6 +1011,27 @@ class TestPost(api_base.FunctionalTest):
|
|||||||
self.assertEqual(HTTPStatus.CREATED, response.status_int)
|
self.assertEqual(HTTPStatus.CREATED, response.status_int)
|
||||||
self.assertTrue(response.json['force'])
|
self.assertTrue(response.json['force'])
|
||||||
|
|
||||||
|
@mock.patch.object(deapi.DecisionEngineAPI, 'trigger_audit')
|
||||||
|
def test_create_audit_with_no_goal_no_name(self, mock_trigger_audit):
|
||||||
|
mock_trigger_audit.return_value = mock.ANY
|
||||||
|
|
||||||
|
audit_dict = post_get_test_audit(
|
||||||
|
params_to_exclude=['uuid', 'state', 'interval', 'scope',
|
||||||
|
'next_run_time', 'hostname', 'goal',
|
||||||
|
'audit_template_uuid', 'name'])
|
||||||
|
|
||||||
|
response = self.post_json(
|
||||||
|
'/audits',
|
||||||
|
audit_dict,
|
||||||
|
expect_errors=True,
|
||||||
|
headers={'OpenStack-API-Version': 'infra-optim 1.2'})
|
||||||
|
self.assertEqual('application/json', response.content_type)
|
||||||
|
self.assertEqual(HTTPStatus.BAD_REQUEST, response.status_int)
|
||||||
|
expected_msg = 'A valid goal or audit_template_id must be provided'
|
||||||
|
self.assertTrue(response.json['error_message'])
|
||||||
|
self.assertIn(expected_msg, response.json['error_message'])
|
||||||
|
assert not mock_trigger_audit.called
|
||||||
|
|
||||||
|
|
||||||
class TestDelete(api_base.FunctionalTest):
|
class TestDelete(api_base.FunctionalTest):
|
||||||
|
|
||||||
|
|||||||
@@ -124,3 +124,30 @@ class TestDefaultActionPlanHandler(base.DbTestCase):
|
|||||||
self.context, mock.MagicMock(), self.action_plan.uuid)
|
self.context, mock.MagicMock(), self.action_plan.uuid)
|
||||||
command.execute()
|
command.execute()
|
||||||
self.assertEqual(ap_objects.State.CANCELLED, self.action_plan.state)
|
self.assertEqual(ap_objects.State.CANCELLED, self.action_plan.state)
|
||||||
|
|
||||||
|
@mock.patch.object(objects.ActionPlan, "get_by_uuid")
|
||||||
|
@mock.patch.object(objects.Action, "list")
|
||||||
|
def test_launch_action_plan_failed_actions(self, m_action_list,
|
||||||
|
m_get_action_plan):
|
||||||
|
m_get_action_plan.return_value = self.action_plan
|
||||||
|
failed_action = self.action
|
||||||
|
failed_action.state = objects.action.State.FAILED
|
||||||
|
m_action_list.return_value = [failed_action]
|
||||||
|
command = default.DefaultActionPlanHandler(
|
||||||
|
self.context, mock.MagicMock(), self.action_plan.uuid)
|
||||||
|
command.execute()
|
||||||
|
expected_calls = [
|
||||||
|
mock.call(self.context, self.action_plan,
|
||||||
|
action=objects.fields.NotificationAction.EXECUTION,
|
||||||
|
phase=objects.fields.NotificationPhase.START),
|
||||||
|
mock.call(self.context, self.action_plan,
|
||||||
|
action=objects.fields.NotificationAction.EXECUTION,
|
||||||
|
priority=objects.fields.NotificationPriority.ERROR,
|
||||||
|
phase=objects.fields.NotificationPhase.ERROR)]
|
||||||
|
|
||||||
|
self.assertEqual(ap_objects.State.FAILED, self.action_plan.state)
|
||||||
|
self.assertEqual(
|
||||||
|
expected_calls,
|
||||||
|
self.m_action_plan_notifications
|
||||||
|
.send_action_notification
|
||||||
|
.call_args_list)
|
||||||
|
|||||||
@@ -22,7 +22,6 @@ from watcher.common import cinder_helper
|
|||||||
from watcher.common import clients
|
from watcher.common import clients
|
||||||
from watcher.common import keystone_helper
|
from watcher.common import keystone_helper
|
||||||
from watcher.common import nova_helper
|
from watcher.common import nova_helper
|
||||||
from watcher.common import utils as w_utils
|
|
||||||
from watcher.tests import base
|
from watcher.tests import base
|
||||||
|
|
||||||
|
|
||||||
@@ -102,12 +101,15 @@ class TestMigration(base.TestCase):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def fake_volume(**kwargs):
|
def fake_volume(**kwargs):
|
||||||
|
# FIXME(sean-k-mooney): we should be using real objects in this
|
||||||
|
# test or at lease something more Representative of the real data
|
||||||
volume = mock.MagicMock()
|
volume = mock.MagicMock()
|
||||||
volume.id = kwargs.get('id', TestMigration.VOLUME_UUID)
|
volume.id = kwargs.get('id', TestMigration.VOLUME_UUID)
|
||||||
volume.size = kwargs.get('size', '1')
|
volume.size = kwargs.get('size', '1')
|
||||||
volume.status = kwargs.get('status', 'available')
|
volume.status = kwargs.get('status', 'available')
|
||||||
volume.snapshot_id = kwargs.get('snapshot_id', None)
|
volume.snapshot_id = kwargs.get('snapshot_id', None)
|
||||||
volume.availability_zone = kwargs.get('availability_zone', 'nova')
|
volume.availability_zone = kwargs.get('availability_zone', 'nova')
|
||||||
|
volume.attachments = kwargs.get('attachments', [])
|
||||||
return volume
|
return volume
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -175,42 +177,14 @@ class TestMigration(base.TestCase):
|
|||||||
"storage1-typename",
|
"storage1-typename",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_swap_success(self):
|
|
||||||
volume = self.fake_volume(
|
|
||||||
status='in-use', attachments=[{'server_id': 'server_id'}])
|
|
||||||
self.m_n_helper.find_instance.return_value = self.fake_instance()
|
|
||||||
|
|
||||||
new_volume = self.fake_volume(id=w_utils.generate_uuid())
|
|
||||||
user = mock.Mock()
|
|
||||||
session = mock.MagicMock()
|
|
||||||
self.m_k_helper.create_user.return_value = user
|
|
||||||
self.m_k_helper.create_session.return_value = session
|
|
||||||
self.m_c_helper.get_volume.return_value = volume
|
|
||||||
self.m_c_helper.create_volume.return_value = new_volume
|
|
||||||
|
|
||||||
result = self.action_swap.execute()
|
|
||||||
self.assertTrue(result)
|
|
||||||
|
|
||||||
self.m_n_helper.swap_volume.assert_called_once_with(
|
|
||||||
volume,
|
|
||||||
new_volume
|
|
||||||
)
|
|
||||||
self.m_k_helper.delete_user.assert_called_once_with(user)
|
|
||||||
|
|
||||||
def test_swap_fail(self):
|
|
||||||
# _can_swap fail
|
|
||||||
instance = self.fake_instance(status='STOPPED')
|
|
||||||
self.m_n_helper.find_instance.return_value = instance
|
|
||||||
|
|
||||||
result = self.action_swap.execute()
|
|
||||||
self.assertFalse(result)
|
|
||||||
|
|
||||||
def test_can_swap_success(self):
|
def test_can_swap_success(self):
|
||||||
volume = self.fake_volume(
|
volume = self.fake_volume(
|
||||||
status='in-use', attachments=[{'server_id': 'server_id'}])
|
status='in-use', attachments=[
|
||||||
instance = self.fake_instance()
|
{'server_id': TestMigration.INSTANCE_UUID}])
|
||||||
|
|
||||||
|
instance = self.fake_instance()
|
||||||
self.m_n_helper.find_instance.return_value = instance
|
self.m_n_helper.find_instance.return_value = instance
|
||||||
|
|
||||||
result = self.action_swap._can_swap(volume)
|
result = self.action_swap._can_swap(volume)
|
||||||
self.assertTrue(result)
|
self.assertTrue(result)
|
||||||
|
|
||||||
@@ -219,16 +193,33 @@ class TestMigration(base.TestCase):
|
|||||||
result = self.action_swap._can_swap(volume)
|
result = self.action_swap._can_swap(volume)
|
||||||
self.assertTrue(result)
|
self.assertTrue(result)
|
||||||
|
|
||||||
instance = self.fake_instance(status='RESIZED')
|
|
||||||
self.m_n_helper.find_instance.return_value = instance
|
|
||||||
result = self.action_swap._can_swap(volume)
|
|
||||||
self.assertTrue(result)
|
|
||||||
|
|
||||||
def test_can_swap_fail(self):
|
def test_can_swap_fail(self):
|
||||||
|
|
||||||
volume = self.fake_volume(
|
volume = self.fake_volume(
|
||||||
status='in-use', attachments=[{'server_id': 'server_id'}])
|
status='in-use', attachments=[
|
||||||
|
{'server_id': TestMigration.INSTANCE_UUID}])
|
||||||
instance = self.fake_instance(status='STOPPED')
|
instance = self.fake_instance(status='STOPPED')
|
||||||
self.m_n_helper.find_instance.return_value = instance
|
self.m_n_helper.find_instance.return_value = instance
|
||||||
result = self.action_swap._can_swap(volume)
|
result = self.action_swap._can_swap(volume)
|
||||||
self.assertFalse(result)
|
self.assertFalse(result)
|
||||||
|
|
||||||
|
instance = self.fake_instance(status='RESIZED')
|
||||||
|
self.m_n_helper.find_instance.return_value = instance
|
||||||
|
result = self.action_swap._can_swap(volume)
|
||||||
|
self.assertFalse(result)
|
||||||
|
|
||||||
|
def test_swap_success(self):
|
||||||
|
volume = self.fake_volume(
|
||||||
|
status='in-use', attachments=[
|
||||||
|
{'server_id': TestMigration.INSTANCE_UUID}])
|
||||||
|
self.m_c_helper.get_volume.return_value = volume
|
||||||
|
|
||||||
|
instance = self.fake_instance()
|
||||||
|
self.m_n_helper.find_instance.return_value = instance
|
||||||
|
|
||||||
|
result = self.action_swap.execute()
|
||||||
|
self.assertTrue(result)
|
||||||
|
self.m_c_helper.migrate.assert_called_once_with(
|
||||||
|
volume,
|
||||||
|
"storage1-poolname"
|
||||||
|
)
|
||||||
|
|||||||
@@ -146,8 +146,9 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(expected_cpu_usage, result)
|
self.assertEqual(expected_cpu_usage, result)
|
||||||
mock_prometheus_query.assert_called_once_with(
|
mock_prometheus_query.assert_called_once_with(
|
||||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
|
||||||
"{mode='idle',instance='10.0.1.2:9100'}[300s])) * 100)")
|
"{mode='idle',fqdn='marios-env.controlplane.domain'}[300s]))"
|
||||||
|
" * 100)")
|
||||||
|
|
||||||
@mock.patch.object(prometheus_client.PrometheusAPIClient, 'query')
|
@mock.patch.object(prometheus_client.PrometheusAPIClient, 'query')
|
||||||
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
||||||
@@ -241,7 +242,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
self.assertEqual(expected_cpu_usage, result_cpu)
|
self.assertEqual(expected_cpu_usage, result_cpu)
|
||||||
self.assertIsInstance(result_cpu, float)
|
self.assertIsInstance(result_cpu, float)
|
||||||
mock_prometheus_query.assert_called_once_with(
|
mock_prometheus_query.assert_called_once_with(
|
||||||
"clamp_max((avg by (instance)(rate("
|
"clamp_max((avg by (resource)(rate("
|
||||||
"ceilometer_cpu{resource='uuid-0'}[300s]))"
|
"ceilometer_cpu{resource='uuid-0'}[300s]))"
|
||||||
"/10e+8) *(100/2), 100)"
|
"/10e+8) *(100/2), 100)"
|
||||||
)
|
)
|
||||||
@@ -437,15 +438,48 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
'instance': '10.1.2.3:9100', 'job': 'node',
|
'instance': '10.1.2.3:9100', 'job': 'node',
|
||||||
}},
|
}},
|
||||||
]}}
|
]}}
|
||||||
expected_fqdn_map = {'foo.controlplane.domain': '10.1.2.1:9100',
|
expected_fqdn_list = {'foo.controlplane.domain',
|
||||||
'bar.controlplane.domain': '10.1.2.2:9100',
|
'bar.controlplane.domain',
|
||||||
'baz.controlplane.domain': '10.1.2.3:9100'}
|
'baz.controlplane.domain'}
|
||||||
expected_host_map = {'foo': '10.1.2.1:9100',
|
expected_host_map = {'foo': 'foo.controlplane.domain',
|
||||||
'bar': '10.1.2.2:9100',
|
'bar': 'bar.controlplane.domain',
|
||||||
'baz': '10.1.2.3:9100'}
|
'baz': 'baz.controlplane.domain'}
|
||||||
helper = prometheus_helper.PrometheusHelper()
|
helper = prometheus_helper.PrometheusHelper()
|
||||||
self.assertEqual(helper.prometheus_fqdn_instance_map,
|
self.assertEqual(helper.prometheus_fqdn_labels,
|
||||||
expected_fqdn_map)
|
expected_fqdn_list)
|
||||||
|
self.assertEqual(helper.prometheus_host_instance_map,
|
||||||
|
expected_host_map)
|
||||||
|
|
||||||
|
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
||||||
|
def test_build_prometheus_fqdn_host_instance_map_dupl_fqdn(
|
||||||
|
self, mock_prometheus_get):
|
||||||
|
mock_prometheus_get.return_value = {'data': {'activeTargets': [
|
||||||
|
{'labels': {
|
||||||
|
'fqdn': 'foo.controlplane.domain',
|
||||||
|
'instance': '10.1.2.1:9100', 'job': 'node',
|
||||||
|
}},
|
||||||
|
{'labels': {
|
||||||
|
'fqdn': 'foo.controlplane.domain',
|
||||||
|
'instance': '10.1.2.1:9229', 'job': 'podman',
|
||||||
|
}},
|
||||||
|
{'labels': {
|
||||||
|
'fqdn': 'bar.controlplane.domain',
|
||||||
|
'instance': '10.1.2.2:9100', 'job': 'node',
|
||||||
|
}},
|
||||||
|
{'labels': {
|
||||||
|
'fqdn': 'baz.controlplane.domain',
|
||||||
|
'instance': '10.1.2.3:9100', 'job': 'node',
|
||||||
|
}},
|
||||||
|
]}}
|
||||||
|
expected_fqdn_list = {'foo.controlplane.domain',
|
||||||
|
'bar.controlplane.domain',
|
||||||
|
'baz.controlplane.domain'}
|
||||||
|
expected_host_map = {'foo': 'foo.controlplane.domain',
|
||||||
|
'bar': 'bar.controlplane.domain',
|
||||||
|
'baz': 'baz.controlplane.domain'}
|
||||||
|
helper = prometheus_helper.PrometheusHelper()
|
||||||
|
self.assertEqual(helper.prometheus_fqdn_labels,
|
||||||
|
expected_fqdn_list)
|
||||||
self.assertEqual(helper.prometheus_host_instance_map,
|
self.assertEqual(helper.prometheus_host_instance_map,
|
||||||
expected_host_map)
|
expected_host_map)
|
||||||
|
|
||||||
@@ -460,7 +494,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
}},
|
}},
|
||||||
]}}
|
]}}
|
||||||
helper = prometheus_helper.PrometheusHelper()
|
helper = prometheus_helper.PrometheusHelper()
|
||||||
self.assertEqual({}, helper.prometheus_fqdn_instance_map)
|
self.assertEqual(set(), helper.prometheus_fqdn_labels)
|
||||||
self.assertEqual({}, helper.prometheus_host_instance_map)
|
self.assertEqual({}, helper.prometheus_host_instance_map)
|
||||||
|
|
||||||
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
||||||
@@ -476,12 +510,29 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
}},
|
}},
|
||||||
]}}
|
]}}
|
||||||
helper = prometheus_helper.PrometheusHelper()
|
helper = prometheus_helper.PrometheusHelper()
|
||||||
expected_fqdn_map = {'ena': '10.1.2.1:9100',
|
expected_fqdn_list = {'ena', 'dyo'}
|
||||||
'dyo': '10.1.2.2:9100'}
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
helper.prometheus_fqdn_instance_map, expected_fqdn_map)
|
helper.prometheus_fqdn_labels, expected_fqdn_list)
|
||||||
self.assertEqual({}, helper.prometheus_host_instance_map)
|
self.assertEqual({}, helper.prometheus_host_instance_map)
|
||||||
|
|
||||||
|
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
||||||
|
def test_using_ips_not_fqdn(self, mock_prometheus_get):
|
||||||
|
mock_prometheus_get.return_value = {'data': {'activeTargets': [
|
||||||
|
{'labels': {
|
||||||
|
'ip_label': '10.1.2.1',
|
||||||
|
'instance': '10.1.2.1:9100', 'job': 'node',
|
||||||
|
}},
|
||||||
|
{'labels': {
|
||||||
|
'ip_label': '10.1.2.2',
|
||||||
|
'instance': '10.1.2.2:9100', 'job': 'node',
|
||||||
|
}},
|
||||||
|
]}}
|
||||||
|
cfg.CONF.prometheus_client.fqdn_label = 'ip_label'
|
||||||
|
helper = prometheus_helper.PrometheusHelper()
|
||||||
|
expected_fqdn_list = {'10.1.2.1', '10.1.2.2'}
|
||||||
|
self.assertEqual(
|
||||||
|
helper.prometheus_fqdn_labels, expected_fqdn_list)
|
||||||
|
|
||||||
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
@mock.patch.object(prometheus_client.PrometheusAPIClient, '_get')
|
||||||
def test_override_prometheus_fqdn_label(self, mock_prometheus_get):
|
def test_override_prometheus_fqdn_label(self, mock_prometheus_get):
|
||||||
mock_prometheus_get.return_value = {'data': {'activeTargets': [
|
mock_prometheus_get.return_value = {'data': {'activeTargets': [
|
||||||
@@ -494,19 +545,19 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
'instance': '10.1.2.2:9100', 'job': 'node',
|
'instance': '10.1.2.2:9100', 'job': 'node',
|
||||||
}},
|
}},
|
||||||
]}}
|
]}}
|
||||||
expected_fqdn_map = {'foo.controlplane.domain': '10.1.2.1:9100',
|
expected_fqdn_list = {'foo.controlplane.domain',
|
||||||
'bar.controlplane.domain': '10.1.2.2:9100'}
|
'bar.controlplane.domain'}
|
||||||
expected_host_map = {'foo': '10.1.2.1:9100',
|
expected_host_map = {'foo': 'foo.controlplane.domain',
|
||||||
'bar': '10.1.2.2:9100'}
|
'bar': 'bar.controlplane.domain'}
|
||||||
cfg.CONF.prometheus_client.fqdn_label = 'custom_fqdn_label'
|
cfg.CONF.prometheus_client.fqdn_label = 'custom_fqdn_label'
|
||||||
helper = prometheus_helper.PrometheusHelper()
|
helper = prometheus_helper.PrometheusHelper()
|
||||||
self.assertEqual(helper.prometheus_fqdn_instance_map,
|
self.assertEqual(helper.prometheus_fqdn_labels,
|
||||||
expected_fqdn_map)
|
expected_fqdn_list)
|
||||||
self.assertEqual(helper.prometheus_host_instance_map,
|
self.assertEqual(helper.prometheus_host_instance_map,
|
||||||
expected_host_map)
|
expected_host_map)
|
||||||
|
|
||||||
def test_resolve_prometheus_instance_label(self):
|
def test_resolve_prometheus_instance_label(self):
|
||||||
expected_instance_label = '10.0.1.2:9100'
|
expected_instance_label = 'marios-env.controlplane.domain'
|
||||||
result = self.helper._resolve_prometheus_instance_label(
|
result = self.helper._resolve_prometheus_instance_label(
|
||||||
'marios-env.controlplane.domain')
|
'marios-env.controlplane.domain')
|
||||||
self.assertEqual(result, expected_instance_label)
|
self.assertEqual(result, expected_instance_label)
|
||||||
@@ -524,34 +575,53 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
|
|
||||||
def test_build_prometheus_query_node_cpu_avg_agg(self):
|
def test_build_prometheus_query_node_cpu_avg_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"100 - (avg by (instance)(rate(node_cpu_seconds_total"
|
"100 - (avg by (fqdn)(rate(node_cpu_seconds_total"
|
||||||
"{mode='idle',instance='a_host'}[111s])) * 100)")
|
"{mode='idle',fqdn='a_host'}[111s])) * 100)")
|
||||||
result = self.helper._build_prometheus_query(
|
result = self.helper._build_prometheus_query(
|
||||||
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
||||||
self.assertEqual(result, expected_query)
|
self.assertEqual(result, expected_query)
|
||||||
|
|
||||||
def test_build_prometheus_query_node_cpu_max_agg(self):
|
def test_build_prometheus_query_node_cpu_max_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"100 - (max by (instance)(rate(node_cpu_seconds_total"
|
"100 - (max by (fqdn)(rate(node_cpu_seconds_total"
|
||||||
"{mode='idle',instance='b_host'}[444s])) * 100)")
|
"{mode='idle',fqdn='b_host'}[444s])) * 100)")
|
||||||
result = self.helper._build_prometheus_query(
|
result = self.helper._build_prometheus_query(
|
||||||
'max', 'node_cpu_seconds_total', 'b_host', '444')
|
'max', 'node_cpu_seconds_total', 'b_host', '444')
|
||||||
self.assertEqual(result, expected_query)
|
self.assertEqual(result, expected_query)
|
||||||
|
|
||||||
def test_build_prometheus_query_node_memory_avg_agg(self):
|
def test_build_prometheus_query_node_memory_avg_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"(node_memory_MemTotal_bytes{instance='c_host'} - avg_over_time"
|
"(node_memory_MemTotal_bytes{fqdn='c_host'} - avg_over_time"
|
||||||
"(node_memory_MemAvailable_bytes{instance='c_host'}[555s])) "
|
"(node_memory_MemAvailable_bytes{fqdn='c_host'}[555s])) "
|
||||||
"/ 1024 / 1024")
|
"/ 1024")
|
||||||
result = self.helper._build_prometheus_query(
|
result = self.helper._build_prometheus_query(
|
||||||
'avg', 'node_memory_MemAvailable_bytes', 'c_host', '555')
|
'avg', 'node_memory_MemAvailable_bytes', 'c_host', '555')
|
||||||
self.assertEqual(result, expected_query)
|
self.assertEqual(result, expected_query)
|
||||||
|
|
||||||
def test_build_prometheus_query_node_memory_min_agg(self):
|
def test_build_prometheus_query_node_memory_min_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"(node_memory_MemTotal_bytes{instance='d_host'} - min_over_time"
|
"(node_memory_MemTotal_bytes{fqdn='d_host'} - min_over_time"
|
||||||
"(node_memory_MemAvailable_bytes{instance='d_host'}[222s])) "
|
"(node_memory_MemAvailable_bytes{fqdn='d_host'}[222s])) "
|
||||||
"/ 1024 / 1024")
|
"/ 1024")
|
||||||
|
result = self.helper._build_prometheus_query(
|
||||||
|
'min', 'node_memory_MemAvailable_bytes', 'd_host', '222')
|
||||||
|
self.assertEqual(result, expected_query)
|
||||||
|
|
||||||
|
def test_build_prometheus_query_node_cpu_avg_agg_custom_label(self):
|
||||||
|
self.helper.prometheus_fqdn_label = 'custom_fqdn_label'
|
||||||
|
expected_query = (
|
||||||
|
"100 - (avg by (custom_fqdn_label)(rate(node_cpu_seconds_total"
|
||||||
|
"{mode='idle',custom_fqdn_label='a_host'}[111s])) * 100)")
|
||||||
|
result = self.helper._build_prometheus_query(
|
||||||
|
'avg', 'node_cpu_seconds_total', 'a_host', '111')
|
||||||
|
self.assertEqual(result, expected_query)
|
||||||
|
|
||||||
|
def test_build_prometheus_query_node_memory_min_agg_custom_label(self):
|
||||||
|
self.helper.prometheus_fqdn_label = 'custom_fqdn'
|
||||||
|
expected_query = (
|
||||||
|
"(node_memory_MemTotal_bytes{custom_fqdn='d_host'} - min_over_time"
|
||||||
|
"(node_memory_MemAvailable_bytes{custom_fqdn='d_host'}[222s])) "
|
||||||
|
"/ 1024")
|
||||||
result = self.helper._build_prometheus_query(
|
result = self.helper._build_prometheus_query(
|
||||||
'min', 'node_memory_MemAvailable_bytes', 'd_host', '222')
|
'min', 'node_memory_MemAvailable_bytes', 'd_host', '222')
|
||||||
self.assertEqual(result, expected_query)
|
self.assertEqual(result, expected_query)
|
||||||
@@ -574,7 +644,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
|
|
||||||
def test_build_prometheus_query_instance_cpu_avg_agg(self):
|
def test_build_prometheus_query_instance_cpu_avg_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"clamp_max((avg by (instance)(rate("
|
"clamp_max((avg by (resource)(rate("
|
||||||
"ceilometer_cpu{resource='uuid-0'}[222s]))"
|
"ceilometer_cpu{resource='uuid-0'}[222s]))"
|
||||||
"/10e+8) *(100/2), 100)"
|
"/10e+8) *(100/2), 100)"
|
||||||
)
|
)
|
||||||
@@ -585,7 +655,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
|
|
||||||
def test_build_prometheus_query_instance_cpu_max_agg(self):
|
def test_build_prometheus_query_instance_cpu_max_agg(self):
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"clamp_max((max by (instance)(rate("
|
"clamp_max((max by (resource)(rate("
|
||||||
"ceilometer_cpu{resource='uuid-0'}[555s]))"
|
"ceilometer_cpu{resource='uuid-0'}[555s]))"
|
||||||
"/10e+8) *(100/4), 100)"
|
"/10e+8) *(100/4), 100)"
|
||||||
)
|
)
|
||||||
@@ -629,7 +699,7 @@ class TestPrometheusHelper(base.BaseTestCase):
|
|||||||
def test_prometheus_query_custom_uuid_label(self, mock_prometheus_get):
|
def test_prometheus_query_custom_uuid_label(self, mock_prometheus_get):
|
||||||
cfg.CONF.prometheus_client.instance_uuid_label = 'custom_uuid_label'
|
cfg.CONF.prometheus_client.instance_uuid_label = 'custom_uuid_label'
|
||||||
expected_query = (
|
expected_query = (
|
||||||
"clamp_max((max by (instance)"
|
"clamp_max((max by (custom_uuid_label)"
|
||||||
"(rate(ceilometer_cpu{custom_uuid_label='uuid-0'}[555s]))"
|
"(rate(ceilometer_cpu{custom_uuid_label='uuid-0'}[555s]))"
|
||||||
"/10e+8) *(100/4), 100)"
|
"/10e+8) *(100/4), 100)"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -111,7 +111,7 @@ class TestHostMaintenance(TestBaseStrategy):
|
|||||||
self.strategy.instance_migration(instance_0, node_0, node_1)
|
self.strategy.instance_migration(instance_0, node_0, node_1)
|
||||||
self.assertEqual(1, len(self.strategy.solution.actions))
|
self.assertEqual(1, len(self.strategy.solution.actions))
|
||||||
expected = [{'action_type': 'migrate',
|
expected = [{'action_type': 'migrate',
|
||||||
'input_parameters': {'destination_node': node_1.uuid,
|
'input_parameters': {'destination_node': node_1.hostname,
|
||||||
'source_node': node_0.uuid,
|
'source_node': node_0.uuid,
|
||||||
'migration_type': 'live',
|
'migration_type': 'live',
|
||||||
'resource_id': instance_0.uuid,
|
'resource_id': instance_0.uuid,
|
||||||
@@ -144,14 +144,14 @@ class TestHostMaintenance(TestBaseStrategy):
|
|||||||
self.strategy.host_migration(node_0, node_1)
|
self.strategy.host_migration(node_0, node_1)
|
||||||
self.assertEqual(2, len(self.strategy.solution.actions))
|
self.assertEqual(2, len(self.strategy.solution.actions))
|
||||||
expected = [{'action_type': 'migrate',
|
expected = [{'action_type': 'migrate',
|
||||||
'input_parameters': {'destination_node': node_1.uuid,
|
'input_parameters': {'destination_node': node_1.hostname,
|
||||||
'source_node': node_0.uuid,
|
'source_node': node_0.uuid,
|
||||||
'migration_type': 'live',
|
'migration_type': 'live',
|
||||||
'resource_id': instance_0.uuid,
|
'resource_id': instance_0.uuid,
|
||||||
'resource_name': instance_0.name
|
'resource_name': instance_0.name
|
||||||
}},
|
}},
|
||||||
{'action_type': 'migrate',
|
{'action_type': 'migrate',
|
||||||
'input_parameters': {'destination_node': node_1.uuid,
|
'input_parameters': {'destination_node': node_1.hostname,
|
||||||
'source_node': node_0.uuid,
|
'source_node': node_0.uuid,
|
||||||
'migration_type': 'live',
|
'migration_type': 'live',
|
||||||
'resource_id': instance_1.uuid,
|
'resource_id': instance_1.uuid,
|
||||||
@@ -167,12 +167,15 @@ class TestHostMaintenance(TestBaseStrategy):
|
|||||||
node_1 = model.get_node_by_uuid('Node_1')
|
node_1 = model.get_node_by_uuid('Node_1')
|
||||||
self.assertFalse(self.strategy.safe_maintain(node_0))
|
self.assertFalse(self.strategy.safe_maintain(node_0))
|
||||||
self.assertFalse(self.strategy.safe_maintain(node_1))
|
self.assertFalse(self.strategy.safe_maintain(node_1))
|
||||||
|
# It will return true, if backup node is passed
|
||||||
|
self.assertTrue(self.strategy.safe_maintain(node_0, node_1))
|
||||||
|
|
||||||
model = self.fake_c_cluster.\
|
model = self.fake_c_cluster.\
|
||||||
generate_scenario_1_with_all_nodes_disable()
|
generate_scenario_1_with_all_nodes_disable()
|
||||||
self.m_c_model.return_value = model
|
self.m_c_model.return_value = model
|
||||||
node_0 = model.get_node_by_uuid('Node_0')
|
node_0 = model.get_node_by_uuid('Node_0')
|
||||||
self.assertTrue(self.strategy.safe_maintain(node_0))
|
# It will return false, if there is no backup node
|
||||||
|
self.assertFalse(self.strategy.safe_maintain(node_0))
|
||||||
|
|
||||||
def test_try_maintain(self):
|
def test_try_maintain(self):
|
||||||
model = self.fake_c_cluster.generate_scenario_1()
|
model = self.fake_c_cluster.generate_scenario_1()
|
||||||
@@ -213,7 +216,7 @@ class TestHostMaintenance(TestBaseStrategy):
|
|||||||
'disabled_reason': 'watcher_maintaining'}},
|
'disabled_reason': 'watcher_maintaining'}},
|
||||||
{'action_type': 'migrate',
|
{'action_type': 'migrate',
|
||||||
'input_parameters': {
|
'input_parameters': {
|
||||||
'destination_node': node_3.uuid,
|
'destination_node': node_3.hostname,
|
||||||
'source_node': node_2.uuid,
|
'source_node': node_2.uuid,
|
||||||
'migration_type': 'live',
|
'migration_type': 'live',
|
||||||
'resource_id': instance_4.uuid,
|
'resource_id': instance_4.uuid,
|
||||||
|
|||||||
@@ -212,11 +212,71 @@ class TestWorkloadStabilization(TestBaseStrategy):
|
|||||||
len(self.strategy.simulate_migrations(self.hosts_load_assert)))
|
len(self.strategy.simulate_migrations(self.hosts_load_assert)))
|
||||||
|
|
||||||
def test_check_threshold(self):
|
def test_check_threshold(self):
|
||||||
|
|
||||||
|
# sd for 0.05, 0.05, 0.07, 0.07, 0.8
|
||||||
|
test_cpu_sd = 0.296
|
||||||
|
|
||||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||||
self.strategy.thresholds = {'instance_cpu_usage': 0.001,
|
self.strategy.thresholds = {'instance_cpu_usage': 0.25,
|
||||||
'instance_ram_usage': 0.2}
|
'instance_ram_usage': 0.2}
|
||||||
|
|
||||||
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||||
self.assertTrue(self.strategy.check_threshold())
|
self.assertTrue(self.strategy.check_threshold())
|
||||||
|
self.assertEqual(
|
||||||
|
round(self.strategy.sd_before_audit, 3),
|
||||||
|
test_cpu_sd)
|
||||||
|
|
||||||
|
def test_check_threshold_cpu(self):
|
||||||
|
|
||||||
|
test_cpu_sd = 0.296
|
||||||
|
|
||||||
|
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||||
|
self.strategy.metrics = ["instance_cpu_usage"]
|
||||||
|
self.strategy.thresholds = {'instance_cpu_usage': 0.25}
|
||||||
|
|
||||||
|
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||||
|
self.assertTrue(self.strategy.check_threshold())
|
||||||
|
self.assertEqual(
|
||||||
|
round(self.strategy.sd_before_audit, 3),
|
||||||
|
test_cpu_sd)
|
||||||
|
|
||||||
|
def test_check_threshold_ram(self):
|
||||||
|
|
||||||
|
# sd for 4,5,7,8, 29 MB used in 132MB total memory hosts
|
||||||
|
test_ram_sd = 0.071
|
||||||
|
|
||||||
|
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||||
|
self.strategy.metrics = ["instance_ram_usage"]
|
||||||
|
self.strategy.thresholds = {'instance_cpu_usage': 0.25,
|
||||||
|
'instance_ram_usage': 0.05}
|
||||||
|
|
||||||
|
self.strategy.simulate_migrations = mock.Mock(return_value=True)
|
||||||
|
self.assertTrue(self.strategy.check_threshold())
|
||||||
|
self.assertEqual(
|
||||||
|
round(self.strategy.sd_before_audit, 3),
|
||||||
|
test_ram_sd)
|
||||||
|
|
||||||
|
def test_check_threshold_fail(self):
    """check_threshold() is false with CPU threshold 0.3 and RAM 0.2.

    The strategy's ``metrics`` attribute is left as the fixture set it.
    """
    strategy = self.strategy
    self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
    strategy.thresholds = {
        'instance_cpu_usage': 0.3,
        'instance_ram_usage': 0.2,
    }
    # Replace the migration simulation with a stub that always returns True.
    strategy.simulate_migrations = mock.Mock(return_value=True)

    threshold_result = strategy.check_threshold()
    self.assertFalse(threshold_result)
|
||||||
|
|
||||||
|
def test_check_threshold_cpu_fail(self):
    """check_threshold() is false with only the CPU metric at threshold 0.4."""
    strategy = self.strategy
    self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
    strategy.metrics = ["instance_cpu_usage"]
    strategy.thresholds = {'instance_cpu_usage': 0.4}
    # Replace the migration simulation with a stub that always returns True.
    strategy.simulate_migrations = mock.Mock(return_value=True)

    threshold_result = strategy.check_threshold()
    self.assertFalse(threshold_result)
|
||||||
|
|
||||||
|
def test_check_threshold_ram_fail(self):
    """check_threshold() is false with only the RAM metric at threshold 0.1.

    A CPU threshold is also configured, but ``metrics`` lists only
    ``instance_ram_usage``.
    """
    strategy = self.strategy
    self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
    strategy.metrics = ["instance_ram_usage"]
    strategy.thresholds = {
        'instance_cpu_usage': 0.25,
        'instance_ram_usage': 0.1,
    }
    # Replace the migration simulation with a stub that always returns True.
    strategy.simulate_migrations = mock.Mock(return_value=True)

    threshold_result = strategy.check_threshold()
    self.assertFalse(threshold_result)
|
||||||
|
|
||||||
def test_execute_one_migration(self):
|
def test_execute_one_migration(self):
|
||||||
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
self.m_c_model.return_value = self.fake_c_cluster.generate_scenario_1()
|
||||||
|
|||||||
@@ -320,7 +320,10 @@ class TestZoneMigration(TestBaseStrategy):
|
|||||||
migration_types = collections.Counter(
|
migration_types = collections.Counter(
|
||||||
[action.get('input_parameters')['migration_type']
|
[action.get('input_parameters')['migration_type']
|
||||||
for action in solution.actions])
|
for action in solution.actions])
|
||||||
self.assertEqual(1, migration_types.get("swap", 0))
|
# watcher no longer implements swap. it is now an
|
||||||
|
# alias for migrate.
|
||||||
|
self.assertEqual(0, migration_types.get("swap", 0))
|
||||||
|
self.assertEqual(1, migration_types.get("migrate", 1))
|
||||||
global_efficacy_value = solution.global_efficacy[3].get('value', 0)
|
global_efficacy_value = solution.global_efficacy[3].get('value', 0)
|
||||||
self.assertEqual(100, global_efficacy_value)
|
self.assertEqual(100, global_efficacy_value)
|
||||||
|
|
||||||
|
|||||||
0
watcher/wsgi/__init__.py
Normal file
0
watcher/wsgi/__init__.py
Normal file
18
watcher/wsgi/api.py
Normal file
18
watcher/wsgi/api.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
"""WSGI application entry-point for Watcher API."""
|
||||||
|
import threading
|
||||||
|
from watcher.api import wsgi
|
||||||
|
# Module-level WSGI callable; it is populated below when this module's body
# is executed.
application = None

# NOTE(review): the lock is constructed right here, so no other code holds a
# reference to it; it only brackets the initialisation below.
_init_lock = threading.Lock()
with _init_lock:
    if application is None:
        application = wsgi.initialize_wsgi_app()
|
||||||
Reference in New Issue
Block a user