From fd6562382e394ed79a2717addeb8cb7123a69355 Mon Sep 17 00:00:00 2001 From: Lucian Petrut Date: Wed, 11 Oct 2023 10:18:09 +0300 Subject: [PATCH] Avoid performing retries in case of missing resources There may be no available metrics for instances that are stopped or were recently spawned. This makes retries unnecessary and time-consuming. For this reason, we'll ignore gnocchi NotFound errors (such as MetricNotFound). Change-Id: I79cd03bf04db634b931d6dfd32d5150f58e82044 --- watcher/decision_engine/datasources/base.py | 10 +++++++++- watcher/decision_engine/datasources/gnocchi.py | 17 +++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/watcher/decision_engine/datasources/base.py b/watcher/decision_engine/datasources/base.py index 7a58d3e64..fa1524b45 100644 --- a/watcher/decision_engine/datasources/base.py +++ b/watcher/decision_engine/datasources/base.py @@ -63,7 +63,7 @@ class DataSourceBase(object): raise exception.MetricNotAvailable(metric=meter_name) return meter - def query_retry(self, f, *args, **kwargs): + def query_retry(self, f, *args, ignored_exc=None, **kwargs): """Attempts to retrieve metrics from the external service Attempts to access data from the external service and handles @@ -71,15 +71,23 @@ class DataSourceBase(object): to the value of query_max_retries :param f: The method that performs the actual querying for metrics :param args: Array of arguments supplied to the method + :param ignored_exc: An exception or tuple of exceptions that shouldn't + be retried, for example "NotFound" exceptions. 
:param kwargs: The amount of arguments supplied to the method :return: The value as retrieved from the external service """ num_retries = CONF.watcher_datasources.query_max_retries timeout = CONF.watcher_datasources.query_timeout + ignored_exc = ignored_exc or tuple() + for i in range(num_retries): try: return f(*args, **kwargs) + except ignored_exc as e: + LOG.debug("Got an ignored exception (%s) while calling: %s ", + e, f) + return except Exception as e: LOG.exception(e) self.query_retry_reset(e) diff --git a/watcher/decision_engine/datasources/gnocchi.py b/watcher/decision_engine/datasources/gnocchi.py index 6a52845ca..a8677350f 100644 --- a/watcher/decision_engine/datasources/gnocchi.py +++ b/watcher/decision_engine/datasources/gnocchi.py @@ -19,6 +19,7 @@ from datetime import datetime from datetime import timedelta +from gnocchiclient import exceptions as gnc_exc from oslo_config import cfg from oslo_log import log @@ -84,7 +85,9 @@ class GnocchiHelper(base.DataSourceBase): kwargs = dict(query={"=": {"original_resource_id": resource_id}}, limit=1) resources = self.query_retry( - f=self.gnocchi.resource.search, **kwargs) + f=self.gnocchi.resource.search, + ignored_exc=gnc_exc.NotFound, + **kwargs) if not resources: LOG.warning("The {0} resource {1} could not be " @@ -105,7 +108,9 @@ class GnocchiHelper(base.DataSourceBase): kwargs = {k: v for k, v in raw_kwargs.items() if k and v} statistics = self.query_retry( - f=self.gnocchi.metric.get_measures, **kwargs) + f=self.gnocchi.metric.get_measures, + ignored_exc=gnc_exc.NotFound, + **kwargs) return_value = None if statistics: @@ -132,7 +137,9 @@ class GnocchiHelper(base.DataSourceBase): kwargs = dict(query={"=": {"original_resource_id": resource_id}}, limit=1) resources = self.query_retry( - f=self.gnocchi.resource.search, **kwargs) + f=self.gnocchi.resource.search, + ignored_exc=gnc_exc.NotFound, + **kwargs) if not resources: LOG.warning("The {0} resource {1} could not be " @@ -152,7 +159,9 @@ class 
GnocchiHelper(base.DataSourceBase): kwargs = {k: v for k, v in raw_kwargs.items() if k and v} statistics = self.query_retry( - f=self.gnocchi.metric.get_measures, **kwargs) + f=self.gnocchi.metric.get_measures, + ignored_exc=gnc_exc.NotFound, + **kwargs) return_value = None if statistics: