diff --git a/aodh/evaluator/threshold.py b/aodh/evaluator/threshold.py index 885e23692..20516b362 100644 --- a/aodh/evaluator/threshold.py +++ b/aodh/evaluator/threshold.py @@ -19,6 +19,7 @@ import operator import six from ceilometerclient import client as ceiloclient +from oslo_config import cfg from oslo_log import log from oslo_utils import timeutils @@ -38,11 +39,20 @@ COMPARATORS = { 'ne': operator.ne, } +OPTS = [ + cfg.IntOpt('additional_ingestion_lag', + min=0, + default=0, + help='The number of seconds to extend the evaluation windows ' + 'to compensate the reporting/ingestion lag.') +] + class ThresholdEvaluator(evaluator.Evaluator): # the sliding evaluation window is extended to allow - # for reporting/ingestion lag + # the reporting/ingestion lag this can be increased + # with 'additional_ingestion_lag' seconds if needed. look_back = 1 def __init__(self, conf): @@ -63,17 +73,17 @@ class ThresholdEvaluator(evaluator.Evaluator): return self._cm_client - @classmethod - def _bound_duration(cls, rule): + def _bound_duration(self, rule): """Bound the duration of the statistics query.""" now = timeutils.utcnow() # when exclusion of weak datapoints is enabled, we extend # the look-back period so as to allow a clearer sample count # trend to be established - look_back = (cls.look_back if not rule.get('exclude_outliers') + look_back = (self.look_back if not rule.get('exclude_outliers') else rule['evaluation_periods']) window = ((rule.get('period', None) or rule['granularity']) - * (rule['evaluation_periods'] + look_back)) + * (rule['evaluation_periods'] + look_back) + + self.conf.additional_ingestion_lag) start = now - datetime.timedelta(seconds=window) LOG.debug('query stats from %(start)s to ' '%(now)s', {'start': start, 'now': now}) diff --git a/aodh/opts.py b/aodh/opts.py index cec31eeae..dbc02f2d1 100644 --- a/aodh/opts.py +++ b/aodh/opts.py @@ -35,6 +35,7 @@ def list_opts(): itertools.chain( aodh.evaluator.OPTS, aodh.evaluator.event.OPTS, + 
aodh.evaluator.threshold.OPTS, aodh.notifier.rest.OPTS, aodh.queue.OPTS, aodh.service.OPTS)), diff --git a/aodh/tests/unit/evaluator/test_threshold.py b/aodh/tests/unit/evaluator/test_threshold.py index 30b5f7e90..1134215b0 100644 --- a/aodh/tests/unit/evaluator/test_threshold.py +++ b/aodh/tests/unit/evaluator/test_threshold.py @@ -193,6 +193,56 @@ class TestEvaluate(base.TestEvaluatorBase): in zip(self.alarms, reasons, reason_datas)] self.assertEqual(expected, self.notifier.notify.call_args_list) + @mock.patch.object(timeutils, 'utcnow') + def test_lag_configuration(self, mock_utcnow): + mock_utcnow.return_value = datetime.datetime(2012, 7, 2, 10, 45) + self.api_client.statistics.list.side_effect = [] + + self._set_all_alarms('ok') + self._evaluate_all_alarms() + self._set_all_alarms('ok') + self.conf.set_override("additional_ingestion_lag", 42) + self._evaluate_all_alarms() + + self.assertEqual([ + mock.call( + meter_name='cpu_util', period=60, + q=[{'value': 'cpu_util', 'op': 'eq', 'field': 'meter'}, + {'value': 'my_instance', 'op': 'eq', + 'field': 'resource_id'}, + {'value': '2012-07-02T10:45:00', 'op': 'le', + 'field': 'timestamp'}, + {'value': '2012-07-02T10:39:00', 'op': 'ge', + 'field': 'timestamp'}]), + mock.call( + meter_name='cpu_util', period=300, + q=[{'value': 'cpu_util', 'op': 'eq', 'field': 'meter'}, + {'value': 'my_group', 'op': 'eq', + 'field': 'metadata.user_metadata.AS'}, + {'value': '2012-07-02T10:45:00', 'op': 'le', + 'field': 'timestamp'}, + {'value': '2012-07-02T10:20:00', 'op': 'ge', + 'field': 'timestamp'}]), + mock.call( + meter_name='cpu_util', period=60, + q=[{'value': 'cpu_util', 'op': 'eq', 'field': 'meter'}, + {'value': 'my_instance', 'op': 'eq', + 'field': 'resource_id'}, + {'value': '2012-07-02T10:45:00', 'op': 'le', + 'field': 'timestamp'}, + {'value': '2012-07-02T10:38:18', 'op': 'ge', + 'field': 'timestamp'}]), + mock.call( + meter_name='cpu_util', period=300, + q=[{'value': 'cpu_util', 'op': 'eq', 'field': 'meter'}, + 
{'value': 'my_group', 'op': 'eq', + 'field': 'metadata.user_metadata.AS'}, + {'value': '2012-07-02T10:45:00', 'op': 'le', + 'field': 'timestamp'}, + {'value': '2012-07-02T10:19:18', 'op': 'ge', + 'field': 'timestamp'}])], + self.api_client.statistics.list.mock_calls) + def test_simple_alarm_clear(self): self._set_all_alarms('alarm') avgs = [self._get_stat('avg', self.alarms[0].rule['threshold'] - v) diff --git a/releasenotes/notes/ingestion-lag-2317725887287fbc.yaml b/releasenotes/notes/ingestion-lag-2317725887287fbc.yaml new file mode 100644 index 000000000..dc2b52c19 --- /dev/null +++ b/releasenotes/notes/ingestion-lag-2317725887287fbc.yaml @@ -0,0 +1,7 @@ +--- +features: + - Allow extending the alarm evaluation windows to compensate for the + reporting/ingestion lag. + + A new option, additional_ingestion_lag, is introduced and defaults to 0. + It represents the number of seconds of the window extension.