diff --git a/aodh/evaluator/gnocchi.py b/aodh/evaluator/gnocchi.py index beb6b955c..455f62182 100644 --- a/aodh/evaluator/gnocchi.py +++ b/aodh/evaluator/gnocchi.py @@ -32,63 +32,75 @@ OPTS = [ ] -class GnocchiThresholdEvaluator(threshold.ThresholdEvaluator): - +class GnocchiBase(threshold.ThresholdEvaluator): def __init__(self, conf): - super(threshold.ThresholdEvaluator, self).__init__(conf) + super(GnocchiBase, self).__init__(conf) self._gnocchi_client = client.Client( '1', keystone_client.get_session(conf), interface=conf.service_credentials.interface, region_name=conf.service_credentials.region_name, endpoint_override=conf.gnocchi_url) - def _statistics(self, alarm, start, end): - """Retrieve statistics over the current window.""" - try: - if alarm.type == 'gnocchi_aggregation_by_resources_threshold': - # FIXME(sileht): In case of a heat autoscaling stack decide to - # delete an instance, the gnocchi metrics associated to this - # instance will be no more updated and when the alarm will ask - # for the aggregation, gnocchi will raise a 'No overlap' - # exception. - # So temporary set 'needed_overlap' to 0 to disable the - # gnocchi checks about missing points. For more detail see: - # https://bugs.launchpad.net/gnocchi/+bug/1479429 - return self._gnocchi_client.metric.aggregation( - metrics=alarm.rule['metric'], - query=jsonutils.loads(alarm.rule['query']), - resource_type=alarm.rule["resource_type"], - start=start, stop=end, - aggregation=alarm.rule['aggregation_method'], - needed_overlap=0, - ) - elif alarm.type == 'gnocchi_aggregation_by_metrics_threshold': - return self._gnocchi_client.metric.aggregation( - metrics=alarm.rule['metrics'], - start=start, stop=end, - aggregation=alarm.rule['aggregation_method']) - elif alarm.type == 'gnocchi_resources_threshold': - return self._gnocchi_client.metric.get_measures( - metric=alarm.rule['metric'], - start=start, stop=end, - resource_id=alarm.rule['resource_id'], - aggregation=alarm.rule['aggregation_method']) - except Exception: - LOG.exception(_('alarm stats retrieval failed')) - return [] - @staticmethod - def _sanitize(alarm, statistics): + def _sanitize(rule, statistics): """Return the datapoints that correspond to the alarm granularity""" # TODO(sileht): if there's no direct match, but there is an archive # policy with granularity that's an even divisor or the period, # we could potentially do a mean-of-means (or max-of-maxes or whatever, # but not a stddev-of-stddevs). 
# TODO(sileht): support alarm['exclude_outliers'] - LOG.error('sanitize (%s) stats %s', alarm.rule['granularity'], - statistics) + LOG.debug('sanitize stats %s', statistics) statistics = [stats[2] for stats in statistics - if stats[1] == alarm.rule['granularity']] - statistics = statistics[-alarm.rule['evaluation_periods']:] - LOG.error('pruned statistics to %d', len(statistics)) + if stats[1] == rule['granularity']] + statistics = statistics[-rule['evaluation_periods']:] + LOG.debug('pruned statistics to %d', len(statistics)) return statistics + + +class GnocchiResourceThresholdEvaluator(GnocchiBase): + def _statistics(self, rule, start, end): + try: + return self._gnocchi_client.metric.get_measures( + metric=rule['metric'], + start=start, stop=end, + resource_id=rule['resource_id'], + aggregation=rule['aggregation_method']) + except Exception: + LOG.exception(_('alarm stats retrieval failed')) + return [] + + +class GnocchiAggregationMetricsThresholdEvaluator(GnocchiBase): + def _statistics(self, rule, start, end): + try: + return self._gnocchi_client.metric.aggregation( + metrics=rule['metrics'], + start=start, stop=end, + aggregation=rule['aggregation_method']) + except Exception: + LOG.exception(_('alarm stats retrieval failed')) + return [] + + +class GnocchiAggregationResourcesThresholdEvaluator(GnocchiBase): + def _statistics(self, rule, start, end): + # FIXME(sileht): In case of a heat autoscaling stack decide to + # delete an instance, the gnocchi metrics associated to this + # instance will be no more updated and when the alarm will ask + # for the aggregation, gnocchi will raise a 'No overlap' + # exception. + # So temporary set 'needed_overlap' to 0 to disable the + # gnocchi checks about missing points. For more detail see: + # https://bugs.launchpad.net/gnocchi/+bug/1479429 + try: + return self._gnocchi_client.metric.aggregation( + metrics=rule['metric'], + query=jsonutils.loads(rule['query']), + resource_type=rule["resource_type"], + start=start, stop=end, + aggregation=rule['aggregation_method'], + needed_overlap=0, + ) + except Exception: + LOG.exception(_('alarm stats retrieval failed')) + return [] diff --git a/aodh/evaluator/threshold.py b/aodh/evaluator/threshold.py index d1e53a739..b220c351a 100644 --- a/aodh/evaluator/threshold.py +++ b/aodh/evaluator/threshold.py @@ -64,26 +64,26 @@ class ThresholdEvaluator(evaluator.Evaluator): return self._cm_client @classmethod - def _bound_duration(cls, alarm): + def _bound_duration(cls, rule): """Bound the duration of the statistics query.""" now = timeutils.utcnow() # when exclusion of weak datapoints is enabled, we extend # the look-back period so as to allow a clearer sample count # trend to be established - look_back = (cls.look_back if not alarm.rule.get('exclude_outliers') - else alarm.rule['evaluation_periods']) - window = ((alarm.rule.get('period', None) or alarm.rule['granularity']) - * (alarm.rule['evaluation_periods'] + look_back)) + look_back = (cls.look_back if not rule.get('exclude_outliers') + else rule['evaluation_periods']) + window = ((rule.get('period', None) or rule['granularity']) + * (rule['evaluation_periods'] + look_back)) start = now - datetime.timedelta(seconds=window) LOG.debug('query stats from %(start)s to ' '%(now)s', {'start': start, 'now': now}) return start.isoformat(), now.isoformat() @staticmethod - def _sanitize(alarm, statistics): + def _sanitize(rule, statistics): """Sanitize statistics.""" LOG.debug('sanitize stats %s', statistics) - if alarm.rule.get('exclude_outliers'): + if 
rule.get('exclude_outliers'): key = operator.attrgetter('count') mean = utils.mean(statistics, key) stddev = utils.stddev(statistics, key, mean) @@ -99,50 +99,27 @@ class ThresholdEvaluator(evaluator.Evaluator): # in practice statistics are always sorted by period start, not # strictly required by the API though - statistics = statistics[-alarm.rule['evaluation_periods']:] - result_statistics = [getattr(stat, alarm.rule['statistic']) + statistics = statistics[-rule['evaluation_periods']:] + result_statistics = [getattr(stat, rule['statistic']) for stat in statistics] LOG.debug('pruned statistics to %d', len(statistics)) return result_statistics - def _statistics(self, alarm, start, end): + def _statistics(self, rule, start, end): """Retrieve statistics over the current window.""" after = dict(field='timestamp', op='ge', value=start) before = dict(field='timestamp', op='le', value=end) - query = copy.copy(alarm.rule['query']) + query = copy.copy(rule['query']) query.extend([before, after]) LOG.debug('stats query %s', query) try: return self.cm_client.statistics.list( - meter_name=alarm.rule['meter_name'], q=query, - period=alarm.rule['period']) + meter_name=rule['meter_name'], q=query, + period=rule['period']) except Exception: LOG.exception(_('alarm stats retrieval failed')) return [] - def _sufficient(self, alarm, statistics): - """Check for the sufficiency of the data for evaluation. - - Ensure there is sufficient data for evaluation, transitioning to - unknown otherwise. - """ - sufficient = len(statistics) >= alarm.rule['evaluation_periods'] - if not sufficient and alarm.state != evaluator.UNKNOWN: - LOG.warning(_LW('Expecting %(expected)d datapoints but only get ' - '%(actual)d') % { - 'expected': alarm.rule['evaluation_periods'], - 'actual': len(statistics)}) - # Reason is not same as log message because we want to keep - # consistent since thirdparty software may depend on old format. - reason = _('%d datapoints are unknown') % alarm.rule[ - 'evaluation_periods'] - last = None if not statistics else statistics[-1] - reason_data = self._reason_data('unknown', - alarm.rule['evaluation_periods'], - last) - self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data) - return sufficient - @staticmethod def _reason_data(disposition, count, most_recent): """Create a reason data dictionary for this evaluator type.""" @@ -150,7 +127,7 @@ class ThresholdEvaluator(evaluator.Evaluator): 'count': count, 'most_recent': most_recent} @classmethod - def _reason(cls, alarm, statistics, distilled, state): + def _reason(cls, alarm, statistics, state): """Fabricate reason string.""" count = len(statistics) disposition = 'inside' if state == evaluator.OK else 'outside' @@ -166,35 +143,64 @@ class ThresholdEvaluator(evaluator.Evaluator): ' %(disposition)s threshold, most recent: %(most_recent)s') % dict(reason_data, state=state)), reason_data - def _transition(self, alarm, statistics, compared): - """Transition alarm state if necessary. + def evaluate_rule(self, alarm_rule): + """Evaluate alarm rule. - The transition rules are currently hardcoded as: - - - transitioning from a known state requires an unequivocal - set of datapoints - - - transitioning from unknown is on the basis of the most - recent datapoint if equivocal - - Ultimately this will be policy-driven. + :returns: state, trending state and statistics. 
""" + start, end = self._bound_duration(alarm_rule) + statistics = self._statistics(alarm_rule, start, end) + statistics = self._sanitize(alarm_rule, statistics) + sufficient = len(statistics) >= alarm_rule['evaluation_periods'] + if not sufficient: + return evaluator.UNKNOWN, None, statistics + + def _compare(value): + op = COMPARATORS[alarm_rule['comparison_operator']] + limit = alarm_rule['threshold'] + LOG.debug('comparing value %(value)s against threshold' + ' %(limit)s', {'value': value, 'limit': limit}) + return op(value, limit) + + compared = list(six.moves.map(_compare, statistics)) distilled = all(compared) unequivocal = distilled or not any(compared) - unknown = alarm.state == evaluator.UNKNOWN - continuous = alarm.repeat_actions if unequivocal: state = evaluator.ALARM if distilled else evaluator.OK - reason, reason_data = self._reason(alarm, statistics, - distilled, state) - if alarm.state != state or continuous: - self._refresh(alarm, state, reason, reason_data) - elif unknown or continuous: + return state, None, statistics + else: trending_state = evaluator.ALARM if compared[-1] else evaluator.OK - state = trending_state if unknown else alarm.state - reason, reason_data = self._reason(alarm, statistics, - distilled, state) + return None, trending_state, statistics + + def _transition_alarm(self, alarm, state, trending_state, statistics): + unknown = alarm.state == evaluator.UNKNOWN + continuous = alarm.repeat_actions + + if trending_state: + if unknown or continuous: + state = trending_state if unknown else alarm.state + reason, reason_data = self._reason(alarm, statistics, state) + self._refresh(alarm, state, reason, reason_data) + return + + if state == evaluator.UNKNOWN and not unknown: + LOG.warn(_LW('Expecting %(expected)d datapoints but only get ' + '%(actual)d') % { + 'expected': alarm.rule['evaluation_periods'], + 'actual': len(statistics)}) + # Reason is not same as log message because we want to keep + # consistent since thirdparty software may depend on old format. 
+ reason = _('%d datapoints are unknown') % alarm.rule[ + 'evaluation_periods'] + last = None if not statistics else statistics[-1] + reason_data = self._reason_data('unknown', + alarm.rule['evaluation_periods'], + last) + self._refresh(alarm, state, reason, reason_data) + + elif state and (alarm.state != state or continuous): + reason, reason_data = self._reason(alarm, statistics, state) self._refresh(alarm, state, reason, reason_data) def evaluate(self, alarm): @@ -203,18 +209,5 @@ class ThresholdEvaluator(evaluator.Evaluator): 'within its time constraint.', alarm.alarm_id) return - start, end = self._bound_duration(alarm) - statistics = self._statistics(alarm, start, end) - statistics = self._sanitize(alarm, statistics) - - if self._sufficient(alarm, statistics): - def _compare(value): - op = COMPARATORS[alarm.rule['comparison_operator']] - limit = alarm.rule['threshold'] - LOG.debug('comparing value %(value)s against threshold' - ' %(limit)s', {'value': value, 'limit': limit}) - return op(value, limit) - - self._transition(alarm, - statistics, - list(six.moves.map(_compare, statistics))) + state, trending_state, statistics = self.evaluate_rule(alarm.rule) + self._transition_alarm(alarm, state, trending_state, statistics) diff --git a/aodh/tests/unit/evaluator/test_gnocchi.py b/aodh/tests/unit/evaluator/test_gnocchi.py index 10f29f6ee..de23d917a 100644 --- a/aodh/tests/unit/evaluator/test_gnocchi.py +++ b/aodh/tests/unit/evaluator/test_gnocchi.py @@ -31,17 +31,12 @@ from aodh.tests import constants from aodh.tests.unit.evaluator import base -class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase): - EVALUATOR = gnocchi.GnocchiThresholdEvaluator - +class TestGnocchiEvaluatorBase(base.TestEvaluatorBase): def setUp(self): self.client = self.useFixture(mockpatch.Patch( 'aodh.evaluator.gnocchi.client' )).mock.Client.return_value - super(TestGnocchiThresholdEvaluate, self).setUp() - - def prepare_alarms(self): - self.alarms = [ + self.prepared_alarms = [ models.Alarm(name='instance_running_hot', description='instance_running_hot', type='gnocchi_resources_threshold', @@ -115,10 +110,11 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase): metric='cpu_util', resource_type='instance', query='{"=": {"server_group": ' - '"my_autoscaling_group"}}') + '"my_autoscaling_group"}}') ), ] + super(TestGnocchiEvaluatorBase, self).setUp() @staticmethod def _get_stats(granularity, values): @@ -135,29 +131,14 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase): for alarm in self.alarms: alarm.rule[field] = value - def test_retry_transient_api_failure(self): - means = self._get_stats(60, [self.alarms[0].rule['threshold'] - v - for v in moves.xrange(5)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] + v - for v in moves.xrange(4)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] - v - for v in moves.xrange(6)]) - self.client.metric.get_measures.side_effect = [ - exceptions.ClientException(501, "error2"), - means] - self.client.metric.aggregation.side_effect = [ - Exception('boom'), - exceptions.ClientException(500, "error"), - maxs, avgs2] + def _test_retry_transient(self): self._evaluate_all_alarms() self._assert_all_alarms('insufficient data') self._evaluate_all_alarms() self._assert_all_alarms('ok') - def test_simple_insufficient(self): + def _test_simple_insufficient(self): self._set_all_alarms('ok') - self.client.metric.get_measures.return_value = [] - self.client.metric.aggregation.return_value = [] self._evaluate_all_alarms() 
self._assert_all_alarms('insufficient data') expected = [mock.call(alarm) for alarm in self.alarms] @@ -167,227 +148,125 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase): alarm, 'ok', ('%d datapoints are unknown' - % alarm.rule['evaluation_periods']), + % alarm.rule['evaluation_periods']), self._reason_data('unknown', alarm.rule['evaluation_periods'], None)) for alarm in self.alarms] self.assertEqual(expected, self.notifier.notify.call_args_list) + +class TestGnocchiResourceThresholdEvaluate(TestGnocchiEvaluatorBase): + EVALUATOR = gnocchi.GnocchiResourceThresholdEvaluator + + def prepare_alarms(self): + self.alarms = self.prepared_alarms[0:1] + + def test_retry_transient_api_failure(self): + means = self._get_stats(60, [self.alarms[0].rule['threshold'] - v + for v in moves.xrange(5)]) + self.client.metric.get_measures.side_effect = [ + exceptions.ClientException(501, "error2"), means] + self._test_retry_transient() + + def test_simple_insufficient(self): + self.client.metric.get_measures.return_value = [] + self._test_simple_insufficient() + @mock.patch.object(timeutils, 'utcnow') def test_simple_alarm_trip(self, utcnow): utcnow.return_value = datetime.datetime(2015, 1, 26, 12, 57, 0, 0) self._set_all_alarms('ok') avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v for v in moves.xrange(1, 6)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v - for v in moves.xrange(4)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v - for v in moves.xrange(1, 7)]) - self.client.metric.get_measures.side_effect = [avgs] - self.client.metric.aggregation.side_effect = [maxs, avgs2] self._evaluate_all_alarms() - - start_alarm1 = "2015-01-26T12:51:00" - start_alarm2 = "2015-01-26T12:32:00" - start_alarm3 = "2015-01-26T12:51:10" + start_alarm = "2015-01-26T12:51:00" end = "2015-01-26T12:57:00" - self.assertEqual([ - mock.call.get_measures(aggregation='mean', metric='cpu_util', - resource_id='my_instance', - start=start_alarm1, stop=end), - mock.call.aggregation(aggregation='max', - metrics=[ - '0bb1604d-1193-4c0a-b4b8-74b170e35e83', - '9ddc209f-42f8-41e1-b8f1-8804f59c4053'], - start=start_alarm2, stop=end), - mock.call.aggregation(aggregation='mean', metrics='cpu_util', - needed_overlap=0, - query={"=": {"server_group": - "my_autoscaling_group"}}, - resource_type='instance', - start=start_alarm3, stop=end), - ], self.client.metric.mock_calls) + self.assertEqual( + [mock.call.get_measures(aggregation='mean', metric='cpu_util', + resource_id='my_instance', + start=start_alarm, stop=end)], + self.client.metric.mock_calls) - self._assert_all_alarms('alarm') - expected = [mock.call(alarm) for alarm in self.alarms] - update_calls = self.storage_conn.update_alarm.call_args_list - self.assertEqual(expected, update_calls) - reasons = ['Transition to alarm due to 5 samples outside' - ' threshold, most recent: %s' % avgs[-1][2], - 'Transition to alarm due to 4 samples outside' - ' threshold, most recent: %s' % maxs[-1][2], - 'Transition to alarm due to 6 samples outside' - ' threshold, most recent: %s' % avgs2[-1][2], - ] - reason_datas = [self._reason_data('outside', 5, avgs[-1][2]), - self._reason_data('outside', 4, maxs[-1][2]), - self._reason_data('outside', 6, avgs2[-1][2])] - expected = [mock.call(alarm, 'ok', reason, reason_data) - for alarm, reason, reason_data - in zip(self.alarms, reasons, reason_datas)] - self.assertEqual(expected, self.notifier.notify.call_args_list) + reason = ('Transition to alarm due to 5 samples outside threshold,' + ' most 
recent: %s' % avgs[-1][2]) + reason_data = self._reason_data('outside', 5, avgs[-1][2]) + expected = mock.call(self.alarms[0], 'ok', reason, reason_data) + self.assertEqual(expected, self.notifier.notify.call_args) def test_simple_alarm_clear(self): self._set_all_alarms('alarm') avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] - v for v in moves.xrange(5)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] + v - for v in moves.xrange(1, 5)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] - v - for v in moves.xrange(6)]) + self.client.metric.get_measures.side_effect = [avgs] - self.client.metric.aggregation.side_effect = [maxs, avgs2] + self._evaluate_all_alarms() self._assert_all_alarms('ok') expected = [mock.call(alarm) for alarm in self.alarms] update_calls = self.storage_conn.update_alarm.call_args_list self.assertEqual(expected, update_calls) - reasons = ['Transition to ok due to 5 samples inside' - ' threshold, most recent: %s' % avgs[-1][2], - 'Transition to ok due to 4 samples inside' - ' threshold, most recent: %s' % maxs[-1][2], - 'Transition to ok due to 6 samples inside' - ' threshold, most recent: %s' % avgs2[-1][2]] - reason_datas = [self._reason_data('inside', 5, avgs[-1][2]), - self._reason_data('inside', 4, maxs[-1][2]), - self._reason_data('inside', 6, avgs2[-1][2])] - expected = [mock.call(alarm, 'alarm', reason, reason_data) - for alarm, reason, reason_data - in zip(self.alarms, reasons, reason_datas)] - self.assertEqual(expected, self.notifier.notify.call_args_list) + + reason = ('Transition to ok due to 5 samples inside' + ' threshold, most recent: %s' % avgs[-1][2]) + + reason_data = self._reason_data('inside', 5, avgs[-1][2]) + expected = mock.call(self.alarms[0], 'alarm', reason, reason_data) + self.assertEqual(expected, self.notifier.notify.call_args) def test_equivocal_from_known_state_ok(self): self._set_all_alarms('ok') avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v for v in moves.xrange(5)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v - for v in moves.xrange(-1, 3)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v - for v in moves.xrange(6)]) self.client.metric.get_measures.side_effect = [avgs] - self.client.metric.aggregation.side_effect = [maxs, avgs2] + self._evaluate_all_alarms() self._assert_all_alarms('ok') - self.assertEqual( - [], - self.storage_conn.update_alarm.call_args_list) + self.assertEqual([], + self.storage_conn.update_alarm.call_args_list) self.assertEqual([], self.notifier.notify.call_args_list) - def test_equivocal_ok_to_alarm(self): - self.alarms = [self.alarms[1]] - self._set_all_alarms('ok') - # NOTE(sileht): we add one useless point (81.0) that will break - # the test if the evaluator doesn't remove it. 
- maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v - for v in moves.xrange(-1, 5)]) - self.client.metric.aggregation.side_effect = [maxs] - self._evaluate_all_alarms() - self._assert_all_alarms('alarm') - - def test_equivocal_from_known_state_and_repeat_actions(self): - self._set_all_alarms('ok') - self.alarms[1].repeat_actions = True - avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v - for v in moves.xrange(5)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v - for v in moves.xrange(-1, 3)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v - for v in moves.xrange(6)]) - self.client.metric.get_measures.side_effect = [avgs] - self.client.metric.aggregation.side_effect = [maxs, avgs2] - self._evaluate_all_alarms() - self._assert_all_alarms('ok') - self.assertEqual([], self.storage_conn.update_alarm.call_args_list) - reason = ('Remaining as ok due to 4 samples inside' - ' threshold, most recent: 8.0') - reason_datas = self._reason_data('inside', 4, 8.0) - expected = [mock.call(self.alarms[1], 'ok', reason, reason_datas)] - self.assertEqual(expected, self.notifier.notify.call_args_list) - - def test_unequivocal_from_known_state_and_repeat_actions(self): - self._set_all_alarms('alarm') - self.alarms[1].repeat_actions = True - avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v - for v in moves.xrange(1, 6)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v - for v in moves.xrange(4)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v - for v in moves.xrange(6)]) - self.client.metric.get_measures.side_effect = [avgs] - self.client.metric.aggregation.side_effect = [maxs, avgs2] - self._evaluate_all_alarms() - self._assert_all_alarms('alarm') - self.assertEqual([], self.storage_conn.update_alarm.call_args_list) - reason = ('Remaining as alarm due to 4 samples outside' - ' threshold, most recent: 7.0') - reason_datas = self._reason_data('outside', 4, 7.0) - expected = [mock.call(self.alarms[1], 'alarm', - reason, reason_datas)] - self.assertEqual(expected, self.notifier.notify.call_args_list) - def test_state_change_and_repeat_actions(self): self._set_all_alarms('ok') self.alarms[0].repeat_actions = True - self.alarms[1].repeat_actions = True avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v for v in moves.xrange(1, 6)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v - for v in moves.xrange(4)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v - for v in moves.xrange(1, 7)]) + self.client.metric.get_measures.side_effect = [avgs] - self.client.metric.aggregation.side_effect = [maxs, avgs2] + self._evaluate_all_alarms() self._assert_all_alarms('alarm') expected = [mock.call(alarm) for alarm in self.alarms] update_calls = self.storage_conn.update_alarm.call_args_list self.assertEqual(expected, update_calls) - reasons = ['Transition to alarm due to 5 samples outside' - ' threshold, most recent: %s' % avgs[-1][2], - 'Transition to alarm due to 4 samples outside' - ' threshold, most recent: %s' % maxs[-1][2], - 'Transition to alarm due to 6 samples outside' - ' threshold, most recent: %s' % avgs2[-1][2]] - reason_datas = [self._reason_data('outside', 5, avgs[-1][2]), - self._reason_data('outside', 4, maxs[-1][2]), - self._reason_data('outside', 6, avgs2[-1][2])] - expected = [mock.call(alarm, 'ok', reason, reason_data) - for alarm, reason, reason_data - in zip(self.alarms, reasons, reason_datas)] - self.assertEqual(expected, 
self.notifier.notify.call_args_list) + + reason = ('Transition to alarm due to 5 samples outside ' + 'threshold, most recent: %s' % avgs[-1][2]) + reason_data = self._reason_data('outside', 5, avgs[-1][2]) + expected = mock.call(self.alarms[0], 'ok', reason, reason_data) + self.assertEqual(expected, self.notifier.notify.call_args) def test_equivocal_from_unknown(self): self._set_all_alarms('insufficient data') avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v for v in moves.xrange(1, 6)]) - maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v - for v in moves.xrange(4)]) - avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v - for v in moves.xrange(1, 7)]) + self.client.metric.get_measures.side_effect = [avgs] - self.client.metric.aggregation.side_effect = [maxs, avgs2] + self._evaluate_all_alarms() self._assert_all_alarms('alarm') expected = [mock.call(alarm) for alarm in self.alarms] update_calls = self.storage_conn.update_alarm.call_args_list self.assertEqual(expected, update_calls) - reasons = ['Transition to alarm due to 5 samples outside' - ' threshold, most recent: %s' % avgs[-1][2], - 'Transition to alarm due to 4 samples outside' - ' threshold, most recent: %s' % maxs[-1][2], - 'Transition to alarm due to 6 samples outside' - ' threshold, most recent: %s' % avgs2[-1][2]] - reason_datas = [self._reason_data('outside', 5, avgs[-1][2]), - self._reason_data('outside', 4, maxs[-1][2]), - self._reason_data('outside', 6, avgs2[-1][2])] - expected = [mock.call(alarm, 'insufficient data', - reason, reason_data) - for alarm, reason, reason_data - in zip(self.alarms, reasons, reason_datas)] - self.assertEqual(expected, self.notifier.notify.call_args_list) + + reason = ('Transition to alarm due to 5 samples outside' + ' threshold, most recent: %s' % avgs[-1][2]) + reason_data = self._reason_data('outside', 5, avgs[-1][2]) + expected = mock.call(self.alarms[0], 'insufficient data', + reason, reason_data) + self.assertEqual(expected, self.notifier.notify.call_args) @unittest.skipIf(six.PY3, "the aodh base class is not python 3 ready") @@ -401,13 +280,10 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase): 'duration': 10800, # 3 hours 'timezone': 'Europe/Ljubljana'} ] - self.alarms[1].time_constraints = self.alarms[0].time_constraints - self.alarms[2].time_constraints = self.alarms[0].time_constraints dt = datetime.datetime(2014, 1, 1, 15, 0, 0, tzinfo=pytz.timezone('Europe/Ljubljana')) mock_utcnow.return_value = dt.astimezone(pytz.UTC) self.client.metric.get_measures.return_value = [] - self.client.metric.aggregation.return_value = [] self._evaluate_all_alarms() self._assert_all_alarms('ok') update_calls = self.storage_conn.update_alarm.call_args_list @@ -415,3 +291,197 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase): "Alarm should not change state if the current " " time is outside its time constraint.") self.assertEqual([], self.notifier.notify.call_args_list) + + +class TestGnocchiAggregationMetricsThresholdEvaluate(TestGnocchiEvaluatorBase): + EVALUATOR = gnocchi.GnocchiAggregationMetricsThresholdEvaluator + + def prepare_alarms(self): + self.alarms = self.prepared_alarms[1:2] + + def test_retry_transient_api_failure(self): + maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] + v + for v in moves.xrange(4)]) + self.client.metric.aggregation.side_effect = [Exception('boom'), maxs] + self._test_retry_transient() + + def test_simple_insufficient(self): + self.client.metric.aggregation.return_value = [] + 
self._test_simple_insufficient() + + @mock.patch.object(timeutils, 'utcnow') + def test_simple_alarm_trip(self, utcnow): + utcnow.return_value = datetime.datetime(2015, 1, 26, 12, 57, 0, 0) + self._set_all_alarms('ok') + + maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v + for v in moves.xrange(4)]) + self.client.metric.aggregation.side_effect = [maxs] + self._evaluate_all_alarms() + start_alarm = "2015-01-26T12:32:00" + end = "2015-01-26T12:57:00" + + self.assertEqual( + [mock.call.aggregation(aggregation='max', + metrics=[ + '0bb1604d-1193-4c0a-b4b8-74b170e35e83', + '9ddc209f-42f8-41e1-b8f1-8804f59c4053'], + start=start_alarm, stop=end)], + self.client.metric.mock_calls) + self._assert_all_alarms('alarm') + expected = [mock.call(alarm) for alarm in self.alarms] + update_calls = self.storage_conn.update_alarm.call_args_list + self.assertEqual(expected, update_calls) + reason = ('Transition to alarm due to 4 samples outside ' + 'threshold, most recent: %s' % maxs[-1][2]) + + reason_data = self._reason_data('outside', 4, maxs[-1][2]) + expected = mock.call(self.alarms[0], 'ok', reason, reason_data) + self.assertEqual(expected, self.notifier.notify.call_args) + + def test_simple_alarm_clear(self): + self._set_all_alarms('alarm') + maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] + v + for v in moves.xrange(1, 5)]) + self.client.metric.aggregation.side_effect = [maxs] + self._evaluate_all_alarms() + self._assert_all_alarms('ok') + expected = [mock.call(alarm) for alarm in self.alarms] + update_calls = self.storage_conn.update_alarm.call_args_list + self.assertEqual(expected, update_calls) + reason = ('Transition to ok due to 4 samples inside ' + 'threshold, most recent: %s' % maxs[-1][2]) + reason_data = self._reason_data('inside', 4, maxs[-1][2]) + expected = mock.call(self.alarms[0], 'alarm', reason, reason_data) + + self.assertEqual(expected, self.notifier.notify.call_args) + + def test_equivocal_from_known_state_ok(self): + self._set_all_alarms('ok') + maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v + for v in moves.xrange(-1, 3)]) + self.client.metric.aggregation.side_effect = [maxs] + self._evaluate_all_alarms() + self._assert_all_alarms('ok') + self.assertEqual( + [], + self.storage_conn.update_alarm.call_args_list) + self.assertEqual([], self.notifier.notify.call_args_list) + + def test_equivocal_ok_to_alarm(self): + self._set_all_alarms('ok') + # NOTE(sileht): we add one useless point (81.0) that will break + # the test if the evaluator doesn't remove it. 
+ maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v + for v in moves.xrange(-1, 5)]) + self.client.metric.aggregation.side_effect = [maxs] + self._evaluate_all_alarms() + self._assert_all_alarms('alarm') + + def test_equivocal_from_known_state_and_repeat_actions(self): + self._set_all_alarms('ok') + self.alarms[0].repeat_actions = True + maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v + for v in moves.xrange(-1, 3)]) + self.client.metric.aggregation.side_effect = [maxs] + self._evaluate_all_alarms() + self._assert_all_alarms('ok') + self.assertEqual([], self.storage_conn.update_alarm.call_args_list) + reason = ('Remaining as ok due to 4 samples inside' + ' threshold, most recent: 8.0') + reason_datas = self._reason_data('inside', 4, 8.0) + expected = [mock.call(self.alarms[0], 'ok', reason, reason_datas)] + self.assertEqual(expected, self.notifier.notify.call_args_list) + + def test_unequivocal_from_known_state_and_repeat_actions(self): + self._set_all_alarms('alarm') + self.alarms[0].repeat_actions = True + + maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v + for v in moves.xrange(4)]) + self.client.metric.aggregation.side_effect = [maxs] + self._evaluate_all_alarms() + self._assert_all_alarms('alarm') + self.assertEqual([], self.storage_conn.update_alarm.call_args_list) + reason = ('Remaining as alarm due to 4 samples outside' + ' threshold, most recent: 7.0') + reason_datas = self._reason_data('outside', 4, 7.0) + expected = [mock.call(self.alarms[0], 'alarm', + reason, reason_datas)] + self.assertEqual(expected, self.notifier.notify.call_args_list) + + +class TestGnocchiAggregationResourcesThresholdEvaluate( + TestGnocchiEvaluatorBase): + EVALUATOR = gnocchi.GnocchiAggregationResourcesThresholdEvaluator + + def prepare_alarms(self): + self.alarms = self.prepared_alarms[2:3] + + def test_retry_transient_api_failure(self): + avgs2 = self._get_stats(50, [self.alarms[0].rule['threshold'] - v + for v in moves.xrange(6)]) + self.client.metric.aggregation.side_effect = [ + exceptions.ClientException(500, "error"), avgs2] + self._test_retry_transient() + + def test_simple_insufficient(self): + self.client.metric.aggregation.return_value = [] + self._test_simple_insufficient() + + @mock.patch.object(timeutils, 'utcnow') + def test_simple_alarm_trip(self, utcnow): + utcnow.return_value = datetime.datetime(2015, 1, 26, 12, 57, 0, 0) + self._set_all_alarms('ok') + avgs = self._get_stats(50, [self.alarms[0].rule['threshold'] + v + for v in moves.xrange(1, 7)]) + + self.client.metric.aggregation.side_effect = [avgs] + self._evaluate_all_alarms() + start_alarm = "2015-01-26T12:51:10" + end = "2015-01-26T12:57:00" + self.assertEqual( + [mock.call.aggregation(aggregation='mean', metrics='cpu_util', + needed_overlap=0, + query={"=": {"server_group": + "my_autoscaling_group"}}, + resource_type='instance', + start=start_alarm, stop=end)], + self.client.metric.mock_calls) + self._assert_all_alarms('alarm') + expected = [mock.call(alarm) for alarm in self.alarms] + update_calls = self.storage_conn.update_alarm.call_args_list + self.assertEqual(expected, update_calls) + reason = ('Transition to alarm due to 6 samples outside ' + 'threshold, most recent: %s' % avgs[-1][2]) + reason_data = self._reason_data('outside', 6, avgs[-1][2]) + expected = mock.call(self.alarms[0], 'ok', reason, reason_data) + self.assertEqual(expected, self.notifier.notify.call_args) + + def test_simple_alarm_clear(self): + self._set_all_alarms('alarm') + avgs = self._get_stats(50, 
[self.alarms[0].rule['threshold'] - v + for v in moves.xrange(6)]) + self.client.metric.aggregation.side_effect = [avgs] + self._evaluate_all_alarms() + self._assert_all_alarms('ok') + expected = [mock.call(alarm) for alarm in self.alarms] + update_calls = self.storage_conn.update_alarm.call_args_list + self.assertEqual(expected, update_calls) + reason = ('Transition to ok due to 6 samples inside ' + 'threshold, most recent: %s' % avgs[-1][2]) + reason_data = self._reason_data('inside', 6, avgs[-1][2]) + expected = mock.call(self.alarms[0], 'alarm', reason, reason_data) + self.assertEqual(expected, self.notifier.notify.call_args) + + def test_equivocal_from_known_state_ok(self): + self._set_all_alarms('ok') + avgs = self._get_stats(50, [self.alarms[0].rule['threshold'] + v + for v in moves.xrange(6)]) + self.client.metric.aggregation.side_effect = [avgs] + self._evaluate_all_alarms() + self._assert_all_alarms('ok') + self.assertEqual( + [], + self.storage_conn.update_alarm.call_args_list) + self.assertEqual([], self.notifier.notify.call_args_list) diff --git a/aodh/tests/unit/evaluator/test_threshold.py b/aodh/tests/unit/evaluator/test_threshold.py index c9ee68fc3..58c8bf70e 100644 --- a/aodh/tests/unit/evaluator/test_threshold.py +++ b/aodh/tests/unit/evaluator/test_threshold.py @@ -396,7 +396,7 @@ class TestEvaluate(base.TestEvaluatorBase): alarm.rule['exclude_outliers'] = exclude_outliers with mock.patch.object(timeutils, 'utcnow') as mock_utcnow: mock_utcnow.return_value = datetime.datetime(2012, 7, 2, 10, 45) - constraint = self.evaluator._bound_duration(alarm) + constraint = self.evaluator._bound_duration(alarm.rule) self.assertEqual((start, timeutils.utcnow().isoformat()), constraint) diff --git a/setup.cfg b/setup.cfg index 1d9ab8c29..ddb92b816 100644 --- a/setup.cfg +++ b/setup.cfg @@ -84,9 +84,9 @@ aodh.alarm.rule = aodh.evaluator = threshold = aodh.evaluator.threshold:ThresholdEvaluator combination = aodh.evaluator.combination:CombinationEvaluator - gnocchi_resources_threshold = aodh.evaluator.gnocchi:GnocchiThresholdEvaluator - gnocchi_aggregation_by_metrics_threshold = aodh.evaluator.gnocchi:GnocchiThresholdEvaluator - gnocchi_aggregation_by_resources_threshold = aodh.evaluator.gnocchi:GnocchiThresholdEvaluator + gnocchi_resources_threshold = aodh.evaluator.gnocchi:GnocchiResourceThresholdEvaluator + gnocchi_aggregation_by_metrics_threshold = aodh.evaluator.gnocchi:GnocchiAggregationMetricsThresholdEvaluator + gnocchi_aggregation_by_resources_threshold = aodh.evaluator.gnocchi:GnocchiAggregationResourcesThresholdEvaluator aodh.notifier = log = aodh.notifier.log:LogAlarmNotifier
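
The refactor above splits threshold evaluation into a pure rule check (`evaluate_rule`, which returns a state, a trending state, and the statistics) and a separate state transition step (`_transition_alarm`). Below is a minimal sketch of that control flow under simplifying assumptions: it uses plain dicts and values in place of aodh's `Alarm` model, storage connection, and notifier, and the helper names `evaluate_rule`/`transition` here are illustrative stand-ins, not the aodh API itself.

```python
# Sketch only: mirrors the evaluate_rule()/_transition_alarm() split introduced
# by the patch, with simplified stand-ins instead of aodh's Alarm model,
# storage connection and notifier. State strings match aodh.evaluator constants.
import operator

OK, ALARM, UNKNOWN = 'ok', 'alarm', 'insufficient data'
COMPARATORS = {'gt': operator.gt, 'lt': operator.lt,
               'ge': operator.ge, 'le': operator.le,
               'eq': operator.eq, 'ne': operator.ne}


def evaluate_rule(rule, statistics):
    """Return (state, trending_state) for already-sanitized datapoints."""
    if len(statistics) < rule['evaluation_periods']:
        return UNKNOWN, None                      # not enough data
    op = COMPARATORS[rule['comparison_operator']]
    compared = [op(value, rule['threshold']) for value in statistics]
    if all(compared):
        return ALARM, None                        # unequivocally breaching
    if not any(compared):
        return OK, None                           # unequivocally inside
    return None, (ALARM if compared[-1] else OK)  # equivocal: trend only


def transition(current_state, repeat_actions, state, trending_state):
    """Return the new alarm state, or None when no refresh is needed."""
    unknown = current_state == UNKNOWN
    if trending_state:
        # Equivocal data only moves an alarm out of 'insufficient data'
        # (or re-notifies the current state when repeat_actions is set).
        if unknown or repeat_actions:
            return trending_state if unknown else current_state
        return None
    if state == UNKNOWN and not unknown:
        return UNKNOWN
    if state and (current_state != state or repeat_actions):
        return state
    return None


if __name__ == '__main__':
    rule = {'comparison_operator': 'gt', 'threshold': 80.0,
            'evaluation_periods': 3}
    # Five datapoints above the threshold: unequivocal alarm.
    print(evaluate_rule(rule, [81, 82, 85, 90, 99]))   # ('alarm', None)
    # Mixed datapoints: no firm state, trend follows the most recent point.
    print(evaluate_rule(rule, [81, 10, 99]))           # (None, 'alarm')
    # Applying the transition step to an alarm currently in 'ok'.
    print(transition('ok', False, *evaluate_rule(rule, [81, 82, 85])))  # alarm
```

With the rule evaluation isolated this way, the Gnocchi evaluators only need to supply their own `_statistics()` implementation (per-resource measures, aggregation across metric IDs, or aggregation by resource query), which is what motivates splitting `GnocchiThresholdEvaluator` into the three classes registered in `setup.cfg`.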