Merge "Refactor Gnocchi and threshold evaluators"

This commit is contained in:
Jenkins 2016-01-21 08:12:22 +00:00 committed by Gerrit Code Review
commit 015e186ea2
5 changed files with 385 additions and 310 deletions

View File

@ -32,63 +32,75 @@ OPTS = [
]
class GnocchiThresholdEvaluator(threshold.ThresholdEvaluator):
class GnocchiBase(threshold.ThresholdEvaluator):
def __init__(self, conf):
super(threshold.ThresholdEvaluator, self).__init__(conf)
super(GnocchiBase, self).__init__(conf)
self._gnocchi_client = client.Client(
'1', keystone_client.get_session(conf),
interface=conf.service_credentials.interface,
region_name=conf.service_credentials.region_name,
endpoint_override=conf.gnocchi_url)
def _statistics(self, alarm, start, end):
"""Retrieve statistics over the current window."""
try:
if alarm.type == 'gnocchi_aggregation_by_resources_threshold':
# FIXME(sileht): In case of a heat autoscaling stack decide to
# delete an instance, the gnocchi metrics associated to this
# instance will be no more updated and when the alarm will ask
# for the aggregation, gnocchi will raise a 'No overlap'
# exception.
# So temporary set 'needed_overlap' to 0 to disable the
# gnocchi checks about missing points. For more detail see:
# https://bugs.launchpad.net/gnocchi/+bug/1479429
return self._gnocchi_client.metric.aggregation(
metrics=alarm.rule['metric'],
query=jsonutils.loads(alarm.rule['query']),
resource_type=alarm.rule["resource_type"],
start=start, stop=end,
aggregation=alarm.rule['aggregation_method'],
needed_overlap=0,
)
elif alarm.type == 'gnocchi_aggregation_by_metrics_threshold':
return self._gnocchi_client.metric.aggregation(
metrics=alarm.rule['metrics'],
start=start, stop=end,
aggregation=alarm.rule['aggregation_method'])
elif alarm.type == 'gnocchi_resources_threshold':
return self._gnocchi_client.metric.get_measures(
metric=alarm.rule['metric'],
start=start, stop=end,
resource_id=alarm.rule['resource_id'],
aggregation=alarm.rule['aggregation_method'])
except Exception:
LOG.exception(_('alarm stats retrieval failed'))
return []
@staticmethod
def _sanitize(alarm, statistics):
def _sanitize(rule, statistics):
"""Return the datapoints that correspond to the alarm granularity"""
# TODO(sileht): if there's no direct match, but there is an archive
# policy with granularity that's an even divisor of the period,
# we could potentially do a mean-of-means (or max-of-maxes or whatever,
# but not a stddev-of-stddevs).
# TODO(sileht): support alarm['exclude_outliers']
LOG.error('sanitize (%s) stats %s', alarm.rule['granularity'],
statistics)
LOG.debug('sanitize stats %s', statistics)
statistics = [stats[2] for stats in statistics
if stats[1] == alarm.rule['granularity']]
statistics = statistics[-alarm.rule['evaluation_periods']:]
LOG.error('pruned statistics to %d', len(statistics))
if stats[1] == rule['granularity']]
statistics = statistics[-rule['evaluation_periods']:]
LOG.debug('pruned statistics to %d', len(statistics))
return statistics
class GnocchiResourceThresholdEvaluator(GnocchiBase):
    """Threshold evaluator reading one metric of one Gnocchi resource."""

    def _statistics(self, rule, start, end):
        """Fetch the measures for the rule between ``start`` and ``end``.

        :param rule: alarm rule mapping; the 'metric', 'resource_id' and
                     'aggregation_method' keys are read.
        :param start: lower bound handed to the client as ``start``.
        :param end: upper bound handed to the client as ``stop``.
        :returns: the client's result, or an empty list when anything
                  raised while building or issuing the request.
        """
        try:
            get_measures = self._gnocchi_client.metric.get_measures
            measures = get_measures(
                metric=rule['metric'],
                start=start,
                stop=end,
                resource_id=rule['resource_id'],
                aggregation=rule['aggregation_method'],
            )
        except Exception:
            # Best effort: a failed retrieval is logged and reported to the
            # caller as "no datapoints".
            LOG.exception(_('alarm stats retrieval failed'))
            measures = []
        return measures
class GnocchiAggregationMetricsThresholdEvaluator(GnocchiBase):
    """Threshold evaluator reading an aggregation over the rule's metrics."""

    def _statistics(self, rule, start, end):
        """Request the aggregated measures between ``start`` and ``end``.

        :param rule: alarm rule mapping; the 'metrics' and
                     'aggregation_method' keys are read.
        :param start: lower bound handed to the client as ``start``.
        :param end: upper bound handed to the client as ``stop``.
        :returns: the client's result, or an empty list when anything
                  raised while building or issuing the request.
        """
        try:
            aggregate = self._gnocchi_client.metric.aggregation
            aggregated = aggregate(
                metrics=rule['metrics'],
                start=start,
                stop=end,
                aggregation=rule['aggregation_method'],
            )
        except Exception:
            # Best effort: log the failure and behave as if no datapoint
            # had been returned.
            LOG.exception(_('alarm stats retrieval failed'))
            return []
        return aggregated
class GnocchiAggregationResourcesThresholdEvaluator(GnocchiBase):
    """Threshold evaluator aggregating one metric over queried resources."""

    def _statistics(self, rule, start, end):
        """Request the aggregation of the rule's metric across resources.

        :param rule: alarm rule mapping; the 'metric', 'query' (a JSON
                     string), 'resource_type' and 'aggregation_method'
                     keys are read.
        :param start: lower bound handed to the client as ``start``.
        :param end: upper bound handed to the client as ``stop``.
        :returns: the client's result, or an empty list when anything
                  raised while building or issuing the request.
        """
        # FIXME(sileht): When a heat autoscaling stack decides to delete an
        # instance, the gnocchi metrics associated with that instance are
        # no longer updated, and when the alarm asks for the aggregation
        # gnocchi raises a 'No overlap' exception.
        # So temporarily set 'needed_overlap' to 0 to disable the gnocchi
        # checks about missing points. For more detail see:
        # https://bugs.launchpad.net/gnocchi/+bug/1479429
        try:
            aggregate = self._gnocchi_client.metric.aggregation
            aggregated = aggregate(
                metrics=rule['metric'],
                query=jsonutils.loads(rule['query']),
                resource_type=rule["resource_type"],
                start=start,
                stop=end,
                aggregation=rule['aggregation_method'],
                needed_overlap=0,
            )
        except Exception:
            # Best effort: log the failure and behave as if no datapoint
            # had been returned.
            LOG.exception(_('alarm stats retrieval failed'))
            aggregated = []
        return aggregated

View File

@ -64,26 +64,26 @@ class ThresholdEvaluator(evaluator.Evaluator):
return self._cm_client
@classmethod
def _bound_duration(cls, alarm):
def _bound_duration(cls, rule):
"""Bound the duration of the statistics query."""
now = timeutils.utcnow()
# when exclusion of weak datapoints is enabled, we extend
# the look-back period so as to allow a clearer sample count
# trend to be established
look_back = (cls.look_back if not alarm.rule.get('exclude_outliers')
else alarm.rule['evaluation_periods'])
window = ((alarm.rule.get('period', None) or alarm.rule['granularity'])
* (alarm.rule['evaluation_periods'] + look_back))
look_back = (cls.look_back if not rule.get('exclude_outliers')
else rule['evaluation_periods'])
window = ((rule.get('period', None) or rule['granularity'])
* (rule['evaluation_periods'] + look_back))
start = now - datetime.timedelta(seconds=window)
LOG.debug('query stats from %(start)s to '
'%(now)s', {'start': start, 'now': now})
return start.isoformat(), now.isoformat()
@staticmethod
def _sanitize(alarm, statistics):
def _sanitize(rule, statistics):
"""Sanitize statistics."""
LOG.debug('sanitize stats %s', statistics)
if alarm.rule.get('exclude_outliers'):
if rule.get('exclude_outliers'):
key = operator.attrgetter('count')
mean = utils.mean(statistics, key)
stddev = utils.stddev(statistics, key, mean)
@ -99,50 +99,27 @@ class ThresholdEvaluator(evaluator.Evaluator):
# in practice statistics are always sorted by period start, not
# strictly required by the API though
statistics = statistics[-alarm.rule['evaluation_periods']:]
result_statistics = [getattr(stat, alarm.rule['statistic'])
statistics = statistics[-rule['evaluation_periods']:]
result_statistics = [getattr(stat, rule['statistic'])
for stat in statistics]
LOG.debug('pruned statistics to %d', len(statistics))
return result_statistics
def _statistics(self, alarm, start, end):
def _statistics(self, rule, start, end):
"""Retrieve statistics over the current window."""
after = dict(field='timestamp', op='ge', value=start)
before = dict(field='timestamp', op='le', value=end)
query = copy.copy(alarm.rule['query'])
query = copy.copy(rule['query'])
query.extend([before, after])
LOG.debug('stats query %s', query)
try:
return self.cm_client.statistics.list(
meter_name=alarm.rule['meter_name'], q=query,
period=alarm.rule['period'])
meter_name=rule['meter_name'], q=query,
period=rule['period'])
except Exception:
LOG.exception(_('alarm stats retrieval failed'))
return []
def _sufficient(self, alarm, statistics):
"""Check for the sufficiency of the data for evaluation.
Ensure there is sufficient data for evaluation, transitioning to
unknown otherwise.
"""
sufficient = len(statistics) >= alarm.rule['evaluation_periods']
if not sufficient and alarm.state != evaluator.UNKNOWN:
LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
'%(actual)d') % {
'expected': alarm.rule['evaluation_periods'],
'actual': len(statistics)})
# Reason is not same as log message because we want to keep
# consistent since thirdparty software may depend on old format.
reason = _('%d datapoints are unknown') % alarm.rule[
'evaluation_periods']
last = None if not statistics else statistics[-1]
reason_data = self._reason_data('unknown',
alarm.rule['evaluation_periods'],
last)
self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
return sufficient
@staticmethod
def _reason_data(disposition, count, most_recent):
"""Create a reason data dictionary for this evaluator type."""
@ -150,7 +127,7 @@ class ThresholdEvaluator(evaluator.Evaluator):
'count': count, 'most_recent': most_recent}
@classmethod
def _reason(cls, alarm, statistics, distilled, state):
def _reason(cls, alarm, statistics, state):
"""Fabricate reason string."""
count = len(statistics)
disposition = 'inside' if state == evaluator.OK else 'outside'
@ -166,35 +143,64 @@ class ThresholdEvaluator(evaluator.Evaluator):
' %(disposition)s threshold, most recent: %(most_recent)s')
% dict(reason_data, state=state)), reason_data
def _transition(self, alarm, statistics, compared):
"""Transition alarm state if necessary.
def evaluate_rule(self, alarm_rule):
"""Evaluate alarm rule.
The transition rules are currently hardcoded as:
- transitioning from a known state requires an unequivocal
set of datapoints
- transitioning from unknown is on the basis of the most
recent datapoint if equivocal
Ultimately this will be policy-driven.
:returns: state, trending state and statistics.
"""
start, end = self._bound_duration(alarm_rule)
statistics = self._statistics(alarm_rule, start, end)
statistics = self._sanitize(alarm_rule, statistics)
sufficient = len(statistics) >= alarm_rule['evaluation_periods']
if not sufficient:
return evaluator.UNKNOWN, None, statistics
def _compare(value):
op = COMPARATORS[alarm_rule['comparison_operator']]
limit = alarm_rule['threshold']
LOG.debug('comparing value %(value)s against threshold'
' %(limit)s', {'value': value, 'limit': limit})
return op(value, limit)
compared = list(six.moves.map(_compare, statistics))
distilled = all(compared)
unequivocal = distilled or not any(compared)
unknown = alarm.state == evaluator.UNKNOWN
continuous = alarm.repeat_actions
if unequivocal:
state = evaluator.ALARM if distilled else evaluator.OK
reason, reason_data = self._reason(alarm, statistics,
distilled, state)
if alarm.state != state or continuous:
self._refresh(alarm, state, reason, reason_data)
elif unknown or continuous:
return state, None, statistics
else:
trending_state = evaluator.ALARM if compared[-1] else evaluator.OK
state = trending_state if unknown else alarm.state
reason, reason_data = self._reason(alarm, statistics,
distilled, state)
return None, trending_state, statistics
def _transition_alarm(self, alarm, state, trending_state, statistics):
    """Transition the alarm state if necessary.

    The transition rules are currently hardcoded as:

    - transitioning from a known state requires an unequivocal
      set of datapoints
    - transitioning from unknown is on the basis of the most
      recent datapoint if equivocal

    :param alarm: the alarm being evaluated; its ``state``,
                  ``repeat_actions`` and ``rule`` are read.
    :param state: the state decided by the rule evaluation, or a false
                  value when the datapoints were equivocal.
    :param trending_state: the state suggested by the most recent
                           datapoint when the datapoints were equivocal,
                           otherwise a false value.
    :param statistics: the datapoints the decision was based on.
    """
    unknown = alarm.state == evaluator.UNKNOWN
    continuous = alarm.repeat_actions

    if trending_state:
        # Equivocal datapoints only matter when the alarm is currently
        # unknown (adopt the trend) or repeats its actions (re-notify the
        # state it already has).
        if unknown or continuous:
            state = trending_state if unknown else alarm.state
            reason, reason_data = self._reason(alarm, statistics, state)
            self._refresh(alarm, state, reason, reason_data)
            return

    if state == evaluator.UNKNOWN and not unknown:
        expected = alarm.rule['evaluation_periods']
        # Use LOG.warning (LOG.warn is a deprecated alias) and let the
        # logger interpolate the arguments itself.
        LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                        '%(actual)d'),
                    {'expected': expected, 'actual': len(statistics)})
        # Reason is not same as log message because we want to keep
        # consistent since thirdparty software may depend on old format.
        reason = _('%d datapoints are unknown') % expected
        last = statistics[-1] if statistics else None
        reason_data = self._reason_data('unknown', expected, last)
        self._refresh(alarm, state, reason, reason_data)
    elif state and (alarm.state != state or continuous):
        reason, reason_data = self._reason(alarm, statistics, state)
        self._refresh(alarm, state, reason, reason_data)
def evaluate(self, alarm):
@ -203,18 +209,5 @@ class ThresholdEvaluator(evaluator.Evaluator):
'within its time constraint.', alarm.alarm_id)
return
start, end = self._bound_duration(alarm)
statistics = self._statistics(alarm, start, end)
statistics = self._sanitize(alarm, statistics)
if self._sufficient(alarm, statistics):
def _compare(value):
op = COMPARATORS[alarm.rule['comparison_operator']]
limit = alarm.rule['threshold']
LOG.debug('comparing value %(value)s against threshold'
' %(limit)s', {'value': value, 'limit': limit})
return op(value, limit)
self._transition(alarm,
statistics,
list(six.moves.map(_compare, statistics)))
state, trending_state, statistics = self.evaluate_rule(alarm.rule)
self._transition_alarm(alarm, state, trending_state, statistics)

View File

@ -31,17 +31,12 @@ from aodh.tests import constants
from aodh.tests.unit.evaluator import base
class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase):
EVALUATOR = gnocchi.GnocchiThresholdEvaluator
class TestGnocchiEvaluatorBase(base.TestEvaluatorBase):
def setUp(self):
self.client = self.useFixture(mockpatch.Patch(
'aodh.evaluator.gnocchi.client'
)).mock.Client.return_value
super(TestGnocchiThresholdEvaluate, self).setUp()
def prepare_alarms(self):
self.alarms = [
self.prepared_alarms = [
models.Alarm(name='instance_running_hot',
description='instance_running_hot',
type='gnocchi_resources_threshold',
@ -115,10 +110,11 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase):
metric='cpu_util',
resource_type='instance',
query='{"=": {"server_group": '
'"my_autoscaling_group"}}')
'"my_autoscaling_group"}}')
),
]
super(TestGnocchiEvaluatorBase, self).setUp()
@staticmethod
def _get_stats(granularity, values):
@ -135,29 +131,14 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase):
for alarm in self.alarms:
alarm.rule[field] = value
def test_retry_transient_api_failure(self):
means = self._get_stats(60, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(5)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] + v
for v in moves.xrange(4)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] - v
for v in moves.xrange(6)])
self.client.metric.get_measures.side_effect = [
exceptions.ClientException(501, "error2"),
means]
self.client.metric.aggregation.side_effect = [
Exception('boom'),
exceptions.ClientException(500, "error"),
maxs, avgs2]
def _test_retry_transient(self):
self._evaluate_all_alarms()
self._assert_all_alarms('insufficient data')
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
def test_simple_insufficient(self):
def _test_simple_insufficient(self):
self._set_all_alarms('ok')
self.client.metric.get_measures.return_value = []
self.client.metric.aggregation.return_value = []
self._evaluate_all_alarms()
self._assert_all_alarms('insufficient data')
expected = [mock.call(alarm) for alarm in self.alarms]
@ -167,227 +148,125 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase):
alarm,
'ok',
('%d datapoints are unknown'
% alarm.rule['evaluation_periods']),
% alarm.rule['evaluation_periods']),
self._reason_data('unknown',
alarm.rule['evaluation_periods'],
None))
for alarm in self.alarms]
self.assertEqual(expected, self.notifier.notify.call_args_list)
class TestGnocchiResourceThresholdEvaluate(TestGnocchiEvaluatorBase):
EVALUATOR = gnocchi.GnocchiResourceThresholdEvaluator
def prepare_alarms(self):
self.alarms = self.prepared_alarms[0:1]
def test_retry_transient_api_failure(self):
means = self._get_stats(60, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(5)])
self.client.metric.get_measures.side_effect = [
exceptions.ClientException(501, "error2"), means]
self._test_retry_transient()
def test_simple_insufficient(self):
self.client.metric.get_measures.return_value = []
self._test_simple_insufficient()
@mock.patch.object(timeutils, 'utcnow')
def test_simple_alarm_trip(self, utcnow):
utcnow.return_value = datetime.datetime(2015, 1, 26, 12, 57, 0, 0)
self._set_all_alarms('ok')
avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(1, 6)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v
for v in moves.xrange(4)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v
for v in moves.xrange(1, 7)])
self.client.metric.get_measures.side_effect = [avgs]
self.client.metric.aggregation.side_effect = [maxs, avgs2]
self._evaluate_all_alarms()
start_alarm1 = "2015-01-26T12:51:00"
start_alarm2 = "2015-01-26T12:32:00"
start_alarm3 = "2015-01-26T12:51:10"
start_alarm = "2015-01-26T12:51:00"
end = "2015-01-26T12:57:00"
self.assertEqual([
mock.call.get_measures(aggregation='mean', metric='cpu_util',
resource_id='my_instance',
start=start_alarm1, stop=end),
mock.call.aggregation(aggregation='max',
metrics=[
'0bb1604d-1193-4c0a-b4b8-74b170e35e83',
'9ddc209f-42f8-41e1-b8f1-8804f59c4053'],
start=start_alarm2, stop=end),
mock.call.aggregation(aggregation='mean', metrics='cpu_util',
needed_overlap=0,
query={"=": {"server_group":
"my_autoscaling_group"}},
resource_type='instance',
start=start_alarm3, stop=end),
], self.client.metric.mock_calls)
self.assertEqual(
[mock.call.get_measures(aggregation='mean', metric='cpu_util',
resource_id='my_instance',
start=start_alarm, stop=end)],
self.client.metric.mock_calls)
self._assert_all_alarms('alarm')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reasons = ['Transition to alarm due to 5 samples outside'
' threshold, most recent: %s' % avgs[-1][2],
'Transition to alarm due to 4 samples outside'
' threshold, most recent: %s' % maxs[-1][2],
'Transition to alarm due to 6 samples outside'
' threshold, most recent: %s' % avgs2[-1][2],
]
reason_datas = [self._reason_data('outside', 5, avgs[-1][2]),
self._reason_data('outside', 4, maxs[-1][2]),
self._reason_data('outside', 6, avgs2[-1][2])]
expected = [mock.call(alarm, 'ok', reason, reason_data)
for alarm, reason, reason_data
in zip(self.alarms, reasons, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
reason = ('Transition to alarm due to 5 samples outside threshold,'
' most recent: %s' % avgs[-1][2])
reason_data = self._reason_data('outside', 5, avgs[-1][2])
expected = mock.call(self.alarms[0], 'ok', reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
def test_simple_alarm_clear(self):
self._set_all_alarms('alarm')
avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(5)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] + v
for v in moves.xrange(1, 5)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] - v
for v in moves.xrange(6)])
self.client.metric.get_measures.side_effect = [avgs]
self.client.metric.aggregation.side_effect = [maxs, avgs2]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reasons = ['Transition to ok due to 5 samples inside'
' threshold, most recent: %s' % avgs[-1][2],
'Transition to ok due to 4 samples inside'
' threshold, most recent: %s' % maxs[-1][2],
'Transition to ok due to 6 samples inside'
' threshold, most recent: %s' % avgs2[-1][2]]
reason_datas = [self._reason_data('inside', 5, avgs[-1][2]),
self._reason_data('inside', 4, maxs[-1][2]),
self._reason_data('inside', 6, avgs2[-1][2])]
expected = [mock.call(alarm, 'alarm', reason, reason_data)
for alarm, reason, reason_data
in zip(self.alarms, reasons, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
reason = ('Transition to ok due to 5 samples inside'
' threshold, most recent: %s' % avgs[-1][2])
reason_data = self._reason_data('inside', 5, avgs[-1][2])
expected = mock.call(self.alarms[0], 'alarm', reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
def test_equivocal_from_known_state_ok(self):
self._set_all_alarms('ok')
avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(5)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v
for v in moves.xrange(-1, 3)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v
for v in moves.xrange(6)])
self.client.metric.get_measures.side_effect = [avgs]
self.client.metric.aggregation.side_effect = [maxs, avgs2]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
self.assertEqual(
[],
self.storage_conn.update_alarm.call_args_list)
self.assertEqual([],
self.storage_conn.update_alarm.call_args_list)
self.assertEqual([], self.notifier.notify.call_args_list)
def test_equivocal_ok_to_alarm(self):
self.alarms = [self.alarms[1]]
self._set_all_alarms('ok')
# NOTE(sileht): we add one useless point (81.0) that will break
# the test if the evaluator doesn't remove it.
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(-1, 5)])
self.client.metric.aggregation.side_effect = [maxs]
self._evaluate_all_alarms()
self._assert_all_alarms('alarm')
def test_equivocal_from_known_state_and_repeat_actions(self):
self._set_all_alarms('ok')
self.alarms[1].repeat_actions = True
avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(5)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v
for v in moves.xrange(-1, 3)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v
for v in moves.xrange(6)])
self.client.metric.get_measures.side_effect = [avgs]
self.client.metric.aggregation.side_effect = [maxs, avgs2]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
self.assertEqual([], self.storage_conn.update_alarm.call_args_list)
reason = ('Remaining as ok due to 4 samples inside'
' threshold, most recent: 8.0')
reason_datas = self._reason_data('inside', 4, 8.0)
expected = [mock.call(self.alarms[1], 'ok', reason, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
def test_unequivocal_from_known_state_and_repeat_actions(self):
self._set_all_alarms('alarm')
self.alarms[1].repeat_actions = True
avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(1, 6)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v
for v in moves.xrange(4)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v
for v in moves.xrange(6)])
self.client.metric.get_measures.side_effect = [avgs]
self.client.metric.aggregation.side_effect = [maxs, avgs2]
self._evaluate_all_alarms()
self._assert_all_alarms('alarm')
self.assertEqual([], self.storage_conn.update_alarm.call_args_list)
reason = ('Remaining as alarm due to 4 samples outside'
' threshold, most recent: 7.0')
reason_datas = self._reason_data('outside', 4, 7.0)
expected = [mock.call(self.alarms[1], 'alarm',
reason, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
def test_state_change_and_repeat_actions(self):
self._set_all_alarms('ok')
self.alarms[0].repeat_actions = True
self.alarms[1].repeat_actions = True
avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(1, 6)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v
for v in moves.xrange(4)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v
for v in moves.xrange(1, 7)])
self.client.metric.get_measures.side_effect = [avgs]
self.client.metric.aggregation.side_effect = [maxs, avgs2]
self._evaluate_all_alarms()
self._assert_all_alarms('alarm')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reasons = ['Transition to alarm due to 5 samples outside'
' threshold, most recent: %s' % avgs[-1][2],
'Transition to alarm due to 4 samples outside'
' threshold, most recent: %s' % maxs[-1][2],
'Transition to alarm due to 6 samples outside'
' threshold, most recent: %s' % avgs2[-1][2]]
reason_datas = [self._reason_data('outside', 5, avgs[-1][2]),
self._reason_data('outside', 4, maxs[-1][2]),
self._reason_data('outside', 6, avgs2[-1][2])]
expected = [mock.call(alarm, 'ok', reason, reason_data)
for alarm, reason, reason_data
in zip(self.alarms, reasons, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
reason = ('Transition to alarm due to 5 samples outside '
'threshold, most recent: %s' % avgs[-1][2])
reason_data = self._reason_data('outside', 5, avgs[-1][2])
expected = mock.call(self.alarms[0], 'ok', reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
def test_equivocal_from_unknown(self):
self._set_all_alarms('insufficient data')
avgs = self._get_stats(60, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(1, 6)])
maxs = self._get_stats(300, [self.alarms[1].rule['threshold'] - v
for v in moves.xrange(4)])
avgs2 = self._get_stats(50, [self.alarms[2].rule['threshold'] + v
for v in moves.xrange(1, 7)])
self.client.metric.get_measures.side_effect = [avgs]
self.client.metric.aggregation.side_effect = [maxs, avgs2]
self._evaluate_all_alarms()
self._assert_all_alarms('alarm')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reasons = ['Transition to alarm due to 5 samples outside'
' threshold, most recent: %s' % avgs[-1][2],
'Transition to alarm due to 4 samples outside'
' threshold, most recent: %s' % maxs[-1][2],
'Transition to alarm due to 6 samples outside'
' threshold, most recent: %s' % avgs2[-1][2]]
reason_datas = [self._reason_data('outside', 5, avgs[-1][2]),
self._reason_data('outside', 4, maxs[-1][2]),
self._reason_data('outside', 6, avgs2[-1][2])]
expected = [mock.call(alarm, 'insufficient data',
reason, reason_data)
for alarm, reason, reason_data
in zip(self.alarms, reasons, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
reason = ('Transition to alarm due to 5 samples outside'
' threshold, most recent: %s' % avgs[-1][2])
reason_data = self._reason_data('outside', 5, avgs[-1][2])
expected = mock.call(self.alarms[0], 'insufficient data',
reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
@unittest.skipIf(six.PY3,
"the aodh base class is not python 3 ready")
@ -401,13 +280,10 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase):
'duration': 10800, # 3 hours
'timezone': 'Europe/Ljubljana'}
]
self.alarms[1].time_constraints = self.alarms[0].time_constraints
self.alarms[2].time_constraints = self.alarms[0].time_constraints
dt = datetime.datetime(2014, 1, 1, 15, 0, 0,
tzinfo=pytz.timezone('Europe/Ljubljana'))
mock_utcnow.return_value = dt.astimezone(pytz.UTC)
self.client.metric.get_measures.return_value = []
self.client.metric.aggregation.return_value = []
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
update_calls = self.storage_conn.update_alarm.call_args_list
@ -415,3 +291,197 @@ class TestGnocchiThresholdEvaluate(base.TestEvaluatorBase):
"Alarm should not change state if the current "
" time is outside its time constraint.")
self.assertEqual([], self.notifier.notify.call_args_list)
# Tests for GnocchiAggregationMetricsThresholdEvaluator. Only the second
# prepared alarm is exercised (see prepare_alarms), so every test mocks the
# client's metric.aggregation call and nothing else.
class TestGnocchiAggregationMetricsThresholdEvaluate(TestGnocchiEvaluatorBase):
EVALUATOR = gnocchi.GnocchiAggregationMetricsThresholdEvaluator
# Restrict the alarms under test to the single entry at index 1.
def prepare_alarms(self):
self.alarms = self.prepared_alarms[1:2]
# First aggregation call raises, second returns datapoints; the shared
# helper then checks the state after each of the two evaluations.
def test_retry_transient_api_failure(self):
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(4)])
self.client.metric.aggregation.side_effect = [Exception('boom'), maxs]
self._test_retry_transient()
# The client returns no datapoints at all; assertions live in the shared
# helper.
def test_simple_insufficient(self):
self.client.metric.aggregation.return_value = []
self._test_simple_insufficient()
# With utcnow frozen, pin the exact keyword arguments sent to the client,
# the resulting 'alarm' state, the storage update and the notification.
@mock.patch.object(timeutils, 'utcnow')
def test_simple_alarm_trip(self, utcnow):
utcnow.return_value = datetime.datetime(2015, 1, 26, 12, 57, 0, 0)
self._set_all_alarms('ok')
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(4)])
self.client.metric.aggregation.side_effect = [maxs]
self._evaluate_all_alarms()
start_alarm = "2015-01-26T12:32:00"
end = "2015-01-26T12:57:00"
self.assertEqual(
[mock.call.aggregation(aggregation='max',
metrics=[
'0bb1604d-1193-4c0a-b4b8-74b170e35e83',
'9ddc209f-42f8-41e1-b8f1-8804f59c4053'],
start=start_alarm, stop=end)],
self.client.metric.mock_calls)
self._assert_all_alarms('alarm')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reason = ('Transition to alarm due to 4 samples outside '
'threshold, most recent: %s' % maxs[-1][2])
reason_data = self._reason_data('outside', 4, maxs[-1][2])
expected = mock.call(self.alarms[0], 'ok', reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
# Alarm starts in 'alarm' and is expected to move to 'ok', with one
# storage update and a matching notification.
def test_simple_alarm_clear(self):
self._set_all_alarms('alarm')
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(1, 5)])
self.client.metric.aggregation.side_effect = [maxs]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reason = ('Transition to ok due to 4 samples inside '
'threshold, most recent: %s' % maxs[-1][2])
reason_data = self._reason_data('inside', 4, maxs[-1][2])
expected = mock.call(self.alarms[0], 'alarm', reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
# Datapoints on both sides of the threshold while the alarm is 'ok':
# expects no storage update and no notification.
def test_equivocal_from_known_state_ok(self):
self._set_all_alarms('ok')
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(-1, 3)])
self.client.metric.aggregation.side_effect = [maxs]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
self.assertEqual(
[],
self.storage_conn.update_alarm.call_args_list)
self.assertEqual([], self.notifier.notify.call_args_list)
# More datapoints are returned than the earlier tests feed; the alarm is
# still expected to end up in 'alarm'.
def test_equivocal_ok_to_alarm(self):
self._set_all_alarms('ok')
# NOTE(sileht): we add one useless point (81.0) that will break
# the test if the evaluator doesn't remove it.
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(-1, 5)])
self.client.metric.aggregation.side_effect = [maxs]
self._evaluate_all_alarms()
self._assert_all_alarms('alarm')
# Same mixed datapoints as above but with repeat_actions enabled: state
# stays 'ok', storage is untouched, and a 'Remaining as ok' notification
# is expected.
def test_equivocal_from_known_state_and_repeat_actions(self):
self._set_all_alarms('ok')
self.alarms[0].repeat_actions = True
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(-1, 3)])
self.client.metric.aggregation.side_effect = [maxs]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
self.assertEqual([], self.storage_conn.update_alarm.call_args_list)
reason = ('Remaining as ok due to 4 samples inside'
' threshold, most recent: 8.0')
reason_datas = self._reason_data('inside', 4, 8.0)
expected = [mock.call(self.alarms[0], 'ok', reason, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
# Alarm already in 'alarm' with repeat_actions enabled: state is kept,
# storage is untouched, and a 'Remaining as alarm' notification is
# expected.
def test_unequivocal_from_known_state_and_repeat_actions(self):
self._set_all_alarms('alarm')
self.alarms[0].repeat_actions = True
maxs = self._get_stats(300, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(4)])
self.client.metric.aggregation.side_effect = [maxs]
self._evaluate_all_alarms()
self._assert_all_alarms('alarm')
self.assertEqual([], self.storage_conn.update_alarm.call_args_list)
reason = ('Remaining as alarm due to 4 samples outside'
' threshold, most recent: 7.0')
reason_datas = self._reason_data('outside', 4, 7.0)
expected = [mock.call(self.alarms[0], 'alarm',
reason, reason_datas)]
self.assertEqual(expected, self.notifier.notify.call_args_list)
# Tests for GnocchiAggregationResourcesThresholdEvaluator. Only the third
# prepared alarm is exercised (see prepare_alarms), so every test mocks the
# client's metric.aggregation call and nothing else.
class TestGnocchiAggregationResourcesThresholdEvaluate(
TestGnocchiEvaluatorBase):
EVALUATOR = gnocchi.GnocchiAggregationResourcesThresholdEvaluator
# Restrict the alarms under test to the single entry at index 2.
def prepare_alarms(self):
self.alarms = self.prepared_alarms[2:3]
# First aggregation call raises a ClientException, second returns
# datapoints; the shared helper checks the state after each evaluation.
def test_retry_transient_api_failure(self):
avgs2 = self._get_stats(50, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(6)])
self.client.metric.aggregation.side_effect = [
exceptions.ClientException(500, "error"), avgs2]
self._test_retry_transient()
# The client returns no datapoints at all; assertions live in the shared
# helper.
def test_simple_insufficient(self):
self.client.metric.aggregation.return_value = []
self._test_simple_insufficient()
# With utcnow frozen, pin the exact keyword arguments sent to the client
# (including needed_overlap=0 and the decoded query), the resulting
# 'alarm' state, the storage update and the notification.
@mock.patch.object(timeutils, 'utcnow')
def test_simple_alarm_trip(self, utcnow):
utcnow.return_value = datetime.datetime(2015, 1, 26, 12, 57, 0, 0)
self._set_all_alarms('ok')
avgs = self._get_stats(50, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(1, 7)])
self.client.metric.aggregation.side_effect = [avgs]
self._evaluate_all_alarms()
start_alarm = "2015-01-26T12:51:10"
end = "2015-01-26T12:57:00"
self.assertEqual(
[mock.call.aggregation(aggregation='mean', metrics='cpu_util',
needed_overlap=0,
query={"=": {"server_group":
"my_autoscaling_group"}},
resource_type='instance',
start=start_alarm, stop=end)],
self.client.metric.mock_calls)
self._assert_all_alarms('alarm')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reason = ('Transition to alarm due to 6 samples outside '
'threshold, most recent: %s' % avgs[-1][2])
reason_data = self._reason_data('outside', 6, avgs[-1][2])
expected = mock.call(self.alarms[0], 'ok', reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
# Alarm starts in 'alarm' and is expected to move to 'ok', with one
# storage update and a matching notification.
def test_simple_alarm_clear(self):
self._set_all_alarms('alarm')
avgs = self._get_stats(50, [self.alarms[0].rule['threshold'] - v
for v in moves.xrange(6)])
self.client.metric.aggregation.side_effect = [avgs]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
expected = [mock.call(alarm) for alarm in self.alarms]
update_calls = self.storage_conn.update_alarm.call_args_list
self.assertEqual(expected, update_calls)
reason = ('Transition to ok due to 6 samples inside '
'threshold, most recent: %s' % avgs[-1][2])
reason_data = self._reason_data('inside', 6, avgs[-1][2])
expected = mock.call(self.alarms[0], 'alarm', reason, reason_data)
self.assertEqual(expected, self.notifier.notify.call_args)
# The alarm is 'ok' and is expected to stay 'ok' with no storage update
# and no notification for the datapoints fed here.
def test_equivocal_from_known_state_ok(self):
self._set_all_alarms('ok')
avgs = self._get_stats(50, [self.alarms[0].rule['threshold'] + v
for v in moves.xrange(6)])
self.client.metric.aggregation.side_effect = [avgs]
self._evaluate_all_alarms()
self._assert_all_alarms('ok')
self.assertEqual(
[],
self.storage_conn.update_alarm.call_args_list)
self.assertEqual([], self.notifier.notify.call_args_list)

View File

@ -396,7 +396,7 @@ class TestEvaluate(base.TestEvaluatorBase):
alarm.rule['exclude_outliers'] = exclude_outliers
with mock.patch.object(timeutils, 'utcnow') as mock_utcnow:
mock_utcnow.return_value = datetime.datetime(2012, 7, 2, 10, 45)
constraint = self.evaluator._bound_duration(alarm)
constraint = self.evaluator._bound_duration(alarm.rule)
self.assertEqual((start, timeutils.utcnow().isoformat()),
constraint)

View File

@ -84,9 +84,9 @@ aodh.alarm.rule =
aodh.evaluator =
threshold = aodh.evaluator.threshold:ThresholdEvaluator
combination = aodh.evaluator.combination:CombinationEvaluator
gnocchi_resources_threshold = aodh.evaluator.gnocchi:GnocchiThresholdEvaluator
gnocchi_aggregation_by_metrics_threshold = aodh.evaluator.gnocchi:GnocchiThresholdEvaluator
gnocchi_aggregation_by_resources_threshold = aodh.evaluator.gnocchi:GnocchiThresholdEvaluator
gnocchi_resources_threshold = aodh.evaluator.gnocchi:GnocchiResourceThresholdEvaluator
gnocchi_aggregation_by_metrics_threshold = aodh.evaluator.gnocchi:GnocchiAggregationMetricsThresholdEvaluator
gnocchi_aggregation_by_resources_threshold = aodh.evaluator.gnocchi:GnocchiAggregationResourcesThresholdEvaluator
aodh.notifier =
log = aodh.notifier.log:LogAlarmNotifier