Use alarm's evaluation periods in sufficient test

Currently, we use constant value quorum=1 to check if there are enough
datapoints, however, this is not quite right for an alarm rule.

Image evaluation periods is set to, for i.e., 3 for an instance on
cpu_util greater or equal than 80%. Here are the cases which current
may not work as expected:

1. when system start or instance is just created, we may only get one
or two samples for the instance
2. when system is somewhere broken, or an instance is restarted (after
being shutoff), sample may fail to be collected in some time, so we only
get one or two sample in that time range

We want to avoid a spurious data peak, for example, instance cpu_util can
be 50%, 50%, 50%, 90%, in such case, alarm will not be triggered, but if
instance cpu_util is None, None, None, 90%, current code will think alarm
should be triggered, which is not consistent and may confuse end users.

This patch will put alarm to insufficient data when datapoints are less
than evaluation periods.

Change-Id: Ie64a537434493a5965c8e9e165cf028d57689da2
Closes-Bug: #1380216
This commit is contained in:
ZhiQiang Fan 2014-10-31 03:33:34 +08:00 committed by ZhiQiang Fan
parent fb53883fcb
commit 553d8d96e6
2 changed files with 38 additions and 8 deletions

View File

@ -23,7 +23,7 @@ from oslo_utils import timeutils
from ceilometer.alarm import evaluator
from ceilometer.alarm.evaluator import utils
from ceilometer.i18n import _
from ceilometer.i18n import _, _LW
from ceilometer.openstack.common import log
LOG = log.getLogger(__name__)
@ -44,10 +44,6 @@ class ThresholdEvaluator(evaluator.Evaluator):
# for reporting/ingestion lag
look_back = 1
# minimum number of datapoints within sliding window to
# avoid unknown state
quorum = 1
@classmethod
def _bound_duration(cls, alarm, constraints):
"""Bound the duration of the statistics query."""
@ -108,13 +104,21 @@ class ThresholdEvaluator(evaluator.Evaluator):
Ensure there is sufficient data for evaluation, transitioning to
unknown otherwise.
"""
sufficient = len(statistics) >= self.quorum
sufficient = len(statistics) >= alarm.rule['evaluation_periods']
if not sufficient and alarm.state != evaluator.UNKNOWN:
LOG.warn(_LW('Expecting %(expected)d datapoints but only get '
'%(actual)d') % {
'expected': alarm.rule['evaluation_periods'],
'actual': len(statistics)})
# Reason is not same as log message because we want to keep
# consistent since thirdparty software may depend on old format.
reason = _('%d datapoints are unknown') % alarm.rule[
'evaluation_periods']
last = None if not statistics else (
getattr(statistics[-1], alarm.rule['statistic']))
reason_data = self._reason_data('unknown',
alarm.rule['evaluation_periods'],
None)
last)
self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
return sufficient

View File

@ -118,7 +118,7 @@ class TestEvaluate(base.TestEvaluatorBase):
avgs = [self._get_stat('avg', self.alarms[0].rule['threshold'] - v)
for v in moves.xrange(5)]
maxs = [self._get_stat('max', self.alarms[1].rule['threshold'] + v)
for v in moves.xrange(1, 4)]
for v in moves.xrange(1, 5)]
self.api_client.statistics.list.side_effect = [broken,
broken,
avgs,
@ -150,6 +150,32 @@ class TestEvaluate(base.TestEvaluatorBase):
for alarm in self.alarms]
self.assertEqual(expected, self.notifier.notify.call_args_list)
def test_less_insufficient_data(self):
self._set_all_alarms('ok')
with mock.patch('ceilometerclient.client.get_client',
return_value=self.api_client):
avgs = [self._get_stat('avg', self.alarms[0].rule['threshold'] - v)
for v in moves.xrange(4)]
maxs = [self._get_stat('max', self.alarms[1].rule['threshold'] - v)
for v in moves.xrange(1, 4)]
self.api_client.statistics.list.side_effect = [avgs, maxs]
self._evaluate_all_alarms()
self._assert_all_alarms('insufficient data')
expected = [mock.call(alarm.alarm_id, state='insufficient data')
for alarm in self.alarms]
update_calls = self.api_client.alarms.set_state.call_args_list
self.assertEqual(update_calls, expected)
expected = [mock.call(
alarm,
'ok',
('%d datapoints are unknown'
% alarm.rule['evaluation_periods']),
self._reason_data('unknown',
alarm.rule['evaluation_periods'],
alarm.rule['threshold'] - 3))
for alarm in self.alarms]
self.assertEqual(expected, self.notifier.notify.call_args_list)
def test_simple_alarm_trip(self):
self._set_all_alarms('ok')
with mock.patch('ceilometerclient.client.get_client',