Use alarm's evaluation periods in sufficient test
Currently, we use constant value quorum=1 to check if there are enough datapoints, however, this is not quite right for an alarm rule. Image evaluation periods is set to, for i.e., 3 for an instance on cpu_util greater or equal than 80%. Here are the cases which current may not work as expected: 1. when system start or instance is just created, we may only get one or two samples for the instance 2. when system is somewhere broken, or an instance is restarted (after being shutoff), sample may fail to be collected in some time, so we only get one or two sample in that time range We want to avoid a spurious data peak, for example, instance cpu_util can be 50%, 50%, 50%, 90%, in such case, alarm will not be triggered, but if instance cpu_util is None, None, None, 90%, current code will think alarm should be triggered, which is not consistent and may confuse end users. This patch will put alarm to insufficient data when datapoints are less than evaluation periods. Change-Id: Ie64a537434493a5965c8e9e165cf028d57689da2 Closes-Bug: #1380216
This commit is contained in:
parent
fb53883fcb
commit
553d8d96e6
@ -23,7 +23,7 @@ from oslo_utils import timeutils
|
||||
|
||||
from ceilometer.alarm import evaluator
|
||||
from ceilometer.alarm.evaluator import utils
|
||||
from ceilometer.i18n import _
|
||||
from ceilometer.i18n import _, _LW
|
||||
from ceilometer.openstack.common import log
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
@ -44,10 +44,6 @@ class ThresholdEvaluator(evaluator.Evaluator):
|
||||
# for reporting/ingestion lag
|
||||
look_back = 1
|
||||
|
||||
# minimum number of datapoints within sliding window to
|
||||
# avoid unknown state
|
||||
quorum = 1
|
||||
|
||||
@classmethod
|
||||
def _bound_duration(cls, alarm, constraints):
|
||||
"""Bound the duration of the statistics query."""
|
||||
@ -108,13 +104,21 @@ class ThresholdEvaluator(evaluator.Evaluator):
|
||||
Ensure there is sufficient data for evaluation, transitioning to
|
||||
unknown otherwise.
|
||||
"""
|
||||
sufficient = len(statistics) >= self.quorum
|
||||
sufficient = len(statistics) >= alarm.rule['evaluation_periods']
|
||||
if not sufficient and alarm.state != evaluator.UNKNOWN:
|
||||
LOG.warn(_LW('Expecting %(expected)d datapoints but only get '
|
||||
'%(actual)d') % {
|
||||
'expected': alarm.rule['evaluation_periods'],
|
||||
'actual': len(statistics)})
|
||||
# Reason is not same as log message because we want to keep
|
||||
# consistent since thirdparty software may depend on old format.
|
||||
reason = _('%d datapoints are unknown') % alarm.rule[
|
||||
'evaluation_periods']
|
||||
last = None if not statistics else (
|
||||
getattr(statistics[-1], alarm.rule['statistic']))
|
||||
reason_data = self._reason_data('unknown',
|
||||
alarm.rule['evaluation_periods'],
|
||||
None)
|
||||
last)
|
||||
self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
|
||||
return sufficient
|
||||
|
||||
|
@ -118,7 +118,7 @@ class TestEvaluate(base.TestEvaluatorBase):
|
||||
avgs = [self._get_stat('avg', self.alarms[0].rule['threshold'] - v)
|
||||
for v in moves.xrange(5)]
|
||||
maxs = [self._get_stat('max', self.alarms[1].rule['threshold'] + v)
|
||||
for v in moves.xrange(1, 4)]
|
||||
for v in moves.xrange(1, 5)]
|
||||
self.api_client.statistics.list.side_effect = [broken,
|
||||
broken,
|
||||
avgs,
|
||||
@ -150,6 +150,32 @@ class TestEvaluate(base.TestEvaluatorBase):
|
||||
for alarm in self.alarms]
|
||||
self.assertEqual(expected, self.notifier.notify.call_args_list)
|
||||
|
||||
def test_less_insufficient_data(self):
|
||||
self._set_all_alarms('ok')
|
||||
with mock.patch('ceilometerclient.client.get_client',
|
||||
return_value=self.api_client):
|
||||
avgs = [self._get_stat('avg', self.alarms[0].rule['threshold'] - v)
|
||||
for v in moves.xrange(4)]
|
||||
maxs = [self._get_stat('max', self.alarms[1].rule['threshold'] - v)
|
||||
for v in moves.xrange(1, 4)]
|
||||
self.api_client.statistics.list.side_effect = [avgs, maxs]
|
||||
self._evaluate_all_alarms()
|
||||
self._assert_all_alarms('insufficient data')
|
||||
expected = [mock.call(alarm.alarm_id, state='insufficient data')
|
||||
for alarm in self.alarms]
|
||||
update_calls = self.api_client.alarms.set_state.call_args_list
|
||||
self.assertEqual(update_calls, expected)
|
||||
expected = [mock.call(
|
||||
alarm,
|
||||
'ok',
|
||||
('%d datapoints are unknown'
|
||||
% alarm.rule['evaluation_periods']),
|
||||
self._reason_data('unknown',
|
||||
alarm.rule['evaluation_periods'],
|
||||
alarm.rule['threshold'] - 3))
|
||||
for alarm in self.alarms]
|
||||
self.assertEqual(expected, self.notifier.notify.call_args_list)
|
||||
|
||||
def test_simple_alarm_trip(self):
|
||||
self._set_all_alarms('ok')
|
||||
with mock.patch('ceilometerclient.client.get_client',
|
||||
|
Loading…
x
Reference in New Issue
Block a user