Use alarm's evaluation periods in sufficient test

Currently, we use constant value quorum=1 to check if there are enough datapoints, however, this is not quite right for an alarm rule. Image evaluation periods is set to, for i.e., 3 for an instance on cpu_util greater or equal than 80%. Here are the cases which current may not work as expected: 1. when system start or instance is just created, we may only get one or two samples for the instance 2. when system is somewhere broken, or an instance is restarted (after being shutoff), sample may fail to be collected in some time, so we only get one or two sample in that time range We want to avoid a spurious data peak, for example, instance cpu_util can be 50%, 50%, 50%, 90%, in such case, alarm will not be triggered, but if instance cpu_util is None, None, None, 90%, current code will think alarm should be triggered, which is not consistent and may confuse end users. This patch will put alarm to insufficient data when datapoints are less than evaluation periods. Change-Id: Ie64a537434493a5965c8e9e165cf028d57689da2 Closes-Bug: #1380216
2014-10-31 03:33:34 +08:00 · 2014-10-31 03:33:34 +08:00 · 553d8d96e6
commit 553d8d96e6
parent fb53883fcb
2 changed files with 38 additions and 8 deletions
--- a/ceilometer/alarm/evaluator/threshold.py
+++ b/ceilometer/alarm/evaluator/threshold.py
@ -23,7 +23,7 @@ from oslo_utils import timeutils

 from ceilometer.alarm import evaluator
 from ceilometer.alarm.evaluator import utils
-from ceilometer.i18n import _
+from ceilometer.i18n import _, _LW
 from ceilometer.openstack.common import log

 LOG = log.getLogger(__name__)
@ -44,10 +44,6 @@ class ThresholdEvaluator(evaluator.Evaluator):
    # for reporting/ingestion lag
    look_back = 1

-    # minimum number of datapoints within sliding window to
-    # avoid unknown state
-    quorum = 1
-
    @classmethod
    def _bound_duration(cls, alarm, constraints):
        """Bound the duration of the statistics query."""
@ -108,13 +104,21 @@ class ThresholdEvaluator(evaluator.Evaluator):
        Ensure there is sufficient data for evaluation, transitioning to
        unknown otherwise.
        """
-        sufficient = len(statistics) >= self.quorum
+        sufficient = len(statistics) >= alarm.rule['evaluation_periods']
        if not sufficient and alarm.state != evaluator.UNKNOWN:
+            LOG.warn(_LW('Expecting %(expected)d datapoints but only get '
+                         '%(actual)d') % {
+                'expected': alarm.rule['evaluation_periods'],
+                'actual': len(statistics)})
+            # Reason is not same as log message because we want to keep
+            # consistent since thirdparty software may depend on old format.
            reason = _('%d datapoints are unknown') % alarm.rule[
                'evaluation_periods']
+            last = None if not statistics else (
+                getattr(statistics[-1], alarm.rule['statistic']))
            reason_data = self._reason_data('unknown',
                                            alarm.rule['evaluation_periods'],
-                                            None)
+                                            last)
            self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
        return sufficient

--- a/ceilometer/tests/alarm/evaluator/test_threshold.py
+++ b/ceilometer/tests/alarm/evaluator/test_threshold.py
@ -118,7 +118,7 @@ class TestEvaluate(base.TestEvaluatorBase):
            avgs = [self._get_stat('avg', self.alarms[0].rule['threshold'] - v)
                    for v in moves.xrange(5)]
            maxs = [self._get_stat('max', self.alarms[1].rule['threshold'] + v)
-                    for v in moves.xrange(1, 4)]
+                    for v in moves.xrange(1, 5)]
            self.api_client.statistics.list.side_effect = [broken,
                                                           broken,
                                                           avgs,
@ -150,6 +150,32 @@ class TestEvaluate(base.TestEvaluatorBase):
                for alarm in self.alarms]
            self.assertEqual(expected, self.notifier.notify.call_args_list)

+    def test_less_insufficient_data(self):
+        self._set_all_alarms('ok')
+        with mock.patch('ceilometerclient.client.get_client',
+                        return_value=self.api_client):
+            avgs = [self._get_stat('avg', self.alarms[0].rule['threshold'] - v)
+                    for v in moves.xrange(4)]
+            maxs = [self._get_stat('max', self.alarms[1].rule['threshold'] - v)
+                    for v in moves.xrange(1, 4)]
+            self.api_client.statistics.list.side_effect = [avgs, maxs]
+            self._evaluate_all_alarms()
+            self._assert_all_alarms('insufficient data')
+            expected = [mock.call(alarm.alarm_id, state='insufficient data')
+                        for alarm in self.alarms]
+            update_calls = self.api_client.alarms.set_state.call_args_list
+            self.assertEqual(update_calls, expected)
+            expected = [mock.call(
+                alarm,
+                'ok',
+                ('%d datapoints are unknown'
+                 % alarm.rule['evaluation_periods']),
+                self._reason_data('unknown',
+                                  alarm.rule['evaluation_periods'],
+                                  alarm.rule['threshold'] - 3))
+                for alarm in self.alarms]
+            self.assertEqual(expected, self.notifier.notify.call_args_list)
+
    def test_simple_alarm_trip(self):
        self._set_all_alarms('ok')
        with mock.patch('ceilometerclient.client.get_client',