From 0e01da763ab7b3783ae86aa6582c38ebdc6e2378 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Harri=20H=C3=A4m=C3=A4l=C3=A4inen?= Date: Thu, 12 Sep 2013 09:34:34 +0300 Subject: [PATCH] Catch exceptions from nova client in poll_and_publish Ceilometer compute agent dies if nova client raises an exception while it is retrieving server instances. This might happen e.g. when some OpenStack API is temporarily unavailable. Fixes LP Bug #1218889 Change-Id: I808dcfae18d23240f8e095d6c97c8dede7dede8f --- ceilometer/compute/manager.py | 8 ++++++-- tests/agentbase.py | 10 ++++++++++ tests/compute/test_manager.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/ceilometer/compute/manager.py b/ceilometer/compute/manager.py index c3b7f250d..85fd6414c 100644 --- a/ceilometer/compute/manager.py +++ b/ceilometer/compute/manager.py @@ -52,8 +52,12 @@ class PollingTask(agent.PollingTask): LOG.exception(err) def poll_and_publish(self): - self.poll_and_publish_instances( - self.manager.nv.instance_get_all_by_host(cfg.CONF.host)) + try: + instances = self.manager.nv.instance_get_all_by_host(cfg.CONF.host) + except Exception as err: + LOG.exception('Unable to retrieve instances: %s', err) + else: + self.poll_and_publish_instances(instances) class AgentManager(agent.AgentManager): diff --git a/tests/agentbase.py b/tests/agentbase.py index 7e8263b7e..9deba23be 100644 --- a/tests/agentbase.py +++ b/tests/agentbase.py @@ -245,3 +245,13 @@ class BaseAgentManagerTestCase(base.TestCase): self.mgr.interval_task(polling_tasks.get(10)) pub = self.mgr.pipeline_manager.pipelines[0].publishers[0] self.assertEqual(len(pub.samples), 0) + + def test_manager_exception_persistency(self): + self.pipeline_cfg.append({ + 'name': "test_pipeline", + 'interval': 60, + 'counters': ['testanother'], + 'transformers': [], + 'publishers': ["test"], + }) + self.setup_pipeline() diff --git a/tests/compute/test_manager.py b/tests/compute/test_manager.py index 02eb61253..b3f3b3da3 100644 --- 
a/tests/compute/test_manager.py +++ b/tests/compute/test_manager.py @@ -41,6 +41,9 @@ class TestRunTasks(agentbase.BaseAgentManagerTestCase): setattr(instance, 'OS-EXT-STS:vm_state', state) return instance + def _raise_exception(self): + raise Exception + def setup_manager(self): self.mgr = manager.AgentManager() @@ -71,3 +74,11 @@ class TestRunTasks(agentbase.BaseAgentManagerTestCase): super(TestRunTasks, self).test_interval_exception_isolation() self.assertEqual(len(self.PollsterException.samples), 1) self.assertEqual(len(self.PollsterExceptionAnother.samples), 1) + + def test_manager_exception_persistency(self): + super(TestRunTasks, self).test_manager_exception_persistency() + self.stubs.Set(nova_client.Client, 'instance_get_all_by_host', + lambda *x: self._raise_exception()) + mgr = manager.AgentManager() + polling_task = manager.PollingTask(mgr) + polling_task.poll_and_publish()