Catch exceptions from nova client in poll_and_publish

The Ceilometer compute agent dies if the nova client raises an exception while
it is retrieving server instances. This might happen, e.g., when some OpenStack
API is temporarily unavailable.

Fixes LP Bug #1218889

Change-Id: I808dcfae18d23240f8e095d6c97c8dede7dede8f
This commit is contained in:
Harri Hämäläinen 2013-09-12 09:34:34 +03:00
parent 76578d4f18
commit 0e01da763a
3 changed files with 27 additions and 2 deletions

View File

@ -52,8 +52,12 @@ class PollingTask(agent.PollingTask):
LOG.exception(err)
def poll_and_publish(self):
    """Poll and publish samples for the instances on this host.

    The instance list is requested from the nova client for the host
    named by ``cfg.CONF.host``.  If that request raises, the error is
    logged and this polling cycle is skipped, so a temporarily
    unavailable API does not kill the compute agent.
    """
    try:
        instances = self.manager.nv.instance_get_all_by_host(cfg.CONF.host)
    except Exception as err:
        # Deliberately broad: any failure of the nova client must be
        # survivable here, otherwise the agent dies with it.
        LOG.exception('Unable to retrieve instances: %s', err)
    else:
        # Only poll when the lookup succeeded; keeping this out of the
        # ``try`` body avoids masking errors raised by the pollsters.
        self.poll_and_publish_instances(instances)
class AgentManager(agent.AgentManager):

View File

@ -245,3 +245,13 @@ class BaseAgentManagerTestCase(base.TestCase):
self.mgr.interval_task(polling_tasks.get(10))
pub = self.mgr.pipeline_manager.pipelines[0].publishers[0]
self.assertEqual(len(pub.samples), 0)
def test_manager_exception_persistency(self):
    # Register one more pipeline (counter 'testanother', 60s interval)
    # next to whatever the fixture already configured, then rebuild
    # the pipelines from the updated configuration.
    extra_pipeline = {
        'name': "test_pipeline",
        'interval': 60,
        'counters': ['testanother'],
        'transformers': [],
        'publishers': ["test"],
    }
    self.pipeline_cfg.append(extra_pipeline)
    self.setup_pipeline()

View File

@ -41,6 +41,9 @@ class TestRunTasks(agentbase.BaseAgentManagerTestCase):
setattr(instance, 'OS-EXT-STS:vm_state', state)
return instance
def _raise_exception(self):
raise Exception
def setup_manager(self):
self.mgr = manager.AgentManager()
@ -71,3 +74,11 @@ class TestRunTasks(agentbase.BaseAgentManagerTestCase):
super(TestRunTasks, self).test_interval_exception_isolation()
self.assertEqual(len(self.PollsterException.samples), 1)
self.assertEqual(len(self.PollsterExceptionAnother.samples), 1)
def test_manager_exception_persistency(self):
    # Run the base-class part of this scenario first.
    super(TestRunTasks, self).test_manager_exception_persistency()

    # Replace the nova client's instance lookup with one that always
    # raises, whatever arguments it is called with.
    def failing_lookup(*args):
        return self._raise_exception()

    self.stubs.Set(nova_client.Client, 'instance_get_all_by_host',
                   failing_lookup)

    # No assertion needed: the test is intended to pass as long as
    # poll_and_publish() does not let the exception propagate.
    task = manager.PollingTask(manager.AgentManager())
    task.poll_and_publish()