aodh/ceilometer/compute/manager.py
Harri Hämäläinen 0e01da763a Catch exceptions from nova client in poll_and_publish
The Ceilometer compute agent dies if the nova client raises an exception while
it is retrieving server instances. This might happen, e.g., when some OpenStack
API is temporarily unavailable.

Fixes LP Bug #1218889

Change-Id: I808dcfae18d23240f8e095d6c97c8dede7dede8f
2013-09-12 13:36:22 +03:00

101 lines
3.4 KiB
Python

# -*- encoding: utf-8 -*-
#
# Copyright © 2012-2013 eNovance <licensing@enovance.com>
#
# Author: Julien Danjou <julien@danjou.info>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo.config import cfg
from stevedore import extension
from ceilometer import agent
from ceilometer.compute.virt import inspector as virt_inspector
from ceilometer import nova_client
from ceilometer.openstack.common import log
from ceilometer.openstack.common import service as os_service
from ceilometer.openstack.common.rpc import service as rpc_service
from ceilometer import service
# Module-level logger, named after this module via ``__name__``.
LOG = log.getLogger(__name__)
class PollingTask(agent.PollingTask):
    """Polling task that runs its pollsters against compute instances."""

    def poll_and_publish_instances(self, instances):
        """Poll each given instance with every pollster and publish samples.

        Instances whose ``OS-EXT-STS:vm_state`` attribute equals ``'error'``
        are skipped. Any exception raised while polling or publishing for a
        single pollster is logged, and processing continues with the next
        pollster.

        :param instances: iterable of instance objects to poll
        """
        with self.publish_context as publish:
            for server in instances:
                vm_state = getattr(server, 'OS-EXT-STS:vm_state', None)
                if vm_state == 'error':
                    continue

                # A fresh cache per instance, handed to every pollster that
                # is run against that instance.
                cache = {}
                for plugin in self.pollsters:
                    try:
                        LOG.info("Polling pollster %s", plugin.name)
                        # Materialize the samples before publishing them.
                        publish(list(plugin.obj.get_samples(
                            self.manager, cache, server)))
                    except Exception as exc:
                        LOG.warning('Continue after error from %s: %s',
                                    plugin.name, exc)
                        LOG.exception(exc)

    def poll_and_publish(self):
        """Fetch the instances of the configured host, then poll them.

        If retrieving the instance list raises, the error is logged and
        nothing is polled.
        """
        try:
            hosted = self.manager.nv.instance_get_all_by_host(cfg.CONF.host)
        except Exception as exc:
            LOG.exception('Unable to retrieve instances: %s', exc)
            return

        self.poll_and_publish_instances(hosted)
class AgentManager(agent.AgentManager):
    """Agent manager loading the ``ceilometer.poll.compute`` extensions."""

    def __init__(self):
        """Load the compute pollsters and set up inspector and nova client."""
        compute_pollsters = extension.ExtensionManager(
            namespace='ceilometer.poll.compute',
            invoke_on_load=True,
        )
        super(AgentManager, self).__init__(compute_pollsters)
        self._inspector = virt_inspector.get_hypervisor_inspector()
        # Nova client; PollingTask.poll_and_publish reads it via manager.nv.
        self.nv = nova_client.Client()

    def create_polling_task(self):
        """Return a new PollingTask bound to this manager."""
        return PollingTask(self)

    def setup_notifier_task(self):
        """Build the polling task used by the nova notifier.

        Every extension of ``self.pollster_manager`` is added to the task
        together with the pipelines of ``self.pipeline_manager``; the task
        is stored as ``self.notifier_task`` only once all were added.
        """
        notifier = PollingTask(self)
        for ext in self.pollster_manager.extensions:
            notifier.add(ext, self.pipeline_manager.pipelines)
        self.notifier_task = notifier

    def poll_instance(self, context, instance):
        """Poll and publish a single instance through the notifier task.

        ``context`` is accepted but not used by this method.
        """
        self.notifier_task.poll_and_publish_instances([instance])

    @property
    def inspector(self):
        """The hypervisor inspector obtained at construction time."""
        return self._inspector
def agent_compute():
    """Prepare the service, launch the compute agent and wait on it."""
    service.prepare_service()
    compute_service = rpc_service.Service(
        cfg.CONF.host,
        'ceilometer.agent.compute',
        AgentManager(),
    )
    launched = os_service.launch(compute_service)
    launched.wait()