From 50a2f50ab86842fe7f56db08ae841b46263ccca8 Mon Sep 17 00:00:00 2001 From: Yuriy Zveryanskyy Date: Thu, 28 Jul 2016 18:25:59 +0300 Subject: [PATCH] Add node provision state change notification This patch adds node provision state changes notifications, event types are: "baremetal.node.provision_set.{start, end, success, error}" Developer documentation updated. Change-Id: I6a5a2da6541596b7d1b7c1bf2795cfc6f86191fb Partial-Bug: #1606520 --- doc/source/deploy/notifications.rst | 68 ++++++++++ ironic/common/states.py | 3 + ironic/conductor/notification_utils.py | 35 ++++++ ironic/conductor/task_manager.py | 70 ++++++++++- ironic/objects/node.py | 34 +++++ .../unit/conductor/test_notification_utils.py | 56 ++++++++- .../tests/unit/conductor/test_task_manager.py | 116 +++++++++++++++++- ironic/tests/unit/objects/test_objects.py | 4 +- ...-state-notifications-33c44841a18080c3.yaml | 6 + 9 files changed, 383 insertions(+), 9 deletions(-) create mode 100644 releasenotes/notes/provision-state-notifications-33c44841a18080c3.yaml diff --git a/doc/source/deploy/notifications.rst b/doc/source/deploy/notifications.rst index 4faad24f53..5c9f89d932 100644 --- a/doc/source/deploy/notifications.rst +++ b/doc/source/deploy/notifications.rst @@ -187,5 +187,73 @@ prior to the correction:: "publisher_id":"ironic-conductor.cond-hostname02" } +baremetal.node.provision_set +---------------------------- + +* ``baremetal.node.provision_set.start`` is emitted by the ironic-conductor + service when it begins a provision state transition. It has notification + level INFO. + +* ``baremetal.node.provision_set.end`` is emitted when ironic-conductor + successfully completes a provision state transition. It has notification + level INFO. + +* ``baremetal.node.provision_set.success`` is emitted when ironic-conductor + successfully changes provision state instantly, without any intermediate + work required (example is AVAILABLE to MANAGEABLE). It has notification level + INFO. + +* ``baremetal.node.provision_set.error`` is emitted by ironic-conductor when it + changes provision state as result of error event processing. It has + notification level ERROR. + +Here is an example payload for a notification with this event type. The +"previous_provision_state" and "previous_target_provision_state" payload fields +indicate a node's provision states before state change, "event" is the FSM +(finite state machine) event that triggered the state change:: + + { + "priority": "info", + "payload":{ + "ironic_object.namespace":"ironic", + "ironic_object.name":"NodeSetProvisionStatePayload", + "ironic_object.version":"1.0", + "ironic_object.data":{ + "clean_step": None, + "console_enabled": False, + "created_at": "2016-01-26T20:41:03+00:00", + "driver": "fake", + "extra": {}, + "inspection_finished_at": None, + "inspection_started_at": None, + "instance_info": {}, + "instance_uuid": None, + "last_error": None, + "maintenance": False, + "maintenance_reason": None, + "network_interface": "flat", + "name": None, + "power_state": "power off", + "properties": { + "memory_mb": 4096, + "cpu_arch": "x86_64", + "local_gb": 10, + "cpus": 8}, + "provision_state": "deploying", + "provision_updated_at": "2016-01-27T20:41:03+00:00", + "resource_class": None, + "target_power_state": None, + "target_provision_state": "active", + "updated_at": "2016-01-27T20:41:03+00:00", + "uuid": "1be26c0b-03f2-4d2e-ae87-c02d7f33c123", + "previous_provision_state": "available", + "previous_target_provision_state": None, + "event": "deploy" + } + }, + "event_type":"baremetal.node.provision_set.start", + "publisher_id":"ironic-conductor.hostname01" + } + .. [1] https://wiki.openstack.org/wiki/LoggingStandards#Log_level_definitions .. [2] https://www.rabbitmq.com/documentation.html diff --git a/ironic/common/states.py b/ironic/common/states.py index f14ce5ec89..150966497a 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -195,6 +195,9 @@ DELETE_ALLOWED_STATES = (AVAILABLE, MANAGEABLE, ENROLL, ADOPTFAIL) STABLE_STATES = (ENROLL, MANAGEABLE, AVAILABLE, ACTIVE, ERROR) """States that will not transition unless receiving a request.""" +UNSTABLE_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT, VERIFYING, + DELETING, INSPECTING, ADOPTING) +"""States that can be changed without external request.""" ############## # Power states diff --git a/ironic/conductor/notification_utils.py b/ironic/conductor/notification_utils.py index c1a625560e..3fad981194 100644 --- a/ironic/conductor/notification_utils.py +++ b/ironic/conductor/notification_utils.py @@ -13,6 +13,7 @@ from oslo_config import cfg from oslo_log import log from oslo_messaging import exceptions as oslo_msg_exc +from oslo_utils import strutils from oslo_versionedobjects import exception as oslo_vo_exc from ironic.common import exception @@ -25,6 +26,17 @@ LOG = log.getLogger(__name__) CONF = cfg.CONF +def mask_secrets(payload): + """Remove secrets from payload object.""" + mask = '******' + if hasattr(payload, 'instance_info'): + payload.instance_info = strutils.mask_dict_password( + payload.instance_info, mask) + if 'image_url' in payload.instance_info: + payload.instance_info['image_url'] = mask + # TODO(yuriyz): add "driver_info" support + + def _emit_conductor_node_notification(task, notification_method, payload_method, action, level, status, **kwargs): @@ -58,6 +70,7 @@ def _emit_conductor_node_notification(task, notification_method, "payload_method %(payload_method)s, error " "%(error)s")) payload = payload_method(task.node, **kwargs) + mask_secrets(payload) notification_method( publisher=notification.NotificationPublisher( service='ironic-conductor', host=CONF.host), @@ -129,3 +142,25 @@ def emit_power_state_corrected_notification(task, from_power): fields.NotificationStatus.SUCCESS, from_power=from_power ) + + +def emit_provision_set_notification(task, level, status, prev_state, + prev_target, event): + """Helper for conductor sending a set provision state notification. + + :param task: a TaskManager instance. + :param level: One of fields.NotificationLevel. + :param status: One of fields.NotificationStatus. + :param prev_state: Previous provision state. + :param prev_target: Previous target provision state. + :param event: FSM event that triggered provision state change. + """ + _emit_conductor_node_notification( + task, + node_objects.NodeSetProvisionStateNotification, + node_objects.NodeSetProvisionStatePayload, + 'provision_set', level, status, + prev_state=prev_state, + prev_target=prev_target, + event=event + ) diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py index 481bd1b736..e3f9c6d943 100644 --- a/ironic/conductor/task_manager.py +++ b/ironic/conductor/task_manager.py @@ -94,6 +94,8 @@ raised in the background thread.): """ +import copy + import futurist from oslo_config import cfg from oslo_log import log as logging @@ -106,7 +108,9 @@ from ironic.common import driver_factory from ironic.common import exception from ironic.common.i18n import _, _LE, _LI, _LW from ironic.common import states +from ironic.conductor import notification_utils as notify from ironic import objects +from ironic.objects import fields LOG = logging.getLogger(__name__) @@ -200,6 +204,12 @@ class TaskManager(object): self._purpose = purpose self._debug_timer = timeutils.StopWatch() + # states and event for notification + self._prev_provision_state = None + self._prev_target_provision_state = None + self._event = None + self._saved_node = None + try: node = objects.Node.get(context, node_id) LOG.debug("Attempting to get %(type)s lock on node %(node)s (for " @@ -358,6 +368,44 @@ class TaskManager(object): except exception.NodeNotFound: pass + def _notify_provision_state_change(self): + """Emit notification about change of the node provision state.""" + if self._event is None: + return + + if self.node is None: + # Rare case if resource released before notification + task = copy.copy(self) + task.fsm = states.machine.copy() + task.node = self._saved_node + else: + task = self + + node = task.node + + state = node.provision_state + prev_state = self._prev_provision_state + new_unstable = state in states.UNSTABLE_STATES + prev_unstable = prev_state in states.UNSTABLE_STATES + level = fields.NotificationLevel.INFO + + if self._event in ('fail', 'error'): + status = fields.NotificationStatus.ERROR + level = fields.NotificationLevel.ERROR + elif (prev_unstable, new_unstable) == (False, True): + status = fields.NotificationStatus.START + elif (prev_unstable, new_unstable) == (True, False): + status = fields.NotificationStatus.END + else: + status = fields.NotificationStatus.SUCCESS + + notify.emit_provision_set_notification( + task, level, status, self._prev_provision_state, + self._prev_target_provision_state, self._event) + + # reset saved event, avoiding duplicate notification + self._event = None + def _thread_release_resources(self, fut): """Thread callback to release resources.""" try: @@ -382,6 +430,11 @@ class TaskManager(object): :raises: InvalidState if the event is not allowed by the associated state machine """ + # save previous states and event + self._prev_provision_state = self.node.provision_state + self._prev_target_provision_state = self.node.target_provision_state + self._event = event + # Advance the state model for the given event. Note that this doesn't # alter the node in any way. This may raise InvalidState, if this event # is not allowed in the current state. @@ -394,7 +447,6 @@ class TaskManager(object): self.node.provision_state, self.node.target_provision_state) - previous_state = self.node.provision_state self.node.provision_state = self.fsm.current_state # NOTE(lucasagomes): If there's no extra processing @@ -422,7 +474,14 @@ class TaskManager(object): '"%(target)s"'), {'node': self.node.uuid, 'state': self.node.provision_state, 'target': self.node.target_provision_state, - 'previous': previous_state}) + 'previous': self._prev_provision_state}) + + if callback is None: + self._notify_provision_state_change() + else: + # save the node, in case it is released before a notification is + # emitted at __exit__(). + self._saved_node = self.node def __enter__(self): return self @@ -450,6 +509,13 @@ class TaskManager(object): fut.add_done_callback(self._thread_release_resources) # Don't unlock! The unlock will occur when the # thread finishes. + # NOTE(yuriyz): A race condition with process_event() + # in callback is possible here if eventlet changes behavior. + # E.g., if the execution of the new thread (that handles the + # event processing) finishes before we get here, that new + # thread may emit the "end" notification before we emit the + # following "start" notification. + self._notify_provision_state_change() return except Exception as e: with excutils.save_and_reraise_exception(): diff --git a/ironic/objects/node.py b/ironic/objects/node.py index 40092d91b8..85688bb0f2 100644 --- a/ironic/objects/node.py +++ b/ironic/objects/node.py @@ -530,3 +530,37 @@ class NodeCorrectedPowerStatePayload(NodePayload): def __init__(self, node, from_power): super(NodeCorrectedPowerStatePayload, self).__init__( node, from_power=from_power) + + +@base.IronicObjectRegistry.register +class NodeSetProvisionStateNotification(notification.NotificationBase): + """Notification emitted when ironic changes a node provision state.""" + # Version 1.0: Initial version + VERSION = '1.0' + + fields = { + 'payload': object_fields.ObjectField('NodeSetProvisionStatePayload') + } + + +@base.IronicObjectRegistry.register +class NodeSetProvisionStatePayload(NodePayload): + """Payload schema for when ironic changes a node provision state.""" + # Version 1.0: Initial version + VERSION = '1.0' + + SCHEMA = dict(NodePayload.SCHEMA, + **{'instance_info': ('node', 'instance_info')}) + + fields = { + 'instance_info': object_fields.FlexibleDictField(nullable=True), + 'event': object_fields.StringField(nullable=True), + 'previous_provision_state': object_fields.StringField(nullable=True), + 'previous_target_provision_state': + object_fields.StringField(nullable=True) + } + + def __init__(self, node, prev_state, prev_target, event): + super(NodeSetProvisionStatePayload, self).__init__( + node, event=event, previous_provision_state=prev_state, + previous_target_provision_state=prev_target) diff --git a/ironic/tests/unit/conductor/test_notification_utils.py b/ironic/tests/unit/conductor/test_notification_utils.py index 243d46ba1b..3126d9d50a 100644 --- a/ironic/tests/unit/conductor/test_notification_utils.py +++ b/ironic/tests/unit/conductor/test_notification_utils.py @@ -21,8 +21,10 @@ from oslo_versionedobjects.exception import VersionedObjectsException from ironic.common import exception from ironic.common import states from ironic.conductor import notification_utils as notif_utils +from ironic.conductor import task_manager from ironic.objects import fields from ironic.objects import node as node_objects +from ironic.tests import base as tests_base from ironic.tests.unit.db import base from ironic.tests.unit.objects import utils as obj_utils @@ -67,7 +69,8 @@ class TestNotificationUtils(base.DbTestCase): to_power=states.POWER_ON ) - def test__emit_conductor_node_notification(self): + @mock.patch.object(notif_utils, 'mask_secrets') + def test__emit_conductor_node_notification(self, mock_secrets): mock_notify_method = mock.Mock() # Required for exception handling mock_notify_method.__name__ = 'MockNotificationConstructor' @@ -88,6 +91,7 @@ class TestNotificationUtils(base.DbTestCase): mock_payload_method.assert_called_once_with( self.task.node, **mock_kwargs) + mock_secrets.assert_called_once_with(mock_payload_method.return_value) mock_notify_method.assert_called_once_with( publisher=mock.ANY, event_type=mock.ANY, @@ -120,7 +124,9 @@ class TestNotificationUtils(base.DbTestCase): self.assertFalse(mock_notify_method.called) - def test__emit_conductor_node_notification_known_notify_exc(self): + @mock.patch.object(notif_utils, 'mask_secrets') + def test__emit_conductor_node_notification_known_notify_exc(self, + mock_secrets): """Test exception caught for a known notification exception.""" mock_notify_method = mock.Mock() # Required for exception handling @@ -142,3 +148,49 @@ class TestNotificationUtils(base.DbTestCase): ) self.assertFalse(mock_notify_method.return_value.emit.called) + + +class ProvisionNotifyTestCase(tests_base.TestCase): + @mock.patch('ironic.objects.node.NodeSetProvisionStateNotification') + def test_emit_notification(self, provision_mock): + provision_mock.__name__ = 'NodeSetProvisionStateNotification' + self.config(host='fake-host') + node = obj_utils.get_test_node(self.context, + provision_state='fake state', + target_provision_state='fake target', + instance_info={'foo': 'baz'}) + task = mock.Mock(spec=task_manager.TaskManager) + task.node = node + test_level = fields.NotificationLevel.INFO + test_status = fields.NotificationStatus.SUCCESS + notif_utils.emit_provision_set_notification( + task, test_level, test_status, 'fake_old', + 'fake_old_target', 'event') + init_kwargs = provision_mock.call_args[1] + publisher = init_kwargs['publisher'] + event_type = init_kwargs['event_type'] + level = init_kwargs['level'] + payload = init_kwargs['payload'] + self.assertEqual('fake-host', publisher.host) + self.assertEqual('ironic-conductor', publisher.service) + self.assertEqual('node', event_type.object) + self.assertEqual('provision_set', event_type.action) + self.assertEqual(test_status, event_type.status) + self.assertEqual(test_level, level) + self.assertEqual(node.uuid, payload.uuid) + self.assertEqual('fake state', payload.provision_state) + self.assertEqual('fake target', payload.target_provision_state) + self.assertEqual('fake_old', payload.previous_provision_state) + self.assertEqual('fake_old_target', + payload.previous_target_provision_state) + self.assertEqual({'foo': 'baz'}, payload.instance_info) + + def test_mask_secrets(self): + test_info = {'configdrive': 'fake_drive', 'image_url': 'fake-url', + 'some_value': 'fake-value'} + node = obj_utils.get_test_node(self.context, + instance_info=test_info) + notif_utils.mask_secrets(node) + self.assertEqual('******', node.instance_info['configdrive']) + self.assertEqual('******', node.instance_info['image_url']) + self.assertEqual('fake-value', node.instance_info['some_value']) diff --git a/ironic/tests/unit/conductor/test_task_manager.py b/ironic/tests/unit/conductor/test_task_manager.py index 8246b71a27..625eb529a6 100644 --- a/ironic/tests/unit/conductor/test_task_manager.py +++ b/ironic/tests/unit/conductor/test_task_manager.py @@ -25,8 +25,10 @@ from ironic.common import driver_factory from ironic.common import exception from ironic.common import fsm from ironic.common import states +from ironic.conductor import notification_utils from ironic.conductor import task_manager from ironic import objects +from ironic.objects import fields from ironic.tests import base as tests_base from ironic.tests.unit.db import base as tests_db_base from ironic.tests.unit.objects import utils as obj_utils @@ -418,9 +420,11 @@ class TaskManagerTestCase(tests_db_base.DbTestCase): task1.process_event('provide') self.assertEqual(states.CLEANING, task1.node.provision_state) + @mock.patch.object(task_manager.TaskManager, + '_notify_provision_state_change', autospec=True) def test_spawn_after( - self, get_portgroups_mock, get_ports_mock, build_driver_mock, - reserve_mock, release_mock, node_get_mock): + self, notify_mock, get_portgroups_mock, get_ports_mock, + build_driver_mock, reserve_mock, release_mock, node_get_mock): spawn_mock = mock.Mock(return_value=self.future_mock) task_release_mock = mock.Mock() reserve_mock.return_value = self.node @@ -437,6 +441,7 @@ class TaskManagerTestCase(tests_db_base.DbTestCase): # release resources pending the finishing of the background # thread self.assertFalse(task_release_mock.called) + notify_mock.assert_called_once_with(task) def test_spawn_after_exception_while_yielded( self, get_portgroups_mock, get_ports_mock, build_driver_mock, @@ -455,9 +460,11 @@ class TaskManagerTestCase(tests_db_base.DbTestCase): self.assertFalse(spawn_mock.called) task_release_mock.assert_called_once_with() + @mock.patch.object(task_manager.TaskManager, + '_notify_provision_state_change', autospec=True) def test_spawn_after_spawn_fails( - self, get_portgroups_mock, get_ports_mock, build_driver_mock, - reserve_mock, release_mock, node_get_mock): + self, notify_mock, get_portgroups_mock, get_ports_mock, + build_driver_mock, reserve_mock, release_mock, node_get_mock): spawn_mock = mock.Mock(side_effect=exception.IronicException('foo')) task_release_mock = mock.Mock() reserve_mock.return_value = self.node @@ -471,6 +478,7 @@ class TaskManagerTestCase(tests_db_base.DbTestCase): spawn_mock.assert_called_once_with(1, 2, foo='bar', cat='meow') task_release_mock.assert_called_once_with() + self.assertFalse(notify_mock.called) def test_spawn_after_link_fails( self, get_portgroups_mock, get_ports_mock, build_driver_mock, @@ -672,6 +680,11 @@ class TaskManagerStateModelTestCases(tests_base.TestCase): self.assertEqual(states.NOSTATE, self.task.node.target_provision_state) + def test_process_event_no_callback_notify(self): + self.task.process_event = task_manager.TaskManager.process_event + self.task.process_event(self.task, 'fake') + self.task._notify_provision_state_change.assert_called_once_with() + @task_manager.require_exclusive_lock def _req_excl_lock_method(*args, **kwargs): @@ -762,3 +775,98 @@ class ThreadExceptionTestCase(tests_base.TestCase): self.future_mock.exception.assert_called_once_with() self.assertIsNone(self.node.last_error) self.assertTrue(log_mock.called) + + +@mock.patch.object(notification_utils, 'emit_provision_set_notification', + autospec=True) +class ProvisionNotifyTestCase(tests_base.TestCase): + def setUp(self): + super(ProvisionNotifyTestCase, self).setUp() + self.node = mock.Mock(spec=objects.Node) + self.task = mock.Mock(spec=task_manager.TaskManager) + self.task.node = self.node + notifier = task_manager.TaskManager._notify_provision_state_change + self.task.notifier = notifier + self.task._prev_target_provision_state = 'oldtarget' + self.task._event = 'event' + + def test_notify_no_state_change(self, emit_mock): + self.task._event = None + self.task.notifier(self.task) + self.assertFalse(emit_mock.called) + + def test_notify_error_state(self, emit_mock): + self.task._event = 'fail' + self.task._prev_provision_state = 'fake' + self.task.notifier(self.task) + emit_mock.assert_called_once_with(self.task, + fields.NotificationLevel.ERROR, + fields.NotificationStatus.ERROR, + 'fake', 'oldtarget', 'fail') + self.assertIsNone(self.task._event) + + def test_notify_unstable_to_unstable(self, emit_mock): + self.node.provision_state = states.DEPLOYING + self.task._prev_provision_state = states.DEPLOYWAIT + self.task.notifier(self.task) + emit_mock.assert_called_once_with(self.task, + fields.NotificationLevel.INFO, + fields.NotificationStatus.SUCCESS, + states.DEPLOYWAIT, + 'oldtarget', 'event') + + def test_notify_stable_to_unstable(self, emit_mock): + self.node.provision_state = states.DEPLOYING + self.task._prev_provision_state = states.AVAILABLE + self.task.notifier(self.task) + emit_mock.assert_called_once_with(self.task, + fields.NotificationLevel.INFO, + fields.NotificationStatus.START, + states.AVAILABLE, + 'oldtarget', 'event') + + def test_notify_unstable_to_stable(self, emit_mock): + self.node.provision_state = states.ACTIVE + self.task._prev_provision_state = states.DEPLOYING + self.task.notifier(self.task) + emit_mock.assert_called_once_with(self.task, + fields.NotificationLevel.INFO, + fields.NotificationStatus.END, + states.DEPLOYING, + 'oldtarget', 'event') + + def test_notify_stable_to_stable(self, emit_mock): + self.node.provision_state = states.MANAGEABLE + self.task._prev_provision_state = states.AVAILABLE + self.task.notifier(self.task) + emit_mock.assert_called_once_with(self.task, + fields.NotificationLevel.INFO, + fields.NotificationStatus.SUCCESS, + states.AVAILABLE, + 'oldtarget', 'event') + + def test_notify_resource_released(self, emit_mock): + node = mock.Mock(spec=objects.Node) + node.provision_state = states.DEPLOYING + node.target_provision_state = states.ACTIVE + task = mock.Mock(spec=task_manager.TaskManager) + task._prev_provision_state = states.AVAILABLE + task._prev_target_provision_state = states.NOSTATE + task._event = 'event' + task.node = None + task._saved_node = node + notifier = task_manager.TaskManager._notify_provision_state_change + task.notifier = notifier + task.notifier(task) + task_arg = emit_mock.call_args[0][0] + self.assertEqual(node, task_arg.node) + self.assertIsNot(task, task_arg) + + def test_notify_only_once(self, emit_mock): + self.node.provision_state = states.DEPLOYING + self.task._prev_provision_state = states.AVAILABLE + self.task.notifier(self.task) + self.assertIsNone(self.task._event) + self.task.notifier(self.task) + self.assertEqual(1, emit_mock.call_count) + self.assertIsNone(self.task._event) diff --git a/ironic/tests/unit/objects/test_objects.py b/ironic/tests/unit/objects/test_objects.py index 8fdc583e79..7805900a9e 100644 --- a/ironic/tests/unit/objects/test_objects.py +++ b/ironic/tests/unit/objects/test_objects.py @@ -418,7 +418,9 @@ expected_object_fingerprints = { 'NodeCorrectedPowerStateNotification': '1.0-59acc533c11d306f149846f922739' 'c15', 'NodeCorrectedPowerStatePayload': '1.0-2a484d7c342caa9fe488de16dc5f1f1e', - + 'NodeSetProvisionStateNotification': + '1.0-59acc533c11d306f149846f922739c15', + 'NodeSetProvisionStatePayload': '1.0-91be7439b9b6b04931c9b99b8e1ea87a' } diff --git a/releasenotes/notes/provision-state-notifications-33c44841a18080c3.yaml b/releasenotes/notes/provision-state-notifications-33c44841a18080c3.yaml new file mode 100644 index 0000000000..b64bb5c81c --- /dev/null +++ b/releasenotes/notes/provision-state-notifications-33c44841a18080c3.yaml @@ -0,0 +1,6 @@ +--- +features: + - Adds notifications for node's provision state changes, event types are + "baremetal.node.provision_set.{start, end, success, error}". + For more details, see + http://docs.openstack.org/developer/ironic/dev/notifications.html.