Block ability update callback_url
A malicious user with: * API access normally reserved for the provisioning, cleaning, rescue networks. * Insight about a node, such as a MAC address, or baremetal node UUID. * Insight into the state of the node, such as the access provided to Compute API users, or other Bare Metal API users. Can submit an erroneous ``heartbeat`` to the ironic-api endpoint with a ``callback_url`` that is not of the actual intended agent. This can potentially cause a rescue, cleaning, or deployment operation to be derailed, or at worst commands to be sent to an endpoint the malicious user controls. Story: 2006773 Task: 37295 Change-Id: I1a5e3c2b34d45c06fb74e82d0f30735ce9041914
This commit is contained in:
parent
311375345b
commit
931c125982
@ -184,6 +184,19 @@ class HeartbeatController(rest.RestController):
|
||||
policy.authorize('baremetal:node:ipa_heartbeat', cdict, cdict)
|
||||
|
||||
rpc_node = api_utils.get_rpc_node_with_suffix(node_ident)
|
||||
dii = rpc_node['driver_internal_info']
|
||||
agent_url = dii.get('agent_url')
|
||||
# If we have an agent_url on file, and we get something different
|
||||
# we should fail because this is unexpected behavior of the agent.
|
||||
if (agent_url is not None
|
||||
and agent_url != callback_url):
|
||||
LOG.error('Received heartbeat for node %(node)s with '
|
||||
'callback URL %(url)s. This is not expected, '
|
||||
'and the heartbeat will not be processed.',
|
||||
{'node': rpc_node.uuid, 'url': callback_url})
|
||||
raise exception.Invalid(
|
||||
_('Detected change in ramdisk provided '
|
||||
'"callback_url"'))
|
||||
|
||||
try:
|
||||
topic = api.request.rpcapi.get_topic_for(rpc_node)
|
||||
|
@ -598,6 +598,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
node_id, purpose='node rescue') as task:
|
||||
|
||||
node = task.node
|
||||
# Record of any pre-existing agent_url should be removed.
|
||||
utils.remove_agent_url(node)
|
||||
if node.maintenance:
|
||||
raise exception.NodeInMaintenance(op=_('rescuing'),
|
||||
node=node.uuid)
|
||||
@ -697,6 +699,9 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
with task_manager.acquire(context, node_id,
|
||||
purpose='node unrescue') as task:
|
||||
node = task.node
|
||||
# Record of any pre-existing agent_url should be removed,
|
||||
# Not that there should be.
|
||||
utils.remove_agent_url(node)
|
||||
if node.maintenance:
|
||||
raise exception.NodeInMaintenance(op=_('unrescuing'),
|
||||
node=node.uuid)
|
||||
@ -776,6 +781,7 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
info_message = _('Rescue operation aborted for node %s.') % node.uuid
|
||||
last_error = _('By request, the rescue operation was aborted.')
|
||||
node.refresh()
|
||||
utils.remove_agent_url(node)
|
||||
node.last_error = last_error
|
||||
node.save()
|
||||
LOG.info(info_message)
|
||||
@ -819,6 +825,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
with task_manager.acquire(context, node_id, shared=False,
|
||||
purpose='node deployment') as task:
|
||||
node = task.node
|
||||
# Record of any pre-existing agent_url should be removed.
|
||||
utils.remove_agent_url(node)
|
||||
if node.maintenance:
|
||||
raise exception.NodeInMaintenance(op=_('provisioning'),
|
||||
node=node.uuid)
|
||||
@ -972,6 +980,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
|
||||
with task_manager.acquire(context, node_id, shared=False,
|
||||
purpose='node tear down') as task:
|
||||
# Record of any pre-existing agent_url should be removed.
|
||||
utils.remove_agent_url(task.node)
|
||||
if task.node.protected:
|
||||
raise exception.NodeProtected(node=task.node.uuid)
|
||||
|
||||
@ -1168,7 +1178,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
with task_manager.acquire(context, node_id, shared=False,
|
||||
purpose='node manual cleaning') as task:
|
||||
node = task.node
|
||||
|
||||
# Record of any pre-existing agent_url should be removed.
|
||||
utils.remove_agent_url(node)
|
||||
if node.maintenance:
|
||||
raise exception.NodeInMaintenance(op=_('cleaning'),
|
||||
node=node.uuid)
|
||||
@ -1473,6 +1484,8 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
driver_internal_info.pop('clean_step_index', None)
|
||||
driver_internal_info.pop('cleaning_reboot', None)
|
||||
driver_internal_info.pop('cleaning_polling', None)
|
||||
# Remove agent_url
|
||||
driver_internal_info.pop('agent_url', None)
|
||||
node.driver_internal_info = driver_internal_info
|
||||
node.save()
|
||||
try:
|
||||
@ -1562,6 +1575,7 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
info.pop('cleaning_reboot', None)
|
||||
info.pop('cleaning_polling', None)
|
||||
info.pop('skip_current_clean_step', None)
|
||||
info.pop('agent_url', None)
|
||||
node.driver_internal_info = info
|
||||
node.save()
|
||||
LOG.info(info_message)
|
||||
@ -3979,6 +3993,8 @@ def _do_next_deploy_step(task, step_index, conductor_id):
|
||||
driver_internal_info.pop('deploy_step_index', None)
|
||||
driver_internal_info.pop('deployment_reboot', None)
|
||||
driver_internal_info.pop('deployment_polling', None)
|
||||
# Remove the agent_url cached from the deployment.
|
||||
driver_internal_info.pop('agent_url', None)
|
||||
node.driver_internal_info = driver_internal_info
|
||||
node.save()
|
||||
|
||||
@ -4202,6 +4218,9 @@ def _do_inspect_hardware(task):
|
||||
log_func("Failed to inspect node %(node)s: %(err)s",
|
||||
{'node': node.uuid, 'err': e})
|
||||
|
||||
# Remove agent_url, while not strictly needed for the inspection path,
|
||||
# lets just remove it out of good practice.
|
||||
utils.remove_agent_url(node)
|
||||
try:
|
||||
new_state = task.driver.inspect.inspect_hardware(task)
|
||||
except exception.IronicException as e:
|
||||
|
@ -417,6 +417,9 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
|
||||
info.pop('cleaning_reboot', None)
|
||||
info.pop('cleaning_polling', None)
|
||||
info.pop('skip_current_clean_step', None)
|
||||
# We don't need to keep the old agent URL
|
||||
# as it should change upon the next cleaning attempt.
|
||||
info.pop('agent_url', None)
|
||||
node.driver_internal_info = info
|
||||
# For manual cleaning, the target provision state is MANAGEABLE, whereas
|
||||
# for automated cleaning, it is AVAILABLE.
|
||||
@ -477,6 +480,9 @@ def deploying_error_handler(task, logmsg, errmsg, traceback=False,
|
||||
info.pop('deployment_reboot', None)
|
||||
info.pop('deployment_polling', None)
|
||||
info.pop('skip_current_deploy_step', None)
|
||||
# Remove agent_url since it will be re-asserted
|
||||
# upon the next deployment attempt.
|
||||
info.pop('agent_url', None)
|
||||
node.driver_internal_info = info
|
||||
|
||||
if cleanup_err:
|
||||
@ -521,6 +527,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True):
|
||||
try:
|
||||
node_power_action(task, states.POWER_OFF)
|
||||
task.driver.rescue.clean_up(task)
|
||||
remove_agent_url(node)
|
||||
node.last_error = msg
|
||||
except exception.IronicException as e:
|
||||
node.last_error = (_('Rescue operation was unsuccessful, clean up '
|
||||
@ -535,6 +542,7 @@ def rescuing_error_handler(task, msg, set_fail_state=True):
|
||||
LOG.exception('Rescue failed for node %(node)s, an exception was '
|
||||
'encountered while aborting.', {'node': node.uuid})
|
||||
finally:
|
||||
remove_agent_url(node)
|
||||
node.save()
|
||||
|
||||
if set_fail_state:
|
||||
@ -913,3 +921,10 @@ def is_fast_track(task):
|
||||
task.node.driver_internal_info.get('agent_last_heartbeat'),
|
||||
CONF.deploy.fast_track_timeout)
|
||||
and task.driver.power.get_power_state(task) == states.POWER_ON)
|
||||
|
||||
|
||||
def remove_agent_url(node):
    """Drop any cached ``agent_url`` entry from the node.

    Clears the record so a stale (or attacker-supplied) agent callback
    URL cannot survive into the next provisioning/cleaning/rescue cycle.
    """
    internal_info = node.driver_internal_info
    if 'agent_url' in internal_info:
        del internal_info['agent_url']
    node.driver_internal_info = internal_info
|
||||
|
@ -255,3 +255,15 @@ class TestHeartbeat(test_api_base.BaseApiTest):
|
||||
headers={api_base.Version.string: '1.35'},
|
||||
expect_errors=True)
|
||||
self.assertEqual(http_client.BAD_REQUEST, response.status_int)
|
||||
|
||||
@mock.patch.object(rpcapi.ConductorAPI, 'heartbeat', autospec=True)
def test_heartbeat_rejects_different_callback_url(self, mock_heartbeat):
    """A heartbeat whose callback_url differs from the agent_url already
    recorded in driver_internal_info must be rejected with HTTP 400,
    so a malicious caller cannot redirect agent commands to another
    endpoint (story 2006773).
    """
    # Node already has an agent_url on file from a prior heartbeat.
    node = obj_utils.create_test_node(
        self.context,
        driver_internal_info={'agent_url': 'url'})
    # Heartbeat with a *different* callback_url; expect_errors lets us
    # inspect the error response instead of raising.
    response = self.post_json(
        '/heartbeat/%s' % node.uuid,
        {'callback_url': 'url2'},
        headers={api_base.Version.string: str(api_v1.max_version())},
        expect_errors=True)
    self.assertEqual(http_client.BAD_REQUEST, response.status_int)
|
||||
|
@ -1352,9 +1352,11 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
autospec=True) as mock_spawn:
|
||||
mock_spawn.return_value = thread
|
||||
|
||||
node = obj_utils.create_test_node(self.context,
|
||||
driver='fake-hardware',
|
||||
provision_state=states.AVAILABLE)
|
||||
node = obj_utils.create_test_node(
|
||||
self.context,
|
||||
driver='fake-hardware',
|
||||
provision_state=states.AVAILABLE,
|
||||
driver_internal_info={'agent_url': 'url'})
|
||||
|
||||
self.service.do_node_deploy(self.context, node.uuid)
|
||||
self._stop_service()
|
||||
@ -1369,6 +1371,7 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
mock.ANY, None)
|
||||
mock_iwdi.assert_called_once_with(self.context, node.instance_info)
|
||||
self.assertFalse(node.driver_internal_info['is_whole_disk_image'])
|
||||
self.assertNotIn('agent_url', node.driver_internal_info)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.deploy')
|
||||
def test_do_node_deploy_rebuild_active_state_old(self, mock_deploy,
|
||||
@ -2658,7 +2661,8 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
def test__do_next_deploy_step_all(self, mock_execute):
|
||||
# Run all steps from start to finish (all synchronous)
|
||||
driver_internal_info = {'deploy_step_index': None,
|
||||
'deploy_steps': self.deploy_steps}
|
||||
'deploy_steps': self.deploy_steps,
|
||||
'agent_url': 'url'}
|
||||
self._start_service()
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
@ -2680,6 +2684,7 @@ class DoNextDeployStepTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
self.assertIsNone(node.driver_internal_info['deploy_steps'])
|
||||
mock_execute.assert_has_calls = [mock.call(self.deploy_steps[0]),
|
||||
mock.call(self.deploy_steps[1])]
|
||||
self.assertNotIn('agent_url', node.driver_internal_info)
|
||||
|
||||
@mock.patch.object(conductor_utils, 'LOG', autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_deploy_step',
|
||||
@ -3854,7 +3859,8 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.CLEANING,
|
||||
target_provision_state=states.AVAILABLE,
|
||||
last_error=None)
|
||||
last_error=None,
|
||||
driver_internal_info={'agent_url': 'url'})
|
||||
with task_manager.acquire(
|
||||
self.context, node.uuid, shared=False) as task:
|
||||
self.service._do_node_clean(task)
|
||||
@ -3864,6 +3870,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
# Assert that the node was cleaned
|
||||
self.assertTrue(mock_validate.called)
|
||||
self.assertIn('clean_steps', node.driver_internal_info)
|
||||
self.assertNotIn('agent_url', node.driver_internal_info)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
|
||||
autospec=True)
|
||||
@ -4599,7 +4606,8 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
|
||||
task = self._create_task(
|
||||
node_attrs=dict(driver='fake-hardware',
|
||||
provision_state=states.ACTIVE,
|
||||
instance_info={}))
|
||||
instance_info={},
|
||||
driver_internal_info={'agent_url': 'url'}))
|
||||
mock_acquire.side_effect = self._get_acquire_side_effect(task)
|
||||
self.service.do_node_rescue(self.context, task.node.uuid,
|
||||
"password")
|
||||
@ -4609,6 +4617,7 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
|
||||
call_args=(self.service._do_node_rescue, task),
|
||||
err_handler=conductor_utils.spawn_rescue_error_handler)
|
||||
self.assertIn('rescue_password', task.node.instance_info)
|
||||
self.assertNotIn('agent_url', task.node.driver_internal_info)
|
||||
|
||||
def test_do_node_rescue_invalid_state(self):
|
||||
self._start_service()
|
||||
@ -4740,9 +4749,12 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
|
||||
self._start_service()
|
||||
task = self._create_task(
|
||||
node_attrs=dict(driver='fake-hardware',
|
||||
provision_state=states.RESCUE))
|
||||
provision_state=states.RESCUE,
|
||||
driver_internal_info={'agent_url': 'url'}))
|
||||
mock_acquire.side_effect = self._get_acquire_side_effect(task)
|
||||
self.service.do_node_unrescue(self.context, task.node.uuid)
|
||||
task.node.refresh()
|
||||
self.assertNotIn('agent_url', task.node.driver_internal_info)
|
||||
task.process_event.assert_called_once_with(
|
||||
'unrescue',
|
||||
callback=self.service._spawn_worker,
|
||||
@ -4876,12 +4888,14 @@ class DoNodeRescueTestCase(mgr_utils.CommonMixIn, mgr_utils.ServiceSetUpMixin,
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.RESCUEFAIL,
|
||||
target_provision_state=states.RESCUE)
|
||||
target_provision_state=states.RESCUE,
|
||||
driver_internal_info={'agent_url': 'url'})
|
||||
with task_manager.acquire(self.context, node.uuid) as task:
|
||||
self.service._do_node_rescue_abort(task)
|
||||
clean_up_mock.assert_called_once_with(task.driver.rescue, task)
|
||||
self.assertIsNotNone(task.node.last_error)
|
||||
self.assertFalse(task.node.maintenance)
|
||||
self.assertNotIn('agent_url', task.node.driver_internal_info)
|
||||
|
||||
@mock.patch.object(fake.FakeRescue, 'clean_up', autospec=True)
|
||||
def test__do_node_rescue_abort_clean_up_fail(self, clean_up_mock):
|
||||
@ -7872,8 +7886,10 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
|
||||
def test_inspect_hardware_ok(self, mock_inspect):
|
||||
self._start_service()
|
||||
node = obj_utils.create_test_node(self.context, driver='fake-hardware',
|
||||
provision_state=states.INSPECTING)
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.INSPECTING,
|
||||
driver_internal_info={'agent_url': 'url'})
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
mock_inspect.return_value = states.MANAGEABLE
|
||||
manager._do_inspect_hardware(task)
|
||||
@ -7882,6 +7898,8 @@ class NodeInspectHardware(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
||||
self.assertIsNone(node.last_error)
|
||||
mock_inspect.assert_called_once_with(mock.ANY)
|
||||
task.node.refresh()
|
||||
self.assertNotIn('agent_url', task.node.driver_internal_info)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
|
||||
def test_inspect_hardware_return_inspecting(self, mock_inspect):
|
||||
|
@ -920,6 +920,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
|
||||
info['deployment_reboot'] = True
|
||||
info['deployment_polling'] = True
|
||||
info['skip_current_deploy_step'] = True
|
||||
info['agent_url'] = 'url'
|
||||
conductor_utils.deploying_error_handler(self.task, self.logmsg,
|
||||
self.errmsg)
|
||||
|
||||
@ -932,6 +933,7 @@ class DeployingErrorHandlerTestCase(tests_base.TestCase):
|
||||
self.assertNotIn('deployment_polling', self.node.driver_internal_info)
|
||||
self.assertNotIn('skip_current_deploy_step',
|
||||
self.node.driver_internal_info)
|
||||
self.assertNotIn('agent_url', self.node.driver_internal_info)
|
||||
self.task.process_event.assert_called_once_with('fail')
|
||||
|
||||
def _test_deploying_error_handler_cleanup(self, exc, expected_str):
|
||||
@ -1059,7 +1061,8 @@ class ErrorHandlersTestCase(tests_base.TestCase):
|
||||
'cleaning_reboot': True,
|
||||
'cleaning_polling': True,
|
||||
'skip_current_clean_step': True,
|
||||
'clean_step_index': 0}
|
||||
'clean_step_index': 0,
|
||||
'agent_url': 'url'}
|
||||
msg = 'error bar'
|
||||
conductor_utils.cleaning_error_handler(self.task, msg)
|
||||
self.node.save.assert_called_once_with()
|
||||
@ -1080,6 +1083,7 @@ class ErrorHandlersTestCase(tests_base.TestCase):
|
||||
else:
|
||||
self.task.process_event.assert_called_once_with('fail',
|
||||
target_state=None)
|
||||
self.assertNotIn('agent_url', self.node.driver_internal_info)
|
||||
|
||||
def test_cleaning_error_handler(self):
|
||||
self._test_cleaning_error_handler()
|
||||
@ -1254,12 +1258,14 @@ class ErrorHandlersTestCase(tests_base.TestCase):
|
||||
def _test_rescuing_error_handler(self, node_power_mock,
|
||||
set_state=True):
|
||||
self.node.provision_state = states.RESCUEWAIT
|
||||
self.node.driver_internal_info.update({'agent_url': 'url'})
|
||||
conductor_utils.rescuing_error_handler(self.task,
|
||||
'some exception for node',
|
||||
set_fail_state=set_state)
|
||||
node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF)
|
||||
self.task.driver.rescue.clean_up.assert_called_once_with(self.task)
|
||||
self.node.save.assert_called_once_with()
|
||||
self.assertNotIn('agent_url', self.node.driver_internal_info)
|
||||
if set_state:
|
||||
self.assertTrue(self.task.process_event.called)
|
||||
else:
|
||||
|
@ -269,9 +269,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
shared=True) as task:
|
||||
self.deploy.heartbeat(task, agent_url, '3.2.0')
|
||||
self.assertFalse(task.shared)
|
||||
self.assertEqual(
|
||||
agent_url,
|
||||
task.node.driver_internal_info['agent_url'])
|
||||
self.assertIsNone(
|
||||
task.node.driver_internal_info.get('agent_url', None))
|
||||
self.assertEqual(
|
||||
'3.2.0',
|
||||
task.node.driver_internal_info['agent_version'])
|
||||
|
@ -0,0 +1,10 @@
|
||||
---
|
||||
security:
|
||||
- |
|
||||
Prevents additional updates of an agent ``callback_url`` through the agent
|
||||
heartbeat ``/v1/heartbeat/<node_uuid>`` endpoint as the ``callback_url``
|
||||
should remain stable through the cleaning, provisioning, or rescue
|
||||
processes. Should anything such as an unexpected agent reboot cause the
|
||||
``callback_url`` to change, heartbeat operations will now be ignored.
|
||||
More information can be found at
|
||||
`story 2006773 <https://storyboard.openstack.org/#!/story/2006773>`_.
|
Loading…
x
Reference in New Issue
Block a user