diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index df4d3b052d..5c1bd2ae78 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -211,7 +211,7 @@ class BaseAgentVendor(base.VendorInterface): 'payload version: %s') % version) - def _notify_conductor_resume_clean(self, task): + def notify_conductor_resume_clean(self, task): LOG.debug('Sending RPC to conductor to resume cleaning for node %s', task.node.uuid) uuid = task.node.uuid @@ -265,7 +265,7 @@ class BaseAgentVendor(base.VendorInterface): 'step': node.clean_step}) LOG.exception(msg) return manager.cleaning_error_handler(task, msg) - self._notify_conductor_resume_clean(task) + self.notify_conductor_resume_clean(task) elif command.get('command_status') == 'SUCCEEDED': clean_step_hook = _get_post_clean_step_hook(node) @@ -290,7 +290,7 @@ class BaseAgentVendor(base.VendorInterface): LOG.info(_LI('Agent on node %s returned cleaning command success, ' 'moving to next clean step'), node.uuid) - self._notify_conductor_resume_clean(task) + self.notify_conductor_resume_clean(task) else: msg = (_('Agent returned unknown status for clean step %(step)s ' 'on node %(node)s : %(err)s.') % @@ -361,7 +361,7 @@ class BaseAgentVendor(base.VendorInterface): node.uuid) msg = _('Node failed to start the next cleaning step.') manager.set_node_cleaning_steps(task) - self._notify_conductor_resume_clean(task) + self.notify_conductor_resume_clean(task) else: msg = _('Node failed to check cleaning progress.') self.continue_cleaning(task, **kwargs) diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 237f3dccae..2fd0bad101 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -1443,7 +1443,17 @@ def prepare_inband_cleaning(task, manage_boot=True): if manage_boot: ramdisk_opts = build_agent_options(task.node) + + # TODO(rameshg87): Below code is to make sure that bash ramdisk + # invokes pass_deploy_info vendor passthru when it is booted + # for cleaning. Remove the below code once we stop supporting + # bash ramdisk in Ironic. Do a late import to avoid circular + # import. + from ironic.drivers.modules import iscsi_deploy + ramdisk_opts.update( + iscsi_deploy.build_deploy_ramdisk_options(task.node)) task.driver.boot.prepare_ramdisk(task, ramdisk_opts) + manager_utils.node_power_action(task, states.REBOOT) # Tell the conductor we are waiting for the agent to boot. diff --git a/ironic/drivers/modules/iscsi_deploy.py b/ironic/drivers/modules/iscsi_deploy.py index 02915ba0bf..49b0a61d80 100644 --- a/ironic/drivers/modules/iscsi_deploy.py +++ b/ironic/drivers/modules/iscsi_deploy.py @@ -30,6 +30,7 @@ from ironic.common.i18n import _LW from ironic.common import keystone from ironic.common import states from ironic.common import utils +from ironic.conductor import manager from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils from ironic.drivers import base @@ -741,6 +742,59 @@ class ISCSIDeploy(base.DeployInterface): def take_over(self, task): pass + def get_clean_steps(self, task): + """Get the list of clean steps from the agent. + + :param task: a TaskManager object containing the node + + :returns: A list of clean step dictionaries. If bash ramdisk is + used for this node, it returns an empty list. + """ + # TODO(rameshg87): Remove the below code once we stop supporting + # bash ramdisk in Ironic. No need to log warning because we have + # already logged it in pass_deploy_info. + if 'agent_url' not in task.node.driver_internal_info: + return [] + + steps = deploy_utils.agent_get_clean_steps( + task, interface='deploy', + override_priorities={ + 'erase_devices': CONF.deploy.erase_devices_priority}) + return steps + + def execute_clean_step(self, task, step): + """Execute a clean step asynchronously on the agent. + + :param task: a TaskManager object containing the node + :param step: a clean step dictionary to execute + :raises: NodeCleaningFailure if the agent does not return a command + status + :returns: states.CLEANWAIT to signify the step will be completed + asynchronously. + """ + return deploy_utils.agent_execute_clean_step(task, step) + + def prepare_cleaning(self, task): + """Boot into the agent to prepare for cleaning. + + :param task: a TaskManager object containing the node + :raises NodeCleaningFailure: if the previous cleaning ports cannot + be removed or if new cleaning ports cannot be created + :returns: states.CLEANWAIT to signify an asynchronous prepare. + """ + return deploy_utils.prepare_inband_cleaning( + task, manage_boot=True) + + def tear_down_cleaning(self, task): + """Clean up the PXE and DHCP files after cleaning. + + :param task: a TaskManager object containing the node + :raises NodeCleaningFailure: if the cleaning ports cannot be + removed + """ + deploy_utils.tear_down_inband_cleaning( + task, manage_boot=True) + class VendorPassthru(agent_base_vendor.BaseAgentVendor): """Interface to mix IPMI and PXE vendor-specific interfaces.""" @@ -763,8 +817,13 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): :raises: InvalidParameterValue if any parameters is invalid. """ if method == 'pass_deploy_info': - deploy_utils.validate_capabilities(task.node) - get_deploy_info(task.node, **kwargs) + # TODO(rameshg87): Don't validate deploy info if bash ramdisk + # booted during cleaning. It will be handled in pass_deploy_info + # method. Remove the below code once we stop supporting bash + # ramdisk in Ironic. + if task.node.provision_state != states.CLEANWAIT: + deploy_utils.validate_capabilities(task.node) + get_deploy_info(task.node, **kwargs) elif method == 'pass_bootloader_install_info': validate_pass_bootloader_info_input(task, kwargs) @@ -798,6 +857,30 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): validate_bootloader_install_status(task, kwargs) finish_deploy(task, kwargs['address']) + def _initiate_cleaning(self, task): + """Initiates the steps required to start cleaning for the node. + + This method polls each interface of the driver for getting the + clean steps and notifies Ironic conductor to resume cleaning. + On error, it sets the node to CLEANFAIL state and populates + node.last_error with the error message. + + :param task: a TaskManager instance containing the node to act on. + """ + LOG.warning( + _LW("Bash deploy ramdisk doesn't support in-band cleaning. " + "Please use the ironic-python-agent (IPA) ramdisk " + "instead for node %s. "), task.node.uuid) + try: + manager.set_node_cleaning_steps(task) + self.notify_conductor_resume_clean(task) + except Exception as e: + last_error = ( + _('Encountered exception for node %(node)s ' + 'while initiating cleaning. Error: %(error)s') % + {'node': task.node.uuid, 'error': e}) + return manager.cleaning_error_handler(task, last_error) + @base.passthru(['POST']) @task_manager.require_exclusive_lock def pass_deploy_info(self, task, **kwargs): @@ -815,6 +898,12 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor): "its deployment. This deploy ramdisk has been " "deprecated. Please use the ironic-python-agent " "(IPA) ramdisk instead."), node.uuid) + + # TODO(rameshg87): Remove the below code once we stop supporting + # bash ramdisk in Ironic. + if node.provision_state == states.CLEANWAIT: + return self._initiate_cleaning(task) + task.process_event('resume') LOG.debug('Continuing the deployment on node %s', node.uuid) diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index 28f27dae7a..125ceb47e4 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -325,7 +325,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) @mock.patch.object(manager, 'set_node_cleaning_steps', autospec=True) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) def test_heartbeat_resume_clean(self, mock_notify, mock_set_steps, mock_touch): kwargs = { @@ -407,7 +407,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): @mock.patch.object(agent_base_vendor.BaseAgentVendor, 'reboot_to_instance', autospec=True) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) def test_heartbeat_noops_maintenance_mode(self, ncrc_mock, rti_mock, cd_mock): """Ensures that heartbeat() no-ops for a maintenance node.""" @@ -685,7 +685,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertEqual(states.ACTIVE, task.node.target_provision_state) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning(self, status_mock, notify_mock): @@ -712,7 +712,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): @mock.patch.object(agent_base_vendor, '_get_post_clean_step_hook', autospec=True) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning_with_hook( @@ -739,7 +739,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): notify_mock.assert_called_once_with(mock.ANY, task) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_base_vendor, '_get_post_clean_step_hook', autospec=True) @mock.patch.object(manager, 'cleaning_error_handler', autospec=True) @@ -772,7 +772,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertFalse(notify_mock.called) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning_old_command(self, status_mock, notify_mock): @@ -801,7 +801,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): self.assertFalse(notify_mock.called) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning_running(self, status_mock, notify_mock): @@ -835,7 +835,7 @@ class TestBaseAgentVendor(db_base.DbTestCase): @mock.patch('ironic.conductor.manager.set_node_cleaning_steps', autospec=True) @mock.patch.object(agent_base_vendor.BaseAgentVendor, - '_notify_conductor_resume_clean', autospec=True) + 'notify_conductor_resume_clean', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning_clean_version_mismatch( diff --git a/ironic/tests/unit/drivers/modules/test_deploy_utils.py b/ironic/tests/unit/drivers/modules/test_deploy_utils.py index a2fd4fbfc0..2ad49008b5 100644 --- a/ironic/tests/unit/drivers/modules/test_deploy_utils.py +++ b/ironic/tests/unit/drivers/modules/test_deploy_utils.py @@ -44,6 +44,7 @@ from ironic.conductor import utils as manager_utils from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils as utils from ironic.drivers.modules import image_cache +from ironic.drivers.modules import iscsi_deploy from ironic.drivers.modules import pxe from ironic.tests.unit import base as tests_base from ironic.tests.unit.conductor import utils as mgr_utils @@ -2128,13 +2129,16 @@ class AgentMethodsTestCase(db_base.DbTestCase): @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', autospec=True) @mock.patch('ironic.conductor.utils.node_power_action', autospec=True) + @mock.patch.object(iscsi_deploy, 'build_deploy_ramdisk_options', + autospec=True) @mock.patch.object(utils, 'build_agent_options', autospec=True) @mock.patch.object(utils, 'prepare_cleaning_ports', autospec=True) def _test_prepare_inband_cleaning( - self, prepare_cleaning_ports_mock, + self, prepare_cleaning_ports_mock, iscsi_build_options_mock, build_options_mock, power_mock, prepare_ramdisk_mock, manage_boot=True): build_options_mock.return_value = {'a': 'b'} + iscsi_build_options_mock.return_value = {'c': 'd'} with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: self.assertEqual( @@ -2146,7 +2150,7 @@ class AgentMethodsTestCase(db_base.DbTestCase): 'agent_erase_devices_iterations'), 1) if manage_boot: prepare_ramdisk_mock.assert_called_once_with( - mock.ANY, mock.ANY, {'a': 'b'}) + mock.ANY, mock.ANY, {'a': 'b', 'c': 'd'}) build_options_mock.assert_called_once_with(task.node) else: self.assertFalse(prepare_ramdisk_mock.called) diff --git a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py index 261f11766b..ab20598991 100644 --- a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py +++ b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py @@ -29,6 +29,7 @@ from ironic.common import keystone from ironic.common import pxe_utils from ironic.common import states from ironic.common import utils +from ironic.conductor import manager from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils from ironic.drivers.modules import agent_base_vendor @@ -1048,6 +1049,61 @@ class ISCSIDeployTestCase(db_base.DbTestCase): clean_up_instance_mock.assert_called_once_with( task.driver.boot, task) + @mock.patch.object(deploy_utils, 'prepare_inband_cleaning', autospec=True) + def test_prepare_cleaning(self, prepare_inband_cleaning_mock): + prepare_inband_cleaning_mock.return_value = states.CLEANWAIT + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertEqual( + states.CLEANWAIT, task.driver.deploy.prepare_cleaning(task)) + prepare_inband_cleaning_mock.assert_called_once_with( + task, manage_boot=True) + + @mock.patch.object(deploy_utils, 'tear_down_inband_cleaning', + autospec=True) + def test_tear_down_cleaning(self, tear_down_cleaning_mock): + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.deploy.tear_down_cleaning(task) + tear_down_cleaning_mock.assert_called_once_with( + task, manage_boot=True) + + @mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps', + autospec=True) + def test_get_clean_steps(self, mock_get_clean_steps): + # Test getting clean steps + self.config(group='deploy', erase_devices_priority=10) + mock_steps = [{'priority': 10, 'interface': 'deploy', + 'step': 'erase_devices'}] + self.node.driver_internal_info = {'agent_url': 'foo'} + self.node.save() + mock_get_clean_steps.return_value = mock_steps + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = task.driver.deploy.get_clean_steps(task) + mock_get_clean_steps.assert_called_once_with( + task, interface='deploy', + override_priorities={ + 'erase_devices': 10}) + self.assertEqual(mock_steps, steps) + + @mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps', + autospec=True) + def test_get_clean_steps_no_agent_url(self, mock_get_clean_steps): + # Test getting clean steps + self.node.driver_internal_info = {} + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = task.driver.deploy.get_clean_steps(task) + + self.assertEqual([], steps) + self.assertFalse(mock_get_clean_steps.called) + + @mock.patch.object(deploy_utils, 'agent_execute_clean_step', autospec=True) + def test_execute_clean_step(self, agent_execute_clean_step_mock): + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.deploy.execute_clean_step( + task, {'some-step': 'step-info'}) + agent_execute_clean_step_mock.assert_called_once_with( + task, {'some-step': 'step-info'}) + class TestVendorPassthru(db_base.DbTestCase): @@ -1077,6 +1133,16 @@ class TestVendorPassthru(db_base.DbTestCase): address='123456', iqn='aaa-bbb', key='fake-56789') + def test_validate_pass_deploy_info_during_cleaning(self): + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.node.provision_state = states.CLEANWAIT + # Assert that it doesn't raise. + self.assertIsNone( + task.driver.vendor.validate(task, method='pass_deploy_info', + address='123456', iqn='aaa-bbb', + key='fake-56789')) + def test_validate_fail(self): with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: @@ -1094,6 +1160,41 @@ class TestVendorPassthru(db_base.DbTestCase): address='123456', iqn='aaa-bbb', key='fake-12345') + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + 'notify_conductor_resume_clean', + autospec=True) + @mock.patch.object(manager, 'set_node_cleaning_steps', autospec=True) + @mock.patch.object(iscsi_deploy, 'LOG', spec=['warning']) + def test__initiate_cleaning(self, log_mock, set_node_cleaning_steps_mock, + notify_mock): + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.driver.vendor._initiate_cleaning(task) + + log_mock.warning.assert_called_once_with(mock.ANY, mock.ANY) + set_node_cleaning_steps_mock.assert_called_once_with(task) + notify_mock.assert_called_once_with(self.driver.vendor, task) + + @mock.patch.object(agent_base_vendor.BaseAgentVendor, + 'notify_conductor_resume_clean', + autospec=True) + @mock.patch.object(manager, 'cleaning_error_handler', autospec=True) + @mock.patch.object(manager, 'set_node_cleaning_steps', autospec=True) + @mock.patch.object(iscsi_deploy, 'LOG', spec=['warning']) + def test__initiate_cleaning_exception( + self, log_mock, set_node_cleaning_steps_mock, + cleaning_error_handler_mock, notify_mock): + set_node_cleaning_steps_mock.side_effect = RuntimeError() + + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.driver.vendor._initiate_cleaning(task) + + log_mock.warning.assert_called_once_with(mock.ANY, mock.ANY) + set_node_cleaning_steps_mock.assert_called_once_with(task) + cleaning_error_handler_mock.assert_called_once_with(task, mock.ANY) + self.assertFalse(notify_mock.called) + @mock.patch.object(fake.FakeBoot, 'prepare_instance', autospec=True) @mock.patch.object(deploy_utils, 'notify_ramdisk_to_proceed', autospec=True) @@ -1219,6 +1320,20 @@ class TestVendorPassthru(db_base.DbTestCase): self.assertEqual(1, mock_deploy_info.call_count, "pass_deploy_info was not called once.") + @mock.patch.object(iscsi_deploy.VendorPassthru, + '_initiate_cleaning', autospec=True) + def test_pass_deploy_info_cleaning(self, initiate_cleaning_mock): + with task_manager.acquire(self.context, self.node.uuid) as task: + task.node.provision_state = states.CLEANWAIT + task.driver.vendor.pass_deploy_info( + task, address='123456', iqn='aaa-bbb', key='fake-56789') + initiate_cleaning_mock.assert_called_once_with( + task.driver.vendor, task) + # Asserting if we are still on CLEANWAIT state confirms that + # we return from pass_deploy_info method after initiating + # cleaning. + self.assertEqual(states.CLEANWAIT, task.node.provision_state) + def test_vendor_routes(self): expected = ['heartbeat', 'pass_deploy_info', 'pass_bootloader_install_info']