Merge "Add support for in-band cleaning in ISCSIDeploy"

This commit is contained in:
Jenkins 2015-10-15 15:17:17 +00:00 committed by Gerrit Code Review
commit 59b9896f88
6 changed files with 234 additions and 16 deletions

View File

@ -211,7 +211,7 @@ class BaseAgentVendor(base.VendorInterface):
'payload version: %s')
% version)
def _notify_conductor_resume_clean(self, task):
def notify_conductor_resume_clean(self, task):
LOG.debug('Sending RPC to conductor to resume cleaning for node %s',
task.node.uuid)
uuid = task.node.uuid
@ -265,7 +265,7 @@ class BaseAgentVendor(base.VendorInterface):
'step': node.clean_step})
LOG.exception(msg)
return manager.cleaning_error_handler(task, msg)
self._notify_conductor_resume_clean(task)
self.notify_conductor_resume_clean(task)
elif command.get('command_status') == 'SUCCEEDED':
clean_step_hook = _get_post_clean_step_hook(node)
@ -290,7 +290,7 @@ class BaseAgentVendor(base.VendorInterface):
LOG.info(_LI('Agent on node %s returned cleaning command success, '
'moving to next clean step'), node.uuid)
self._notify_conductor_resume_clean(task)
self.notify_conductor_resume_clean(task)
else:
msg = (_('Agent returned unknown status for clean step %(step)s '
'on node %(node)s : %(err)s.') %
@ -361,7 +361,7 @@ class BaseAgentVendor(base.VendorInterface):
node.uuid)
msg = _('Node failed to start the next cleaning step.')
manager.set_node_cleaning_steps(task)
self._notify_conductor_resume_clean(task)
self.notify_conductor_resume_clean(task)
else:
msg = _('Node failed to check cleaning progress.')
self.continue_cleaning(task, **kwargs)

View File

@ -1443,7 +1443,17 @@ def prepare_inband_cleaning(task, manage_boot=True):
if manage_boot:
ramdisk_opts = build_agent_options(task.node)
# TODO(rameshg87): Below code is to make sure that bash ramdisk
# invokes pass_deploy_info vendor passthru when it is booted
# for cleaning. Remove the below code once we stop supporting
# bash ramdisk in Ironic. Do a late import to avoid circular
# import.
from ironic.drivers.modules import iscsi_deploy
ramdisk_opts.update(
iscsi_deploy.build_deploy_ramdisk_options(task.node))
task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
manager_utils.node_power_action(task, states.REBOOT)
# Tell the conductor we are waiting for the agent to boot.

View File

@ -30,6 +30,7 @@ from ironic.common.i18n import _LW
from ironic.common import keystone
from ironic.common import states
from ironic.common import utils
from ironic.conductor import manager
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers import base
@ -741,6 +742,59 @@ class ISCSIDeploy(base.DeployInterface):
def take_over(self, task):
pass
def get_clean_steps(self, task):
"""Get the list of clean steps from the agent.
:param task: a TaskManager object containing the node
:returns: A list of clean step dictionaries. If bash ramdisk is
used for this node, it returns an empty list.
"""
# TODO(rameshg87): Remove the below code once we stop supporting
# bash ramdisk in Ironic. No need to log warning because we have
# already logged it in pass_deploy_info.
if 'agent_url' not in task.node.driver_internal_info:
return []
steps = deploy_utils.agent_get_clean_steps(
task, interface='deploy',
override_priorities={
'erase_devices': CONF.deploy.erase_devices_priority})
return steps
def execute_clean_step(self, task, step):
"""Execute a clean step asynchronously on the agent.
:param task: a TaskManager object containing the node
:param step: a clean step dictionary to execute
:raises: NodeCleaningFailure if the agent does not return a command
status
:returns: states.CLEANWAIT to signify the step will be completed
asynchronously.
"""
return deploy_utils.agent_execute_clean_step(task, step)
def prepare_cleaning(self, task):
"""Boot into the agent to prepare for cleaning.
:param task: a TaskManager object containing the node
:raises NodeCleaningFailure: if the previous cleaning ports cannot
be removed or if new cleaning ports cannot be created
:returns: states.CLEANWAIT to signify an asynchronous prepare.
"""
return deploy_utils.prepare_inband_cleaning(
task, manage_boot=True)
def tear_down_cleaning(self, task):
"""Clean up the PXE and DHCP files after cleaning.
:param task: a TaskManager object containing the node
:raises NodeCleaningFailure: if the cleaning ports cannot be
removed
"""
deploy_utils.tear_down_inband_cleaning(
task, manage_boot=True)
class VendorPassthru(agent_base_vendor.BaseAgentVendor):
"""Interface to mix IPMI and PXE vendor-specific interfaces."""
@ -763,8 +817,13 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor):
:raises: InvalidParameterValue if any parameters is invalid.
"""
if method == 'pass_deploy_info':
deploy_utils.validate_capabilities(task.node)
get_deploy_info(task.node, **kwargs)
# TODO(rameshg87): Don't validate deploy info if bash ramdisk
# booted during cleaning. It will be handled in pass_deploy_info
# method. Remove the below code once we stop supporting bash
# ramdisk in Ironic.
if task.node.provision_state != states.CLEANWAIT:
deploy_utils.validate_capabilities(task.node)
get_deploy_info(task.node, **kwargs)
elif method == 'pass_bootloader_install_info':
validate_pass_bootloader_info_input(task, kwargs)
@ -798,6 +857,30 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor):
validate_bootloader_install_status(task, kwargs)
finish_deploy(task, kwargs['address'])
def _initiate_cleaning(self, task):
"""Initiates the steps required to start cleaning for the node.
This method polls each interface of the driver for getting the
clean steps and notifies Ironic conductor to resume cleaning.
On error, it sets the node to CLEANFAIL state and populates
node.last_error with the error message.
:param task: a TaskManager instance containing the node to act on.
"""
LOG.warning(
_LW("Bash deploy ramdisk doesn't support in-band cleaning. "
"Please use the ironic-python-agent (IPA) ramdisk "
"instead for node %s. "), task.node.uuid)
try:
manager.set_node_cleaning_steps(task)
self.notify_conductor_resume_clean(task)
except Exception as e:
last_error = (
_('Encountered exception for node %(node)s '
'while initiating cleaning. Error: %(error)s') %
{'node': task.node.uuid, 'error': e})
return manager.cleaning_error_handler(task, last_error)
@base.passthru(['POST'])
@task_manager.require_exclusive_lock
def pass_deploy_info(self, task, **kwargs):
@ -815,6 +898,12 @@ class VendorPassthru(agent_base_vendor.BaseAgentVendor):
"its deployment. This deploy ramdisk has been "
"deprecated. Please use the ironic-python-agent "
"(IPA) ramdisk instead."), node.uuid)
# TODO(rameshg87): Remove the below code once we stop supporting
# bash ramdisk in Ironic.
if node.provision_state == states.CLEANWAIT:
return self._initiate_cleaning(task)
task.process_event('resume')
LOG.debug('Continuing the deployment on node %s', node.uuid)

View File

@ -325,7 +325,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
@mock.patch.object(manager, 'set_node_cleaning_steps', autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
def test_heartbeat_resume_clean(self, mock_notify, mock_set_steps,
mock_touch):
kwargs = {
@ -407,7 +407,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'reboot_to_instance',
autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
def test_heartbeat_noops_maintenance_mode(self, ncrc_mock, rti_mock,
cd_mock):
"""Ensures that heartbeat() no-ops for a maintenance node."""
@ -685,7 +685,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning(self, status_mock, notify_mock):
@ -712,7 +712,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
@mock.patch.object(agent_base_vendor,
'_get_post_clean_step_hook', autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning_with_hook(
@ -739,7 +739,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
notify_mock.assert_called_once_with(mock.ANY, task)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
@mock.patch.object(agent_base_vendor,
'_get_post_clean_step_hook', autospec=True)
@mock.patch.object(manager, 'cleaning_error_handler', autospec=True)
@ -772,7 +772,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertFalse(notify_mock.called)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning_old_command(self, status_mock, notify_mock):
@ -801,7 +801,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
self.assertFalse(notify_mock.called)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning_running(self, status_mock, notify_mock):
@ -835,7 +835,7 @@ class TestBaseAgentVendor(db_base.DbTestCase):
@mock.patch('ironic.conductor.manager.set_node_cleaning_steps',
autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
'notify_conductor_resume_clean', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
def test_continue_cleaning_clean_version_mismatch(

View File

@ -44,6 +44,7 @@ from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent_client
from ironic.drivers.modules import deploy_utils as utils
from ironic.drivers.modules import image_cache
from ironic.drivers.modules import iscsi_deploy
from ironic.drivers.modules import pxe
from ironic.tests.unit import base as tests_base
from ironic.tests.unit.conductor import utils as mgr_utils
@ -2128,13 +2129,16 @@ class AgentMethodsTestCase(db_base.DbTestCase):
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
@mock.patch.object(iscsi_deploy, 'build_deploy_ramdisk_options',
autospec=True)
@mock.patch.object(utils, 'build_agent_options', autospec=True)
@mock.patch.object(utils, 'prepare_cleaning_ports', autospec=True)
def _test_prepare_inband_cleaning(
self, prepare_cleaning_ports_mock,
self, prepare_cleaning_ports_mock, iscsi_build_options_mock,
build_options_mock, power_mock, prepare_ramdisk_mock,
manage_boot=True):
build_options_mock.return_value = {'a': 'b'}
iscsi_build_options_mock.return_value = {'c': 'd'}
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
self.assertEqual(
@ -2146,7 +2150,7 @@ class AgentMethodsTestCase(db_base.DbTestCase):
'agent_erase_devices_iterations'), 1)
if manage_boot:
prepare_ramdisk_mock.assert_called_once_with(
mock.ANY, mock.ANY, {'a': 'b'})
mock.ANY, mock.ANY, {'a': 'b', 'c': 'd'})
build_options_mock.assert_called_once_with(task.node)
else:
self.assertFalse(prepare_ramdisk_mock.called)

View File

@ -29,6 +29,7 @@ from ironic.common import keystone
from ironic.common import pxe_utils
from ironic.common import states
from ironic.common import utils
from ironic.conductor import manager
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent_base_vendor
@ -1048,6 +1049,61 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
clean_up_instance_mock.assert_called_once_with(
task.driver.boot, task)
@mock.patch.object(deploy_utils, 'prepare_inband_cleaning', autospec=True)
def test_prepare_cleaning(self, prepare_inband_cleaning_mock):
prepare_inband_cleaning_mock.return_value = states.CLEANWAIT
with task_manager.acquire(self.context, self.node.uuid) as task:
self.assertEqual(
states.CLEANWAIT, task.driver.deploy.prepare_cleaning(task))
prepare_inband_cleaning_mock.assert_called_once_with(
task, manage_boot=True)
@mock.patch.object(deploy_utils, 'tear_down_inband_cleaning',
autospec=True)
def test_tear_down_cleaning(self, tear_down_cleaning_mock):
with task_manager.acquire(self.context, self.node.uuid) as task:
task.driver.deploy.tear_down_cleaning(task)
tear_down_cleaning_mock.assert_called_once_with(
task, manage_boot=True)
@mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps',
autospec=True)
def test_get_clean_steps(self, mock_get_clean_steps):
# Test getting clean steps
self.config(group='deploy', erase_devices_priority=10)
mock_steps = [{'priority': 10, 'interface': 'deploy',
'step': 'erase_devices'}]
self.node.driver_internal_info = {'agent_url': 'foo'}
self.node.save()
mock_get_clean_steps.return_value = mock_steps
with task_manager.acquire(self.context, self.node.uuid) as task:
steps = task.driver.deploy.get_clean_steps(task)
mock_get_clean_steps.assert_called_once_with(
task, interface='deploy',
override_priorities={
'erase_devices': 10})
self.assertEqual(mock_steps, steps)
@mock.patch('ironic.drivers.modules.deploy_utils.agent_get_clean_steps',
autospec=True)
def test_get_clean_steps_no_agent_url(self, mock_get_clean_steps):
# Test getting clean steps
self.node.driver_internal_info = {}
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
steps = task.driver.deploy.get_clean_steps(task)
self.assertEqual([], steps)
self.assertFalse(mock_get_clean_steps.called)
@mock.patch.object(deploy_utils, 'agent_execute_clean_step', autospec=True)
def test_execute_clean_step(self, agent_execute_clean_step_mock):
with task_manager.acquire(self.context, self.node.uuid) as task:
task.driver.deploy.execute_clean_step(
task, {'some-step': 'step-info'})
agent_execute_clean_step_mock.assert_called_once_with(
task, {'some-step': 'step-info'})
class TestVendorPassthru(db_base.DbTestCase):
@ -1077,6 +1133,16 @@ class TestVendorPassthru(db_base.DbTestCase):
address='123456', iqn='aaa-bbb',
key='fake-56789')
def test_validate_pass_deploy_info_during_cleaning(self):
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
task.node.provision_state = states.CLEANWAIT
# Assert that it doesn't raise.
self.assertIsNone(
task.driver.vendor.validate(task, method='pass_deploy_info',
address='123456', iqn='aaa-bbb',
key='fake-56789'))
def test_validate_fail(self):
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
@ -1094,6 +1160,41 @@ class TestVendorPassthru(db_base.DbTestCase):
address='123456', iqn='aaa-bbb',
key='fake-12345')
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'notify_conductor_resume_clean',
autospec=True)
@mock.patch.object(manager, 'set_node_cleaning_steps', autospec=True)
@mock.patch.object(iscsi_deploy, 'LOG', spec=['warning'])
def test__initiate_cleaning(self, log_mock, set_node_cleaning_steps_mock,
notify_mock):
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
task.driver.vendor._initiate_cleaning(task)
log_mock.warning.assert_called_once_with(mock.ANY, mock.ANY)
set_node_cleaning_steps_mock.assert_called_once_with(task)
notify_mock.assert_called_once_with(self.driver.vendor, task)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'notify_conductor_resume_clean',
autospec=True)
@mock.patch.object(manager, 'cleaning_error_handler', autospec=True)
@mock.patch.object(manager, 'set_node_cleaning_steps', autospec=True)
@mock.patch.object(iscsi_deploy, 'LOG', spec=['warning'])
def test__initiate_cleaning_exception(
self, log_mock, set_node_cleaning_steps_mock,
cleaning_error_handler_mock, notify_mock):
set_node_cleaning_steps_mock.side_effect = RuntimeError()
with task_manager.acquire(self.context, self.node.uuid,
shared=True) as task:
task.driver.vendor._initiate_cleaning(task)
log_mock.warning.assert_called_once_with(mock.ANY, mock.ANY)
set_node_cleaning_steps_mock.assert_called_once_with(task)
cleaning_error_handler_mock.assert_called_once_with(task, mock.ANY)
self.assertFalse(notify_mock.called)
@mock.patch.object(fake.FakeBoot, 'prepare_instance', autospec=True)
@mock.patch.object(deploy_utils, 'notify_ramdisk_to_proceed',
autospec=True)
@ -1219,6 +1320,20 @@ class TestVendorPassthru(db_base.DbTestCase):
self.assertEqual(1, mock_deploy_info.call_count,
"pass_deploy_info was not called once.")
@mock.patch.object(iscsi_deploy.VendorPassthru,
'_initiate_cleaning', autospec=True)
def test_pass_deploy_info_cleaning(self, initiate_cleaning_mock):
with task_manager.acquire(self.context, self.node.uuid) as task:
task.node.provision_state = states.CLEANWAIT
task.driver.vendor.pass_deploy_info(
task, address='123456', iqn='aaa-bbb', key='fake-56789')
initiate_cleaning_mock.assert_called_once_with(
task.driver.vendor, task)
# Asserting if we are still on CLEANWAIT state confirms that
# we return from pass_deploy_info method after initiating
# cleaning.
self.assertEqual(states.CLEANWAIT, task.node.provision_state)
def test_vendor_routes(self):
expected = ['heartbeat', 'pass_deploy_info',
'pass_bootloader_install_info']