fast tracked deployment support

Provides a facility to minimize the power state changes of
a baremetal node to save critical time during deployment
operations.

Story: #2004965
Task: #29408
Depends-On: https://review.openstack.org/636778
Change-Id: I7ebbaddb33b38c87246c10165339ac4bac0ac6fc
This commit is contained in:
Julia Kreger 2019-02-08 20:26:41 -08:00
parent 9625b8aa71
commit 3f734c8728
18 changed files with 540 additions and 43 deletions

View File

@ -449,6 +449,8 @@ IRONIC_DEPLOY_LOGS_STORAGE_BACKEND=${IRONIC_DEPLOY_LOGS_STORAGE_BACKEND:-local}
# The path to the directory where Ironic should put the logs when IRONIC_DEPLOY_LOGS_STORAGE_BACKEND is set to "local"
IRONIC_DEPLOY_LOGS_LOCAL_PATH=${IRONIC_DEPLOY_LOGS_LOCAL_PATH:-$IRONIC_VM_LOG_DIR/deploy_logs}
# Fast track option
IRONIC_DEPLOY_FAST_TRACK=${IRONIC_DEPLOY_FAST_TRACK:-False}
# Define baremetal min_microversion in tempest config. Default value None is picked from tempest.
TEMPEST_BAREMETAL_MIN_MICROVERSION=${TEMPEST_BAREMETAL_MIN_MICROVERSION:-}
@ -1115,6 +1117,9 @@ function configure_ironic {
iniset $IRONIC_CONF_FILE DEFAULT rpc_transport $IRONIC_RPC_TRANSPORT
iniset $IRONIC_CONF_FILE json_rpc port $IRONIC_JSON_RPC_PORT
# Set fast track options
iniset $IRONIC_CONF_FILE deploy fast_track $IRONIC_DEPLOY_FAST_TRACK
# Configure Ironic conductor, if it was enabled.
if is_service_enabled ir-cond; then
configure_ironic_conductor
@ -2553,6 +2558,8 @@ function ironic_configure_tempest {
# Enabled features
iniset $TEMPEST_CONFIG baremetal_feature_enabled ipxe_enabled $IRONIC_IPXE_ENABLED
iniset $TEMPEST_CONFIG baremetal_feature_enabled fast_track_discovery $IRONIC_DEPLOY_FAST_TRACK
}
function get_ironic_node_prefix {

View File

@ -37,10 +37,6 @@ LOG = log.getLogger(__name__)
_LOOKUP_RETURN_FIELDS = ('uuid', 'properties', 'instance_info',
'driver_internal_info')
_LOOKUP_ALLOWED_STATES = {states.DEPLOYING, states.DEPLOYWAIT,
states.CLEANING, states.CLEANWAIT,
states.INSPECTING,
states.RESCUING, states.RESCUEWAIT}
def config():
@ -83,6 +79,12 @@ class LookupResult(base.APIBase):
class LookupController(rest.RestController):
"""Controller handling node lookup for a deploy ramdisk."""
@property
def lookup_allowed_states(self):
    """Return the provision states in which ramdisk lookup is allowed.

    When [deploy]fast_track is enabled, the wider fast-track set is
    used (it additionally includes stable states such as ENROLL,
    MANAGEABLE and AVAILABLE), since an agent may already be running
    before provisioning begins.
    """
    fast_track = CONF.deploy.fast_track
    return (states.FASTTRACK_LOOKUP_ALLOWED_STATES if fast_track
            else states.LOOKUP_ALLOWED_STATES)
@expose.expose(LookupResult, types.listtype, types.uuid)
def get_all(self, addresses=None, node_uuid=None):
"""Look up a node by its MAC addresses and optionally UUID.
@ -144,7 +146,7 @@ class LookupController(rest.RestController):
raise exception.NotFound()
if (CONF.api.restrict_lookup
and node.provision_state not in _LOOKUP_ALLOWED_STATES):
and node.provision_state not in self.lookup_allowed_states):
raise exception.NotFound()
return LookupResult.convert_with_links(node)

View File

@ -799,7 +799,12 @@ def build_service_pxe_config(task, instance_image_info,
# NOTE(pas-ha) if it is takeover of ACTIVE node or node performing
# unrescue operation, first ensure that basic PXE configs and links
# are in place before switching pxe config
if (node.provision_state in [states.ACTIVE, states.UNRESCUING]
# NOTE(TheJulia): Also consider deploying a valid state to go ahead
# and check things before continuing, as otherwise deployments can
# fail if the agent was booted outside the direct actions of the
# boot interface.
if (node.provision_state in [states.ACTIVE, states.UNRESCUING,
states.DEPLOYING]
and not os.path.isfile(pxe_config_path)):
pxe_options = build_pxe_config_options(task, instance_image_info,
service=True,
@ -808,6 +813,7 @@ def build_service_pxe_config(task, instance_image_info,
create_pxe_config(task, pxe_options, pxe_config_template,
ipxe_enabled=ipxe_enabled)
iwdi = node.driver_internal_info.get('is_whole_disk_image')
deploy_utils.switch_pxe_config(
pxe_config_path, root_uuid_or_disk_id,
boot_mode_utils.get_boot_mode(node),

View File

@ -243,6 +243,20 @@ If a node gets stuck with one of these states for some reason
to fail state.
"""
# Provision states from which a deploy ramdisk is normally permitted
# to perform an API lookup of its node.
_LOOKUP_ALLOWED_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT,
                          INSPECTING, RESCUING, RESCUEWAIT)
LOOKUP_ALLOWED_STATES = frozenset(_LOOKUP_ALLOWED_STATES)
"""States when API lookups are normally allowed for nodes."""

# Superset of the above used when [deploy]fast_track is enabled: an
# agent may already be running while the node sits in a stable state,
# so lookups are additionally allowed from ENROLL, MANAGEABLE and
# AVAILABLE.
_FASTTRACK_LOOKUP_ALLOWED_STATES = (ENROLL, MANAGEABLE, AVAILABLE,
                                    DEPLOYING, DEPLOYWAIT, CLEANING,
                                    CLEANWAIT, INSPECTING, RESCUING,
                                    RESCUEWAIT)
FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES)
"""States where API lookups are permitted with fast track enabled."""
##############
# Power states
##############

View File

@ -13,6 +13,7 @@
# under the License.
import collections
import datetime
import time
from openstack.baremetal import configdrive as os_configdrive
@ -21,6 +22,7 @@ from oslo_log import log
from oslo_serialization import jsonutils
from oslo_service import loopingcall
from oslo_utils import excutils
from oslo_utils import timeutils
import six
from ironic.common import boot_devices
@ -1408,3 +1410,63 @@ def build_configdrive(node, configdrive):
LOG.debug('Building a configdrive for node %s', node.uuid)
return os_configdrive.build(meta_data, user_data=user_data,
network_data=configdrive.get('network_data'))
def fast_track_able(task):
    """Checks if the operation can be a streamlined deployment sequence.

    This is mainly focused on ensuring that we are able to quickly
    sequence through operations if we already have a ramdisk
    heartbeating through external means.

    :param task: Taskmanager object
    :returns: True if [deploy]fast_track is set to True, no iSCSI boot
              configuration is present, and no last_error is present for
              the node indicating that there was a recent failure.
    """
    # Guard clauses preserve the original conjunction's short-circuit
    # order: configuration first, then storage, then node state.
    if not CONF.deploy.fast_track:
        return False
    # TODO(TheJulia): Network model aside, we should be able to
    # fast-track through initial sequence to complete deployment.
    # This needs to be validated.
    # TODO(TheJulia): Do we need a secondary guard? To prevent
    # driving through this we could query the API endpoint of
    # the agent with a short timeout such as 10 seconds, which
    # would help verify if the node is online.
    # TODO(TheJulia): Should we check the provisioning/deployment
    # networks match config wise? Do we care? #decisionsdecisions
    if not task.driver.storage.should_write_image(task):
        return False
    return task.node.last_error is None
def is_fast_track(task):
    """Checks a fast track is available.

    This method first ensures that the node and conductor configuration
    is valid to perform a fast track sequence meaning that we already
    have a ramdisk running through another means like discovery.
    If not valid, False is returned.

    The method then checks for the last agent heartbeat, and if it
    occurred within the timeout set by [deploy]fast_track_timeout and
    the power state for the machine is POWER_ON, then fast track is
    permitted.

    :param task: Taskmanager object
    :returns: True if the last heartbeat that was recorded was within
              the [deploy]fast_track_timeout setting.
    """
    if not fast_track_able(task):
        return False
    # Use native datetime objects for conversion and comparison
    # (slightly odd because of py2 compatibility).
    last = datetime.datetime.strptime(
        task.node.driver_internal_info.get(
            'agent_last_heartbeat',
            '1970-01-01T00:00:00.000000'),
        "%Y-%m-%dT%H:%M:%S.%f")
    # If we found nothing, we assume that the time is essentially epoch,
    # which can never fall inside the fast_track_timeout window.
    time_delta = datetime.timedelta(seconds=CONF.deploy.fast_track_timeout)
    last_valid = timeutils.utcnow() - time_delta
    # Checking the power state, because if we find the machine off due to
    # any action, we can't actually fast track the node. :(
    return (last_valid <= last
            and task.driver.power.get_power_state(task) == states.POWER_ON)

View File

@ -113,6 +113,27 @@ opts = [
'images for the direct deploy interface, when local '
'HTTP service is incorporated to provide instance image '
'instead of swift tempurls.')),
cfg.BoolOpt('fast_track',
default=False,
help=_('Whether to allow deployment agents to perform lookup, '
'heartbeat operations during initial states of a '
'machine lifecycle and by-pass the normal setup '
'procedures for a ramdisk. This feature also enables '
'power operations which are part of deployment '
'processes to be bypassed if the ramdisk has performed '
'a heartbeat operation using the fast_track_timeout '
'setting.')),
cfg.IntOpt('fast_track_timeout',
default=300,
min=0,
max=300,
help=_('Seconds for which the last heartbeat event is to be '
'considered valid for the purpose of a fast '
'track sequence. This setting should generally be '
'less than the number of seconds for "Power-On Self '
'Test" and typical ramdisk start-up. This value should '
'not exceed the [api]ramdisk_heartbeat_timeout '
'setting.')),
]

View File

@ -455,7 +455,14 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
:param task: a TaskManager instance.
:returns: status of the deploy. One of ironic.common.states.
"""
if task.driver.storage.should_write_image(task):
if manager_utils.is_fast_track(task):
LOG.debug('Performing a fast track deployment for %(node)s.',
{'node': task.node.uuid})
# Update the database for the API and the task tracking resumes
# the state machine state going from DEPLOYWAIT -> DEPLOYING
task.process_event('wait')
self.continue_deploy(task)
elif task.driver.storage.should_write_image(task):
manager_utils.node_power_action(task, states.REBOOT)
return states.DEPLOYWAIT
else:
@ -521,6 +528,12 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
:raises: exception.InvalidParameterValue if network validation fails.
:raises: any boot interface's prepare_ramdisk exceptions.
"""
def _update_instance_info():
node.instance_info = (
deploy_utils.build_instance_info_for_deploy(task))
node.save()
node = task.node
deploy_utils.populate_storage_driver_internal_info(task)
if node.provision_state == states.DEPLOYING:
@ -547,25 +560,44 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
task.driver.network.validate(task)
else:
ctx.reraise = True
# Adding the node to provisioning network so that the dhcp
# options get added for the provisioning port.
manager_utils.node_power_action(task, states.POWER_OFF)
# Determine if this is a fast track sequence
fast_track_deploy = manager_utils.is_fast_track(task)
if fast_track_deploy:
# The agent has already recently checked in and we are
# configured to take that as an indicator that we can
# skip ahead.
LOG.debug('The agent for node %(node)s has recently checked '
'in, and the node power will remain unmodified.',
{'node': task.node.uuid})
else:
# Powering off node to setup networking for port and
# ensure that the state is reset if it is inadvertently
# on for any unknown reason.
manager_utils.node_power_action(task, states.POWER_OFF)
if task.driver.storage.should_write_image(task):
# NOTE(vdrok): in case of rebuild, we have tenant network
# already configured, unbind tenant ports if present
power_state_to_restore = (
manager_utils.power_on_node_if_needed(task))
if not fast_track_deploy:
power_state_to_restore = (
manager_utils.power_on_node_if_needed(task))
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_provisioning_network(task)
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
if not fast_track_deploy:
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
else:
# Fast track sequence in progress
_update_instance_info()
# Signal to storage driver to attach volumes
task.driver.storage.attach_volumes(task)
if not task.driver.storage.should_write_image(task):
if (not task.driver.storage.should_write_image(task)
or fast_track_deploy):
# We have nothing else to do as this is handled in the
# backend storage system, and we can return to the caller
# as we do not need to boot the agent to deploy.
# Alternatively, we could be in a fast track deployment
# and again, we should have nothing to do here.
return
if node.provision_state in (states.ACTIVE, states.UNRESCUING):
# Call is due to conductor takeover
@ -573,9 +605,7 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
elif node.provision_state != states.ADOPTING:
if node.provision_state not in (states.RESCUING, states.RESCUEWAIT,
states.RESCUE, states.RESCUEFAIL):
node.instance_info = (
deploy_utils.build_instance_info_for_deploy(task))
node.save()
_update_instance_info()
if CONF.agent.manage_agent_boot:
deploy_opts = deploy_utils.build_agent_options(node)
task.driver.boot.prepare_ramdisk(task, deploy_opts)

View File

@ -66,6 +66,19 @@ VENDOR_PROPERTIES = {
'older deploy ramdisks. Defaults to False. Optional.')
}
# States where a heartbeat is only recorded (agent URL/version and the
# last-heartbeat timestamp are saved) and no further action is taken:
# the agent is merely identifying itself as on-line.
__HEARTBEAT_RECORD_ONLY = (states.ENROLL, states.MANAGEABLE,
                           states.AVAILABLE)
_HEARTBEAT_RECORD_ONLY = frozenset(__HEARTBEAT_RECORD_ONLY)

# States where heartbeats drive the normal deploy/clean/rescue flows.
_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT, states.RESCUEWAIT)
HEARTBEAT_ALLOWED = frozenset(_HEARTBEAT_ALLOWED)

# Superset used when [deploy]fast_track is enabled: heartbeats are also
# accepted from the stable states above and from DEPLOYING.
_FASTTRACK_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT,
                                states.RESCUEWAIT, states.ENROLL,
                                states.MANAGEABLE, states.AVAILABLE,
                                states.DEPLOYING)
FASTTRACK_HEARTBEAT_ALLOWED = frozenset(_FASTTRACK_HEARTBEAT_ALLOWED)
def _get_client():
client = agent_client.AgentClient()
@ -260,7 +273,9 @@ class HeartbeatMixin(object):
@property
def heartbeat_allowed_states(self):
    """Return the provision states in which heartbeating is allowed.

    With [deploy]fast_track enabled, the wider fast-track set is
    returned so an already-running agent may keep checking in.
    """
    fast = CONF.deploy.fast_track
    return FASTTRACK_HEARTBEAT_ALLOWED if fast else HEARTBEAT_ALLOWED
@METRICS.timer('HeartbeatMixin.heartbeat')
def heartbeat(self, task, callback_url, agent_version):
@ -271,7 +286,8 @@ class HeartbeatMixin(object):
:param agent_version: The version of the agent that is heartbeating
"""
# NOTE(pas-ha) immediately skip the rest if nothing to do
if task.node.provision_state not in self.heartbeat_allowed_states:
if (task.node.provision_state not in self.heartbeat_allowed_states
and not manager_utils.fast_track_able(task)):
LOG.debug('Heartbeat from node %(node)s in unsupported '
'provision state %(state)s, not taking any action.',
{'node': task.node.uuid,
@ -288,13 +304,25 @@ class HeartbeatMixin(object):
node = task.node
LOG.debug('Heartbeat from node %s', node.uuid)
driver_internal_info = node.driver_internal_info
driver_internal_info['agent_url'] = callback_url
driver_internal_info['agent_version'] = agent_version
# Record the last heartbeat event time in UTC, so we can make
# decisions about it later. Can be decoded to datetime object with:
# datetime.datetime.strptime(var, "%Y-%m-%dT%H:%M:%S.%f")
driver_internal_info['agent_last_heartbeat'] = str(
timeutils.utcnow().isoformat())
node.driver_internal_info = driver_internal_info
node.save()
if node.provision_state in _HEARTBEAT_RECORD_ONLY:
# We shouldn't take any additional action. The agent will
# silently continue to heartbeat to ironic until user initiated
# state change occurs causing it to match a state below.
LOG.debug('Heartbeat from %(node)s recorded to identify the '
'node as on-line.', {'node': task.node.uuid})
return
# Async call backs don't set error state on their own
# TODO(jimrollenhagen) improve error messages here
msg = _('Failed checking if deploy is done.')

View File

@ -899,10 +899,22 @@ def prepare_inband_cleaning(task, manage_boot=True):
:raises: InvalidParameterValue if cleaning network UUID config option has
an invalid value.
"""
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
fast_track = manager_utils.is_fast_track(task)
if not fast_track:
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
# WARNING(TheJulia): When fast track is available, trying to plug the
# cleaning network is problematic, and in practice it may fail if
# cleaning, provisioning, and discovery all take place on different
# networks.
# Translation: here lies a realistically unavoidable footgun in
# fast track support.
# TODO(TheJulia): Let's improve this somehow such that the agent host
# gracefully handles these sorts of network changes.
task.driver.network.add_cleaning_network(task)
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
if not fast_track:
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
# Append required config parameters to node's driver_internal_info
# to pass to IPA.
@ -912,7 +924,8 @@ def prepare_inband_cleaning(task, manage_boot=True):
ramdisk_opts = build_agent_options(task.node)
task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
manager_utils.node_power_action(task, states.REBOOT)
if not fast_track:
manager_utils.node_power_action(task, states.REBOOT)
# Tell the conductor we are waiting for the agent to boot.
return states.CLEANWAIT
@ -936,14 +949,18 @@ def tear_down_inband_cleaning(task, manage_boot=True):
:raises: NetworkError, NodeCleaningFailure if the cleaning ports cannot be
removed.
"""
manager_utils.node_power_action(task, states.POWER_OFF)
fast_track = manager_utils.is_fast_track(task)
if not fast_track:
manager_utils.node_power_action(task, states.POWER_OFF)
if manage_boot:
task.driver.boot.clean_up_ramdisk(task)
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
task.driver.network.remove_cleaning_network(task)
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
if not fast_track:
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
def get_image_instance_info(node):

View File

@ -408,13 +408,24 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
:returns: deploy state DEPLOYWAIT.
"""
node = task.node
if task.driver.storage.should_write_image(task):
if manager_utils.is_fast_track(task):
LOG.debug('Performing a fast track deployment for %(node)s.',
{'node': task.node.uuid})
deploy_utils.cache_instance_image(task.context, node)
check_image_size(task)
# Update the database for the API and the task tracking resumes
# the state machine state going from DEPLOYWAIT -> DEPLOYING
task.process_event('wait')
self.continue_deploy(task)
elif task.driver.storage.should_write_image(task):
# Standard deploy process
deploy_utils.cache_instance_image(task.context, node)
check_image_size(task)
manager_utils.node_power_action(task, states.REBOOT)
return states.DEPLOYWAIT
else:
# Boot to an Storage Volume
# TODO(TheJulia): At some point, we should de-dupe this code
# as it is nearly identical to the agent deploy interface.
# This is not being done now as it is expected to be
@ -486,23 +497,38 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
task.driver.boot.prepare_instance(task)
else:
if node.provision_state == states.DEPLOYING:
# Adding the node to provisioning network so that the dhcp
# options get added for the provisioning port.
manager_utils.node_power_action(task, states.POWER_OFF)
fast_track_deploy = manager_utils.is_fast_track(task)
if fast_track_deploy:
# The agent has already recently checked in and we are
# configured to take that as an indicator that we can
# skip ahead.
LOG.debug('The agent for node %(node)s has recently '
'checked in, and the node power will remain '
'unmodified.',
{'node': task.node.uuid})
else:
# Adding the node to provisioning network so that the dhcp
# options get added for the provisioning port.
manager_utils.node_power_action(task, states.POWER_OFF)
# NOTE(vdrok): in case of rebuild, we have tenant network
# already configured, unbind tenant ports if present
if task.driver.storage.should_write_image(task):
power_state_to_restore = (
manager_utils.power_on_node_if_needed(task))
if not fast_track_deploy:
power_state_to_restore = (
manager_utils.power_on_node_if_needed(task))
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_provisioning_network(task)
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
if not fast_track_deploy:
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
task.driver.storage.attach_volumes(task)
if not task.driver.storage.should_write_image(task):
if (not task.driver.storage.should_write_image(task)
or fast_track_deploy):
# We have nothing else to do as this is handled in the
# backend storage system, and we can return to the caller
# as we do not need to boot the agent to deploy.
# Alternatively, we are in a fast track deployment
# and have nothing else to do.
return
deploy_opts = deploy_utils.build_agent_options(node)

View File

@ -24,6 +24,7 @@ from six.moves import http_client
from ironic.api.controllers import base as api_base
from ironic.api.controllers import v1 as api_v1
from ironic.api.controllers.v1 import ramdisk
from ironic.common import states
from ironic.conductor import rpcapi
from ironic.tests.unit.api import base as test_api_base
from ironic.tests.unit.objects import utils as obj_utils
@ -160,6 +161,17 @@ class TestLookup(test_api_base.BaseApiTest):
set(data['node']))
self._check_config(data)
def test_fast_deploy_lookup(self):
    # With [deploy]fast_track enabled, a ramdisk may already be running
    # before provisioning starts, so lookups must succeed for nodes in
    # the stable states ENROLL/MANAGEABLE/AVAILABLE as well.
    CONF.set_override('fast_track', True, 'deploy')
    for provision_state in [states.ENROLL, states.MANAGEABLE,
                            states.AVAILABLE]:
        self.node.provision_state = provision_state
        # NOTE(review): the state change is not persisted via
        # self.node.save() here, unlike the heartbeat tests — confirm
        # the lookup path sees the intended provision state.
        data = self.get_json(
            '/lookup?addresses=%s&node_uuid=%s' %
            (','.join(self.addresses), self.node.uuid),
            headers={api_base.Version.string: str(api_v1.max_version())})
        self.assertEqual(self.node.uuid, data['node']['uuid'])
@mock.patch.object(rpcapi.ConductorAPI, 'get_topic_for',
lambda *n: 'test-topic')

View File

@ -13,6 +13,7 @@ import time
import mock
from oslo_config import cfg
from oslo_utils import timeutils
from oslo_utils import uuidutils
from ironic.common import boot_devices
@ -2537,3 +2538,53 @@ class ValidateDeployTemplatesTestCase(db_base.DbTestCase):
self.assertRaises(exception.InvalidParameterValue,
conductor_utils.validate_deploy_templates, task)
mock_validated.assert_called_once_with(task)
# The class-level patch injects mock_get_power into every test method.
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
class FastTrackTestCase(db_base.DbTestCase):
    """Exercises the conductor_utils.is_fast_track decision logic."""

    def setUp(self):
        super(FastTrackTestCase, self).setUp()
        # The node starts with a fresh heartbeat, so fast track is
        # viable unless a test invalidates one of the preconditions.
        self.node = obj_utils.create_test_node(
            self.context, driver='fake-hardware',
            uuid=uuidutils.generate_uuid(),
            driver_internal_info={
                'agent_last_heartbeat': str(timeutils.utcnow().isoformat())})
        self.config(fast_track=True, group='deploy')

    def test_is_fast_track(self, mock_get_power):
        # Recent heartbeat + power on + fast_track enabled -> True.
        mock_get_power.return_value = states.POWER_ON
        with task_manager.acquire(
                self.context, self.node.uuid, shared=False) as task:
            self.assertTrue(conductor_utils.is_fast_track(task))

    def test_is_fast_track_config_false(self, mock_get_power):
        # Disabled via configuration -> never fast track.
        self.config(fast_track=False, group='deploy')
        mock_get_power.return_value = states.POWER_ON
        with task_manager.acquire(
                self.context, self.node.uuid, shared=False) as task:
            self.assertFalse(conductor_utils.is_fast_track(task))

    def test_is_fast_track_power_off_false(self, mock_get_power):
        # A powered-off node cannot have a live agent.
        mock_get_power.return_value = states.POWER_OFF
        with task_manager.acquire(
                self.context, self.node.uuid, shared=False) as task:
            self.assertFalse(conductor_utils.is_fast_track(task))

    def test_is_fast_track_no_heartbeat(self, mock_get_power):
        # Without a recorded heartbeat, the timestamp defaults to epoch,
        # which always falls outside fast_track_timeout.
        mock_get_power.return_value = states.POWER_ON
        i_info = self.node.driver_internal_info
        i_info.pop('agent_last_heartbeat')
        self.node.driver_internal_info = i_info
        self.node.save()
        with task_manager.acquire(
                self.context, self.node.uuid, shared=False) as task:
            self.assertFalse(conductor_utils.is_fast_track(task))

    def test_is_fast_track_error_blocks(self, mock_get_power):
        # A recorded last_error blocks fast track (see fast_track_able).
        mock_get_power.return_value = states.POWER_ON
        self.node.last_error = "bad things happened"
        self.node.save()
        with task_manager.acquire(
                self.context, self.node.uuid, shared=False) as task:
            self.assertFalse(conductor_utils.is_fast_track(task))

View File

@ -337,6 +337,39 @@ class TestAgentDeploy(db_base.DbTestCase):
self.assertIsNone(driver_return)
self.assertTrue(mock_pxe_instance.called)
# Decorators are applied bottom-up, matching the argument order below.
@mock.patch.object(agent_client.AgentClient, 'prepare_image',
                   autospec=True)
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_deploy_fast_track(self, power_mock, mock_pxe_instance,
                           mock_is_fast_track, prepare_image_mock):
    # When the agent is already up (fast track), deploy() must skip
    # power actions and continue the deployment directly, asking the
    # agent to prepare the instance image.
    mock_is_fast_track.return_value = True
    self.node.target_provision_state = states.ACTIVE
    self.node.provision_state = states.DEPLOYING
    test_temp_url = 'http://image'
    expected_image_info = {
        'urls': [test_temp_url],
        'id': 'fake-image',
        'node_uuid': self.node.uuid,
        'checksum': 'checksum',
        'disk_format': 'qcow2',
        'container_format': 'bare',
        'stream_raw_images': CONF.agent.stream_raw_images,
    }
    self.node.save()
    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        self.driver.deploy(task)
        # No reboot and no PXE instance preparation in the fast
        # track path.
        self.assertFalse(power_mock.called)
        self.assertFalse(mock_pxe_instance.called)
        task.node.refresh()
        prepare_image_mock.assert_called_with(mock.ANY, task.node,
                                              expected_image_info)
        # deploy() transitions DEPLOYING -> DEPLOYWAIT via
        # process_event('wait') before continuing the deployment.
        self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
        self.assertEqual(states.ACTIVE,
                         task.node.target_provision_state)
@mock.patch.object(noop_storage.NoopStorage, 'detach_volumes',
autospec=True)
@mock.patch.object(flat_network.FlatNetwork,
@ -778,6 +811,52 @@ class TestAgentDeploy(db_base.DbTestCase):
build_options_mock.assert_not_called()
pxe_prepare_ramdisk_mock.assert_not_called()
# Decorators are applied bottom-up, matching the argument order below.
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
                   autospec=True)
@mock.patch.object(deploy_utils, 'populate_storage_driver_internal_info')
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk')
@mock.patch.object(deploy_utils, 'build_agent_options')
@mock.patch.object(deploy_utils, 'build_instance_info_for_deploy')
@mock.patch.object(flat_network.FlatNetwork, 'add_provisioning_network',
                   spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork,
                   'unconfigure_tenant_networks',
                   spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork, 'validate',
                   spec_set=True, autospec=True)
def test_prepare_fast_track(
        self, validate_net_mock,
        unconfigure_tenant_net_mock, add_provisioning_net_mock,
        build_instance_info_mock, build_options_mock,
        pxe_prepare_ramdisk_mock, storage_driver_info_mock,
        storage_attach_volumes_mock, is_fast_track_mock):
    # Fast track prepare(): networks are still (re)configured and
    # instance info is built, but the agent ramdisk is not prepared.
    # TODO(TheJulia): We should revisit this test. Smartnic
    # support didn't wire in tightly on testing for power in
    # these tests, and largely fast_track impacts power operations.
    node = self.node
    node.network_interface = 'flat'
    node.save()
    is_fast_track_mock.return_value = True
    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        task.node.provision_state = states.DEPLOYING
        build_options_mock.return_value = {'a': 'b'}
        self.driver.prepare(task)
        storage_driver_info_mock.assert_called_once_with(task)
        validate_net_mock.assert_called_once_with(mock.ANY, task)
        add_provisioning_net_mock.assert_called_once_with(mock.ANY, task)
        unconfigure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
        self.assertTrue(storage_attach_volumes_mock.called)
        self.assertTrue(build_instance_info_mock.called)
        # TODO(TheJulia): We should likely consider executing the
        # next two methods at some point in order to facilitate
        # continuity. While not explicitly required for this feature
        # to work, reboots as part of deployment would need the ramdisk
        # present and ready.
        self.assertFalse(build_options_mock.called)
        self.assertFalse(pxe_prepare_ramdisk_mock.called)
@mock.patch('ironic.common.dhcp_factory.DHCPFactory._set_dhcp_provider')
@mock.patch('ironic.common.dhcp_factory.DHCPFactory.clean_dhcp')
@mock.patch.object(pxe.PXEBoot, 'clean_up_instance')

View File

@ -355,8 +355,29 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
task.node.driver_internal_info['agent_url'])
self.assertEqual('3.2.0',
task.node.driver_internal_info['agent_version'])
self.assertIsNotNone(
task.node.driver_internal_info['agent_last_heartbeat'])
mock_touch.assert_called_once_with(mock.ANY)
def test_heartbeat_records_fast_track(self):
    # With fast track enabled, heartbeats from stable states must be
    # recorded (agent URL/version/last-heartbeat timestamp) without
    # changing the node's provision state.
    self.config(fast_track=True, group='deploy')
    for provision_state in [states.ENROLL, states.MANAGEABLE,
                            states.AVAILABLE]:
        # Reset recorded agent data between iterations.
        self.node.driver_internal_info = {}
        self.node.provision_state = provision_state
        self.node.save()
        with task_manager.acquire(
                self.context, self.node.uuid, shared=False) as task:
            self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '3.2.0')
            self.assertEqual('http://127.0.0.1:8080',
                             task.node.driver_internal_info['agent_url'])
            self.assertEqual('3.2.0',
                             task.node.driver_internal_info[
                                 'agent_version'])
            self.assertIsNotNone(
                task.node.driver_internal_info['agent_last_heartbeat'])
            # The record-only path must not advance the state machine.
            self.assertEqual(provision_state, task.node.provision_state)
class AgentRescueTests(AgentDeployMixinBaseTest):

View File

@ -1699,6 +1699,7 @@ class AgentMethodsTestCase(db_base.DbTestCase):
self.assertEqual(8, task.node.driver_internal_info[
'disk_erasure_concurrency'])
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
@mock.patch.object(utils, 'build_agent_options', autospec=True)
@ -1707,15 +1708,19 @@ class AgentMethodsTestCase(db_base.DbTestCase):
def _test_prepare_inband_cleaning(
self, add_cleaning_network_mock,
build_options_mock, power_mock, prepare_ramdisk_mock,
manage_boot=True):
is_fast_track_mock, manage_boot=True, fast_track=False):
build_options_mock.return_value = {'a': 'b'}
is_fast_track_mock.return_value = fast_track
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
self.assertEqual(
states.CLEANWAIT,
utils.prepare_inband_cleaning(task, manage_boot=manage_boot))
add_cleaning_network_mock.assert_called_once_with(task)
power_mock.assert_called_once_with(task, states.REBOOT)
if not fast_track:
power_mock.assert_called_once_with(task, states.REBOOT)
else:
self.assertFalse(power_mock.called)
self.assertEqual(1, task.node.driver_internal_info[
'agent_erase_devices_iterations'])
self.assertIs(True, task.node.driver_internal_info[
@ -1734,17 +1739,26 @@ class AgentMethodsTestCase(db_base.DbTestCase):
def test_prepare_inband_cleaning_manage_boot_false(self):
    # Exercise the shared helper with manage_boot disabled.
    self._test_prepare_inband_cleaning(manage_boot=False)
def test_prepare_inband_cleaning_fast_track(self):
    # Fast track path: the helper asserts that no reboot is issued.
    self._test_prepare_inband_cleaning(fast_track=True)
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
'remove_cleaning_network')
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def _test_tear_down_inband_cleaning(
self, power_mock, remove_cleaning_network_mock,
clean_up_ramdisk_mock, manage_boot=True):
clean_up_ramdisk_mock, is_fast_track_mock,
manage_boot=True, fast_track=False):
is_fast_track_mock.return_value = fast_track
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
utils.tear_down_inband_cleaning(task, manage_boot=manage_boot)
power_mock.assert_called_once_with(task, states.POWER_OFF)
if not fast_track:
power_mock.assert_called_once_with(task, states.POWER_OFF)
else:
self.assertFalse(power_mock.called)
remove_cleaning_network_mock.assert_called_once_with(task)
if manage_boot:
clean_up_ramdisk_mock.assert_called_once_with(
@ -1758,6 +1772,9 @@ class AgentMethodsTestCase(db_base.DbTestCase):
def test_tear_down_inband_cleaning_manage_boot_false(self):
    # Exercise the shared helper with manage_boot disabled.
    self._test_tear_down_inband_cleaning(manage_boot=False)
def test_tear_down_inband_cleaning_fast_track(self):
    # Fast track path: the helper asserts that no power-off is issued.
    self._test_tear_down_inband_cleaning(fast_track=True)
def test_build_agent_options_conf(self):
self.config(api_url='https://api-url', group='conductor')
options = utils.build_agent_options(self.node)

View File

@ -734,6 +734,53 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
task.driver.storage, task)
self.assertEqual(2, storage_should_write_mock.call_count)
# Decorators are applied bottom-up, matching the argument order below.
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
                   autospec=True)
@mock.patch.object(deploy_utils, 'populate_storage_driver_internal_info')
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk')
@mock.patch.object(deploy_utils, 'build_agent_options')
@mock.patch.object(deploy_utils, 'build_instance_info_for_deploy')
@mock.patch.object(flat_network.FlatNetwork, 'add_provisioning_network',
                   spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork,
                   'unconfigure_tenant_networks',
                   spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork, 'validate',
                   spec_set=True, autospec=True)
def test_prepare_fast_track(
        self, validate_net_mock,
        unconfigure_tenant_net_mock, add_provisioning_net_mock,
        build_instance_info_mock, build_options_mock,
        pxe_prepare_ramdisk_mock, storage_driver_info_mock,
        storage_attach_volumes_mock, is_fast_track_mock):
    # Fast track prepare() for the iSCSI deploy interface: networks are
    # still (re)configured, but the agent ramdisk is not prepared.
    # TODO(TheJulia): We should revisit this test. Smartnic
    # support didn't wire in tightly on testing for power in
    # these tests, and largely fast_track impacts power operations.
    node = self.node
    node.network_interface = 'flat'
    node.save()
    is_fast_track_mock.return_value = True
    with task_manager.acquire(
            self.context, self.node['uuid'], shared=False) as task:
        task.node.provision_state = states.DEPLOYING
        build_options_mock.return_value = {'a': 'b'}
        task.driver.deploy.prepare(task)
        storage_driver_info_mock.assert_called_once_with(task)
        # NOTE: Validate is the primary difference between agent/iscsi
        self.assertFalse(validate_net_mock.called)
        add_provisioning_net_mock.assert_called_once_with(mock.ANY, task)
        unconfigure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
        self.assertTrue(storage_attach_volumes_mock.called)
        # Unlike the agent interface test, instance info is not built.
        self.assertFalse(build_instance_info_mock.called)
        # TODO(TheJulia): We should likely consider executing the
        # next two methods at some point in order to facilitate
        # continuity. While not explicitly required for this feature
        # to work, reboots as part of deployment would need the ramdisk
        # present and ready.
        self.assertFalse(build_options_mock.called)
        self.assertFalse(pxe_prepare_ramdisk_mock.called)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(iscsi_deploy, 'check_image_size', autospec=True)
@mock.patch.object(deploy_utils, 'cache_instance_image', autospec=True)
@ -787,6 +834,36 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
self.assertEqual(2, mock_node_power_action.call_count)
self.assertEqual(states.DEPLOYING, task.node.provision_state)
@mock.patch.object(iscsi_deploy, 'check_image_size', autospec=True)
@mock.patch.object(deploy_utils, 'cache_instance_image', autospec=True)
@mock.patch.object(iscsi_deploy.ISCSIDeploy, 'continue_deploy',
                   autospec=True)
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_deploy_fast_track(self, power_mock, mock_pxe_instance,
                           mock_is_fast_track, continue_deploy_mock,
                           cache_image_mock, check_image_size_mock):
    """deploy() on a fast-tracked node avoids power and PXE actions.

    The agent is already running, so deploy() should proceed straight
    to image caching, the image size check and continue_deploy without
    changing node power state or preparing instance boot.
    """
    mock_is_fast_track.return_value = True
    # Simulate an agent that already checked in (e.g. via discovery).
    internal = self.node.driver_internal_info
    internal['agent_url'] = 'http://1.2.3.4:1234'
    self.node.driver_internal_info = internal
    self.node.provision_state = states.DEPLOYING
    self.node.target_provision_state = states.ACTIVE
    self.node.save()
    with task_manager.acquire(self.context, self.node['uuid'],
                              shared=False) as task:
        task.driver.deploy.deploy(task)
        # No power state change and no instance boot preparation.
        self.assertFalse(power_mock.called)
        self.assertFalse(mock_pxe_instance.called)
        task.node.refresh()
        self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
        self.assertEqual(states.ACTIVE,
                         task.node.target_provision_state)
        cache_image_mock.assert_called_with(mock.ANY, task.node)
        check_image_size_mock.assert_called_with(task)
        continue_deploy_mock.assert_called_with(mock.ANY, task)
@mock.patch.object(noop_storage.NoopStorage, 'detach_volumes',
autospec=True)
@mock.patch.object(flat_network.FlatNetwork,

View File

@ -0,0 +1,16 @@
---
features:
- |
    Adds a new feature called ``fast-track``, which allows an operator to
    optionally configure the Bare Metal API service and the Bare Metal
    conductor service to permit lookup and heartbeat for nodes that are
    in the process of being enrolled and created.
These nodes can be left online, from a process such as discovery.
    If ironic-python-agent has communicated with the Bare Metal service
    API endpoint within the last ``300`` seconds, the setup steps normally
    required to launch a ramdisk on the node are skipped, along with the
    associated power operations, enabling a bare metal node to go from
    discovery through to deployment with a single power cycle.
Fast track functionality may be enabled through the ``[deploy]fast_track``
option.

View File

@ -466,3 +466,14 @@
Run python 3 unit tests with driver dependencies installed.
vars:
tox_envlist: unit-with-driver-libs-python3
- job:
name: ironic-inspector-tempest-discovery-fast-track
    description: >-
      Inspector discovery tempest job with ironic fast-track deployment
      enabled and power-off after introspection disabled.
parent: ironic-inspector-tempest-discovery
vars:
tempest_test_regex: BareMetalFastTrackTest
devstack_localrc:
IRONIC_INSPECTOR_POWER_OFF: False
IRONIC_DEPLOY_FAST_TRACK: True
IRONIC_DEPLOY_FAST_TRACK_CLEANING: True