Merge "Generic changes for Node Inspection"
This commit is contained in:
commit
f203681441
@ -15,7 +15,7 @@
|
||||
#rpc_conn_pool_size=30
|
||||
|
||||
# Qpid broker hostname. (string value)
|
||||
#qpid_hostname=localhost
|
||||
#qpid_hostname=ironic
|
||||
|
||||
# Qpid broker port. (integer value)
|
||||
#qpid_port=5672
|
||||
@ -76,7 +76,7 @@
|
||||
|
||||
# The RabbitMQ broker address where a single node is used.
|
||||
# (string value)
|
||||
#rabbit_host=localhost
|
||||
#rabbit_host=ironic
|
||||
|
||||
# The RabbitMQ broker port where a single node is used.
|
||||
# (integer value)
|
||||
@ -604,6 +604,10 @@
|
||||
# when configdrive_use_swift is True. (string value)
|
||||
#configdrive_swift_container=ironic_configdrive_container
|
||||
|
||||
# Timeout (seconds) for waiting for node inspection. 0 -
|
||||
# unlimited. (integer value) (integer value)
|
||||
#inspect_timeout=1800
|
||||
|
||||
|
||||
[console]
|
||||
|
||||
@ -957,7 +961,7 @@
|
||||
# Timeout (in seconds) for iRMC operations (integer value)
|
||||
#client_timeout=60
|
||||
|
||||
# Sensor Data retrieval method either "ipmitool" or "scci"
|
||||
# Sensor data retrieval method, either "ipmitool" or "scci"
|
||||
# (string value)
|
||||
#sensor_method=ipmitool
|
||||
|
||||
|
@ -60,7 +60,8 @@ MIN_VER_STR = '1.1'
|
||||
# v1.3: Add node.driver_internal_info
|
||||
# v1.4: Add MANAGEABLE state
|
||||
# v1.5: Add logical node names
|
||||
MAX_VER_STR = '1.5'
|
||||
# v1.6: Add INSPECT* states
|
||||
MAX_VER_STR = '1.6'
|
||||
|
||||
|
||||
MIN_VER = base.Version({base.Version.string: MIN_VER_STR},
|
||||
|
@ -72,9 +72,11 @@ def hide_driver_internal_info(obj):
|
||||
def check_allow_management_verbs(verb):
|
||||
# v1.4 added the MANAGEABLE state and two verbs to move nodes into
|
||||
# and out of that state. Reject requests to do this in older versions
|
||||
if (pecan.request.version.minor < 4 and
|
||||
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]):
|
||||
raise exception.NotAcceptable()
|
||||
if ((pecan.request.version.minor < 4 and
|
||||
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']])
|
||||
or (pecan.request.version.minor < 6 and
|
||||
verb == ir_states.VERBS['inspect'])):
|
||||
raise exception.NotAcceptable()
|
||||
|
||||
|
||||
def allow_logical_names():
|
||||
@ -127,7 +129,8 @@ class NodePatchType(types.JsonPatchType):
|
||||
'/power_state', '/provision_state', '/reservation',
|
||||
'/target_power_state', '/target_provision_state',
|
||||
'/provision_updated_at', '/maintenance_reason',
|
||||
'/driver_internal_info']
|
||||
'/driver_internal_info', '/inspection_finished_at',
|
||||
'/inspection_started_at', ]
|
||||
|
||||
@staticmethod
|
||||
def mandatory_attrs():
|
||||
@ -439,6 +442,9 @@ class NodeStatesController(rest.RestController):
|
||||
elif target == ir_states.DELETED:
|
||||
pecan.request.rpcapi.do_node_tear_down(
|
||||
pecan.request.context, rpc_node.uuid, topic)
|
||||
elif target == ir_states.VERBS['inspect']:
|
||||
pecan.request.rpcapi.inspect_hardware(
|
||||
pecan.request.context, rpc_node.uuid, topic=topic)
|
||||
elif target in (
|
||||
ir_states.VERBS['manage'], ir_states.VERBS['provide']):
|
||||
pecan.request.rpcapi.do_provisioning_action(
|
||||
@ -511,6 +517,12 @@ class Node(base.APIBase):
|
||||
provision_updated_at = datetime.datetime
|
||||
"""The UTC date and time of the last provision state change"""
|
||||
|
||||
inspection_finished_at = datetime.datetime
|
||||
"""The UTC date and time when the last inspection finished successfully."""
|
||||
|
||||
inspection_started_at = datetime.datetime
|
||||
"""The UTC date and time of the hardware when inspection was started"""
|
||||
|
||||
maintenance = types.boolean
|
||||
"""Indicates whether the node is in maintenance mode."""
|
||||
|
||||
@ -636,6 +648,7 @@ class Node(base.APIBase):
|
||||
'cpus': '1'}, updated_at=time, created_at=time,
|
||||
provision_updated_at=time, instance_info={},
|
||||
maintenance=False, maintenance_reason=None,
|
||||
inspection_finished_at=None, inspection_started_at=time,
|
||||
console_enabled=False)
|
||||
# NOTE(matty_dubs): The chassis_uuid getter() is based on the
|
||||
# _chassis_uuid variable:
|
||||
|
@ -163,8 +163,12 @@ conductor_opts = [
|
||||
default='ironic_configdrive_container',
|
||||
help='Name of the Swift container to store config drive '
|
||||
'data. Used when configdrive_use_swift is True.'),
|
||||
]
|
||||
cfg.IntOpt('inspect_timeout',
|
||||
default=1800,
|
||||
help='Timeout (seconds) for waiting for node inspection. '
|
||||
'0 - unlimited. (integer value)'),
|
||||
|
||||
]
|
||||
CONF = cfg.CONF
|
||||
CONF.register_opts(conductor_opts, 'conductor')
|
||||
|
||||
@ -173,7 +177,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
"""Ironic Conductor manager main class."""
|
||||
|
||||
# NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's.
|
||||
RPC_API_VERSION = '1.23'
|
||||
RPC_API_VERSION = '1.24'
|
||||
|
||||
target = messaging.Target(version=RPC_API_VERSION)
|
||||
|
||||
@ -921,40 +925,11 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
'provision_state': states.DEPLOYWAIT,
|
||||
'maintenance': False,
|
||||
'provisioned_before': callback_timeout}
|
||||
columns = ['uuid', 'driver']
|
||||
node_list = self.dbapi.get_nodeinfo_list(
|
||||
columns=columns,
|
||||
filters=filters,
|
||||
sort_key='provision_updated_at',
|
||||
sort_dir='asc')
|
||||
|
||||
workers_count = 0
|
||||
for node_uuid, driver in node_list:
|
||||
if not self._mapped_to_this_conductor(node_uuid, driver):
|
||||
continue
|
||||
try:
|
||||
with task_manager.acquire(context, node_uuid) as task:
|
||||
# NOTE(comstud): Recheck maintenance and provision_state
|
||||
# now that we have the lock. We don't need to re-check
|
||||
# updated_at unless we expect the state to have flipped
|
||||
# to something else and then back to DEPLOYWAIT between
|
||||
# the call to get_nodeinfo_list and now.
|
||||
if (task.node.maintenance or
|
||||
task.node.provision_state != states.DEPLOYWAIT):
|
||||
continue
|
||||
# timeout has been reached - fail the deploy
|
||||
task.process_event('fail',
|
||||
callback=self._spawn_worker,
|
||||
call_args=(utils.cleanup_after_timeout,
|
||||
task),
|
||||
err_handler=provisioning_error_handler)
|
||||
except exception.NoFreeConductorWorker:
|
||||
break
|
||||
except (exception.NodeLocked, exception.NodeNotFound):
|
||||
continue
|
||||
workers_count += 1
|
||||
if workers_count == CONF.conductor.periodic_max_workers:
|
||||
break
|
||||
sort_key = 'provision_updated_at'
|
||||
callback_method = utils.cleanup_after_timeout
|
||||
err_handler = provisioning_error_handler
|
||||
self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT,
|
||||
sort_key, callback_method, err_handler)
|
||||
|
||||
def _do_takeover(self, task):
|
||||
LOG.debug(('Conductor %(cdr)s taking over node %(node)s'),
|
||||
@ -1425,6 +1400,135 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
driver=task.node.driver, extension='management')
|
||||
return task.driver.management.get_supported_boot_devices()
|
||||
|
||||
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
|
||||
exception.NodeLocked,
|
||||
exception.HardwareInspectionFailure,
|
||||
exception.UnsupportedDriverExtension)
|
||||
def inspect_hardware(self, context, node_id):
|
||||
"""Inspect hardware to obtain hardware properties.
|
||||
|
||||
Initiate the inspection of a node. Validations are done
|
||||
synchronously and the actual inspection work is performed in
|
||||
background (asynchronously).
|
||||
|
||||
:param context: request context.
|
||||
:param node_id: node id or uuid.
|
||||
:raises: NodeLocked if node is locked by another conductor.
|
||||
:raises: UnsupportedDriverExtension if the node's driver doesn't
|
||||
support inspect.
|
||||
:raises: NoFreeConductorWorker when there is no free worker to start
|
||||
async task
|
||||
:raises: HardwareInspectionFailure when unable to get
|
||||
essential scheduling properties from hardware.
|
||||
|
||||
"""
|
||||
LOG.debug('RPC inspect_hardware called for node %s', node_id)
|
||||
with task_manager.acquire(context, node_id, shared=False) as task:
|
||||
node = task.node
|
||||
if not getattr(task.driver, 'inspect', None):
|
||||
raise exception.UnsupportedDriverExtension(
|
||||
driver=task.node.driver, extension='inspect')
|
||||
|
||||
try:
|
||||
task.driver.power.validate(task)
|
||||
task.driver.inspect.validate(task)
|
||||
except (exception.InvalidParameterValue,
|
||||
exception.MissingParameterValue) as e:
|
||||
error = (_("RPC inspect_hardware failed to validate "
|
||||
"inspection or power info. Error: %(msg)s")
|
||||
% {'msg': e})
|
||||
raise exception.HardwareInspectionFailure(error=error)
|
||||
|
||||
try:
|
||||
task.process_event('inspect',
|
||||
callback=self._spawn_worker,
|
||||
call_args=(_do_inspect_hardware, task,
|
||||
self.conductor.id),
|
||||
err_handler=provisioning_error_handler)
|
||||
|
||||
except exception.InvalidState:
|
||||
error = (_("Inspection is not possible while node "
|
||||
"%(node)s is in state %(state)s")
|
||||
% {'node': node.uuid,
|
||||
'state': node.provision_state})
|
||||
raise exception.HardwareInspectionFailure(error=error)
|
||||
|
||||
@periodic_task.periodic_task(
|
||||
spacing=CONF.conductor.check_provision_state_interval)
|
||||
def _check_inspect_timeouts(self, context):
|
||||
"""Periodically checks inspect_timeout and fails upon reaching it.
|
||||
|
||||
:param: context: request context
|
||||
|
||||
"""
|
||||
callback_timeout = CONF.conductor.inspect_timeout
|
||||
if not callback_timeout:
|
||||
return
|
||||
|
||||
filters = {'reserved': False,
|
||||
'provision_state': states.INSPECTING,
|
||||
'inspection_started_before': callback_timeout}
|
||||
sort_key = 'inspection_started_at'
|
||||
last_error = _("timeout reached while waiting for callback")
|
||||
self._fail_if_timeout_reached(context, filters, states.INSPECTING,
|
||||
sort_key, last_error=last_error)
|
||||
|
||||
def _fail_if_timeout_reached(self, context, filters, provision_state,
|
||||
sort_key, callback_method=None,
|
||||
err_handler=None, last_error=None):
|
||||
"""Checks if the async(background) process has reached timeout.
|
||||
|
||||
:param: context: request context
|
||||
:param: filters: criteria (as a dictionary) to get the desired
|
||||
list of nodes.
|
||||
:param: provision_state: provision_state that the node is in,
|
||||
for the provisioning activity to have failed.
|
||||
:param: sort_key: the nodes are sorted based on this key.
|
||||
:param: callback_method: the callback method to be invoked in a
|
||||
spawned thread, for a failed node. This
|
||||
method must take a :class:`TaskManager` as
|
||||
the first (and only required) parameter.
|
||||
:param: err_handler: the error handler to be invoked if an error.
|
||||
occurs trying to spawn a thread for a failed node.
|
||||
:param: last_error: the error message to be updated in node.last_error
|
||||
|
||||
"""
|
||||
columns = ['uuid', 'driver']
|
||||
node_list = self.dbapi.get_nodeinfo_list(columns=columns,
|
||||
filters=filters,
|
||||
sort_key=sort_key,
|
||||
sort_dir='asc')
|
||||
|
||||
workers_count = 0
|
||||
for node_uuid, driver in node_list:
|
||||
if not self._mapped_to_this_conductor(node_uuid, driver):
|
||||
continue
|
||||
try:
|
||||
with task_manager.acquire(context, node_uuid) as task:
|
||||
if (task.node.maintenance or
|
||||
task.node.provision_state != provision_state):
|
||||
continue
|
||||
|
||||
# timeout has been reached - process the event 'fail'
|
||||
if callback_method:
|
||||
task.process_event('fail',
|
||||
callback=self._spawn_worker,
|
||||
call_args=(callback_method, task),
|
||||
err_handler=err_handler)
|
||||
else:
|
||||
if not last_error:
|
||||
last_error = _("timeout reached while waiting "
|
||||
"for callback")
|
||||
task.node.last_error = last_error
|
||||
task.process_event('fail')
|
||||
except exception.NoFreeConductorWorker:
|
||||
break
|
||||
except (exception.NodeLocked, exception.NodeNotFound):
|
||||
continue
|
||||
workers_count += 1
|
||||
if workers_count >= CONF.conductor.periodic_max_workers:
|
||||
break
|
||||
|
||||
|
||||
def get_vendor_passthru_metadata(route_dict):
|
||||
d = {}
|
||||
@ -1696,3 +1800,37 @@ def do_sync_power_state(task, count):
|
||||
node.save()
|
||||
|
||||
return count
|
||||
|
||||
|
||||
def _do_inspect_hardware(task, conductor_id):
|
||||
"""Prepare the environment and inspect a node."""
|
||||
node = task.node
|
||||
|
||||
def handle_failure(e):
|
||||
# NOTE(deva): there is no need to clear conductor_affinity
|
||||
node.last_error = e
|
||||
task.process_event('fail')
|
||||
LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
|
||||
{'node': node.uuid, 'err': e})
|
||||
|
||||
try:
|
||||
new_state = task.driver.inspect.inspect_hardware(task)
|
||||
|
||||
except Exception as e:
|
||||
with excutils.save_and_reraise_exception():
|
||||
error = str(e)
|
||||
handle_failure(error)
|
||||
|
||||
# Update conductor_affinity to reference this conductor's ID
|
||||
# since there may be local persistent state
|
||||
node.conductor_affinity = conductor_id
|
||||
|
||||
if new_state == states.MANAGEABLE:
|
||||
task.process_event('done')
|
||||
LOG.info(_LI('Successfully inspected node %(node)s')
|
||||
% {'node': node.uuid})
|
||||
elif new_state != states.INSPECTING:
|
||||
error = (_("Driver returned unexpected state in inspection"
|
||||
"%(state)s") % {'state': new_state})
|
||||
handle_failure(error)
|
||||
raise exception.HardwareInspectionFailure(error=error)
|
||||
|
@ -66,11 +66,12 @@ class ConductorAPI(object):
|
||||
| get_driver_vendor_passthru_methods
|
||||
| 1.22 - Added configdrive parameter to do_node_deploy.
|
||||
| 1.23 - Added do_provisioning_action
|
||||
| 1.24 - Added inspect_hardware method
|
||||
|
||||
"""
|
||||
|
||||
# NOTE(rloo): This must be in sync with manager.ConductorManager's.
|
||||
RPC_API_VERSION = '1.23'
|
||||
RPC_API_VERSION = '1.24'
|
||||
|
||||
def __init__(self, topic=None):
|
||||
super(ConductorAPI, self).__init__()
|
||||
@ -483,3 +484,20 @@ class ConductorAPI(object):
|
||||
cctxt = self.client.prepare(topic=topic or self.topic, version='1.17')
|
||||
return cctxt.call(context, 'get_supported_boot_devices',
|
||||
node_id=node_id)
|
||||
|
||||
def inspect_hardware(self, context, node_id, topic=None):
|
||||
"""Signals the conductor service to perform hardware introspection.
|
||||
|
||||
:param context: request context.
|
||||
:param node_id: node id or uuid.
|
||||
:param topic: RPC topic. Defaults to self.topic.
|
||||
:raises: NodeLocked if node is locked by another conductor.
|
||||
:raises: HardwareInspectionFailure
|
||||
:raises: NoFreeConductorWorker when there is no free worker to start
|
||||
async task.
|
||||
:raises: UnsupportedDriverExtension if the node's driver doesn't
|
||||
support inspection.
|
||||
|
||||
"""
|
||||
cctxt = self.client.prepare(topic=topic or self.topic, version='1.24')
|
||||
return cctxt.call(context, 'inspect_hardware', node_id=node_id)
|
||||
|
@ -0,0 +1,40 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""add inspection_started_at and inspection_finished_at
|
||||
|
||||
Revision ID: 1e1d5ace7dc6
|
||||
Revises: 3ae36a5f5131
|
||||
Create Date: 2015-02-26 10:46:46.861927
|
||||
|
||||
"""
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '1e1d5ace7dc6'
|
||||
down_revision = '3ae36a5f5131'
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.add_column('nodes', sa.Column('inspection_started_at',
|
||||
sa.DateTime(),
|
||||
nullable=True))
|
||||
op.add_column('nodes', sa.Column('inspection_finished_at',
|
||||
sa.DateTime(),
|
||||
nullable=True))
|
||||
|
||||
|
||||
def downgrade():
|
||||
op.drop_column('nodes', 'inspection_started_at')
|
||||
op.drop_column('nodes', 'inspection_finished_at')
|
@ -191,6 +191,11 @@ class Connection(api.Connection):
|
||||
limit = timeutils.utcnow() - datetime.timedelta(
|
||||
seconds=filters['provisioned_before'])
|
||||
query = query.filter(models.Node.provision_updated_at < limit)
|
||||
if 'inspection_started_before' in filters:
|
||||
limit = ((timeutils.utcnow()) -
|
||||
(datetime.timedelta(
|
||||
seconds=filters['inspection_started_before'])))
|
||||
query = query.filter(models.Node.inspection_started_at < limit)
|
||||
|
||||
return query
|
||||
|
||||
@ -371,6 +376,17 @@ class Connection(api.Connection):
|
||||
|
||||
if 'provision_state' in values:
|
||||
values['provision_updated_at'] = timeutils.utcnow()
|
||||
if values['provision_state'] == states.INSPECTING:
|
||||
values['inspection_started_at'] = timeutils.utcnow()
|
||||
values['inspection_finished_at'] = None
|
||||
|
||||
if (ref.provision_state == states.INSPECTING and
|
||||
values.get('provision_state') == states.MANAGEABLE):
|
||||
values['inspection_finished_at'] = timeutils.utcnow()
|
||||
values['inspection_started_at'] = None
|
||||
elif (ref.provision_state == states.INSPECTING and
|
||||
values.get('provision_state') == states.INSPECTFAIL):
|
||||
values['inspection_started_at'] = None
|
||||
|
||||
ref.update(values)
|
||||
return ref
|
||||
|
@ -185,6 +185,8 @@ class Node(Base):
|
||||
maintenance = Column(Boolean, default=False)
|
||||
maintenance_reason = Column(Text, nullable=True)
|
||||
console_enabled = Column(Boolean, default=False)
|
||||
inspection_finished_at = Column(DateTime, nullable=True)
|
||||
inspection_started_at = Column(DateTime, nullable=True)
|
||||
extra = Column(JSONEncodedDict)
|
||||
|
||||
|
||||
|
@ -181,4 +181,4 @@ class FakeInspect(base.InspectInterface):
|
||||
pass
|
||||
|
||||
def inspect_hardware(self, task):
|
||||
pass
|
||||
return states.MANAGEABLE
|
||||
|
@ -79,6 +79,9 @@ class Node(base.IronicObject):
|
||||
# that started but failed to finish.
|
||||
'last_error': obj_utils.str_or_none,
|
||||
|
||||
'inspection_finished_at': obj_utils.datetime_or_str_or_none,
|
||||
'inspection_started_at': obj_utils.datetime_or_str_or_none,
|
||||
|
||||
'extra': obj_utils.dict_or_none,
|
||||
}
|
||||
|
||||
|
@ -199,6 +199,8 @@ class TestListNodes(test_api_base.FunctionalTest):
|
||||
self.assertIn('reservation', data)
|
||||
self.assertIn('maintenance_reason', data)
|
||||
self.assertIn('name', data)
|
||||
self.assertIn('inspection_finished_at', data)
|
||||
self.assertIn('inspection_started_at', data)
|
||||
# never expose the chassis_id
|
||||
self.assertNotIn('chassis_id', data)
|
||||
|
||||
@ -219,6 +221,8 @@ class TestListNodes(test_api_base.FunctionalTest):
|
||||
self.assertIn('target_power_state', data['nodes'][0])
|
||||
self.assertIn('target_provision_state', data['nodes'][0])
|
||||
self.assertIn('provision_updated_at', data['nodes'][0])
|
||||
self.assertIn('inspection_finished_at', data['nodes'][0])
|
||||
self.assertIn('inspection_started_at', data['nodes'][0])
|
||||
# never expose the chassis_id
|
||||
self.assertNotIn('chassis_id', data['nodes'][0])
|
||||
|
||||
@ -1504,6 +1508,9 @@ class TestPut(test_api_base.FunctionalTest):
|
||||
p = mock.patch.object(rpcapi.ConductorAPI, 'do_node_tear_down')
|
||||
self.mock_dntd = p.start()
|
||||
self.addCleanup(p.stop)
|
||||
p = mock.patch.object(rpcapi.ConductorAPI, 'inspect_hardware')
|
||||
self.mock_dnih = p.start()
|
||||
self.addCleanup(p.stop)
|
||||
|
||||
def test_power_state(self):
|
||||
response = self.put_json('/nodes/%s/states/power' % self.node.uuid,
|
||||
@ -1695,6 +1702,17 @@ class TestPut(test_api_base.FunctionalTest):
|
||||
states.VERBS['provide'],
|
||||
'test-topic')
|
||||
|
||||
def test_inspect_already_in_progress(self):
|
||||
node = self.node
|
||||
node.provision_state = states.INSPECTING
|
||||
node.target_provision_state = states.MANAGEABLE
|
||||
node.reservation = 'fake-host'
|
||||
node.save()
|
||||
ret = self.put_json('/nodes/%s/states/provision' % node.uuid,
|
||||
{'target': states.MANAGEABLE},
|
||||
expect_errors=True)
|
||||
self.assertEqual(409, ret.status_code) # Conflict
|
||||
|
||||
@mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action')
|
||||
def test_manage_from_available(self, mock_dpa):
|
||||
self.node.provision_state = states.AVAILABLE
|
||||
|
@ -3070,3 +3070,356 @@ class StoreConfigDriveTestCase(tests_base.TestCase):
|
||||
mock_swift.return_value.get_temp_url.assert_called_once_with(
|
||||
container_name, expected_obj_name, timeout)
|
||||
self.assertEqual(expected_instance_info, self.node.instance_info)
|
||||
|
||||
|
||||
@_mock_record_keepalive
|
||||
class NodeInspectHardware(_ServiceSetUpMixin,
|
||||
tests_db_base.DbTestCase):
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
|
||||
def test_inspect_hardware_ok(self, mock_inspect):
|
||||
self._start_service()
|
||||
node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
provision_state=states.INSPECTING)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
mock_inspect.return_value = states.MANAGEABLE
|
||||
manager._do_inspect_hardware(task, self.service.conductor.id)
|
||||
node.refresh()
|
||||
self.assertEqual(states.MANAGEABLE, node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
||||
self.assertIsNone(node.last_error)
|
||||
mock_inspect.assert_called_once_with(mock.ANY)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
|
||||
def test_inspect_hardware_return_inspecting(self, mock_inspect):
|
||||
self._start_service()
|
||||
node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
provision_state=states.INSPECTING)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
mock_inspect.return_value = states.INSPECTING
|
||||
manager._do_inspect_hardware(task, self.service.conductor.id)
|
||||
node.refresh()
|
||||
self.assertEqual(states.INSPECTING, node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
||||
self.assertIsNone(node.last_error)
|
||||
mock_inspect.assert_called_once_with(mock.ANY)
|
||||
|
||||
@mock.patch.object(manager, 'LOG')
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
|
||||
def test_inspect_hardware_return_other_state(self, mock_inspect, log_mock):
|
||||
self._start_service()
|
||||
node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
provision_state=states.INSPECTING)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
mock_inspect.return_value = None
|
||||
self.assertRaises(exception.HardwareInspectionFailure,
|
||||
manager._do_inspect_hardware, task,
|
||||
self.service.conductor.id)
|
||||
node.refresh()
|
||||
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
||||
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
||||
self.assertIsNotNone(node.last_error)
|
||||
mock_inspect.assert_called_once_with(mock.ANY)
|
||||
self.assertTrue(log_mock.error.called)
|
||||
|
||||
def test__check_inspect_timeouts(self):
|
||||
self._start_service()
|
||||
CONF.set_override('inspect_timeout', 1, group='conductor')
|
||||
node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
provision_state=states.INSPECTING,
|
||||
target_provision_state=states.MANAGEABLE,
|
||||
provision_updated_at=datetime.datetime(2000, 1, 1, 0, 0),
|
||||
inspection_started_at=datetime.datetime(2000, 1, 1, 0, 0))
|
||||
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
self.service._worker_pool.waitall()
|
||||
node.refresh()
|
||||
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
||||
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
||||
self.assertIsNotNone(node.last_error)
|
||||
|
||||
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker')
|
||||
def test_inspect_hardware_worker_pool_full(self, mock_spawn):
|
||||
prv_state = states.MANAGEABLE
|
||||
tgt_prv_state = states.NOSTATE
|
||||
node = obj_utils.create_test_node(self.context,
|
||||
provision_state=prv_state,
|
||||
target_provision_state=tgt_prv_state,
|
||||
last_error=None, driver='fake')
|
||||
self._start_service()
|
||||
|
||||
mock_spawn.side_effect = exception.NoFreeConductorWorker()
|
||||
|
||||
exc = self.assertRaises(messaging.rpc.ExpectedException,
|
||||
self.service.inspect_hardware,
|
||||
self.context, node.uuid)
|
||||
# Compare true exception hidden by @messaging.expected_exceptions
|
||||
self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0])
|
||||
self.service._worker_pool.waitall()
|
||||
node.refresh()
|
||||
# Make sure things were rolled back
|
||||
self.assertEqual(prv_state, node.provision_state)
|
||||
self.assertEqual(tgt_prv_state, node.target_provision_state)
|
||||
self.assertIsNotNone(node.last_error)
|
||||
# Verify reservation has been cleared.
|
||||
self.assertIsNone(node.reservation)
|
||||
|
||||
def _test_inspect_hardware_validate_fail(self, mock_validate):
|
||||
mock_validate.side_effect = exception.InvalidParameterValue('error')
|
||||
node = obj_utils.create_test_node(self.context, driver='fake')
|
||||
exc = self.assertRaises(messaging.rpc.ExpectedException,
|
||||
self.service.inspect_hardware,
|
||||
self.context, node.uuid)
|
||||
# Compare true exception hidden by @messaging.expected_exceptions
|
||||
self.assertEqual(exception.HardwareInspectionFailure, exc.exc_info[0])
|
||||
# This is a sync operation last_error should be None.
|
||||
self.assertIsNone(node.last_error)
|
||||
# Verify reservation has been cleared.
|
||||
self.assertIsNone(node.reservation)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeInspect.validate')
|
||||
def test_inspect_hardware_validate_fail(self, mock_validate):
|
||||
self._test_inspect_hardware_validate_fail(mock_validate)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakePower.validate')
|
||||
def test_inspect_hardware_power_validate_fail(self, mock_validate):
|
||||
self._test_inspect_hardware_validate_fail(mock_validate)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
|
||||
def test_inspect_hardware_raises_error(self, mock_inspect):
|
||||
self._start_service()
|
||||
mock_inspect.side_effect = exception.HardwareInspectionFailure('test')
|
||||
state = states.MANAGEABLE
|
||||
node = obj_utils.create_test_node(self.context, driver='fake',
|
||||
provision_state=states.INSPECTING,
|
||||
target_provision_state=state)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
|
||||
self.assertRaises(exception.HardwareInspectionFailure,
|
||||
manager._do_inspect_hardware, task,
|
||||
self.service.conductor.id)
|
||||
node.refresh()
|
||||
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
||||
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
||||
self.assertIsNotNone(node.last_error)
|
||||
self.assertTrue(mock_inspect.called)
|
||||
|
||||
|
||||
@mock.patch.object(task_manager, 'acquire')
|
||||
@mock.patch.object(manager.ConductorManager, '_mapped_to_this_conductor')
|
||||
@mock.patch.object(dbapi.IMPL, 'get_nodeinfo_list')
|
||||
class ManagerCheckInspectTimeoutsTestCase(_CommonMixIn,
|
||||
tests_db_base.DbTestCase):
|
||||
def setUp(self):
|
||||
super(ManagerCheckInspectTimeoutsTestCase, self).setUp()
|
||||
self.config(inspect_timeout=300, group='conductor')
|
||||
self.service = manager.ConductorManager('hostname', 'test-topic')
|
||||
self.service.dbapi = self.dbapi
|
||||
|
||||
self.node = self._create_node(provision_state=states.INSPECTING,
|
||||
target_provision_state=states.MANAGEABLE)
|
||||
self.task = self._create_task(node=self.node)
|
||||
|
||||
self.node2 = self._create_node(provision_state=states.INSPECTING,
|
||||
target_provision_state=states.MANAGEABLE)
|
||||
self.task2 = self._create_task(node=self.node2)
|
||||
|
||||
self.filters = {'reserved': False,
|
||||
'inspection_started_before': 300,
|
||||
'provision_state': states.INSPECTING}
|
||||
self.columns = ['uuid', 'driver']
|
||||
|
||||
def _assert_get_nodeinfo_args(self, get_nodeinfo_mock):
|
||||
get_nodeinfo_mock.assert_called_once_with(sort_dir='asc',
|
||||
columns=self.columns, filters=self.filters,
|
||||
sort_key='inspection_started_at')
|
||||
|
||||
def test__check_inspect_timeouts_disabled(self, get_nodeinfo_mock,
|
||||
mapped_mock, acquire_mock):
|
||||
self.config(inspect_timeout=0, group='conductor')
|
||||
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self.assertFalse(get_nodeinfo_mock.called)
|
||||
self.assertFalse(mapped_mock.called)
|
||||
self.assertFalse(acquire_mock.called)
|
||||
|
||||
def test__check_inspect_timeouts_not_mapped(self, get_nodeinfo_mock,
|
||||
mapped_mock, acquire_mock):
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
|
||||
mapped_mock.return_value = False
|
||||
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
mapped_mock.assert_called_once_with(self.node.uuid, self.node.driver)
|
||||
self.assertFalse(acquire_mock.called)
|
||||
|
||||
def test__check_inspect_timeout(self, get_nodeinfo_mock,
|
||||
mapped_mock, acquire_mock):
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = self._get_acquire_side_effect(self.task)
|
||||
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
mapped_mock.assert_called_once_with(self.node.uuid, self.node.driver)
|
||||
acquire_mock.assert_called_once_with(self.context, self.node.uuid)
|
||||
self.task.process_event.assert_called_with('fail')
|
||||
|
||||
def test__check_inspect_timeouts_acquire_node_disappears(self,
|
||||
get_nodeinfo_mock,
|
||||
mapped_mock,
|
||||
acquire_mock):
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = exception.NodeNotFound(node='fake')
|
||||
|
||||
# Exception eaten
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
mapped_mock.assert_called_once_with(
|
||||
self.node.uuid, self.node.driver)
|
||||
acquire_mock.assert_called_once_with(self.context,
|
||||
self.node.uuid)
|
||||
self.assertFalse(self.task.process_event.called)
|
||||
|
||||
def test__check_inspect_timeouts_acquire_node_locked(self,
|
||||
get_nodeinfo_mock,
|
||||
mapped_mock,
|
||||
acquire_mock):
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = exception.NodeLocked(node='fake',
|
||||
host='fake')
|
||||
|
||||
# Exception eaten
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
mapped_mock.assert_called_once_with(
|
||||
self.node.uuid, self.node.driver)
|
||||
acquire_mock.assert_called_once_with(self.context,
|
||||
self.node.uuid)
|
||||
self.assertFalse(self.task.process_event.called)
|
||||
|
||||
def test__check_inspect_timeouts_no_acquire_after_lock(self,
|
||||
get_nodeinfo_mock,
|
||||
mapped_mock,
|
||||
acquire_mock):
|
||||
task = self._create_task(
|
||||
node_attrs=dict(provision_state=states.AVAILABLE,
|
||||
uuid=self.node.uuid))
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = self._get_acquire_side_effect(task)
|
||||
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
mapped_mock.assert_called_once_with(
|
||||
self.node.uuid, self.node.driver)
|
||||
acquire_mock.assert_called_once_with(self.context,
|
||||
self.node.uuid)
|
||||
self.assertFalse(task.process_event.called)
|
||||
|
||||
def test__check_inspect_timeouts_to_maintenance_after_lock(self,
|
||||
get_nodeinfo_mock,
|
||||
mapped_mock,
|
||||
acquire_mock):
|
||||
task = self._create_task(
|
||||
node_attrs=dict(provision_state=states.INSPECTING,
|
||||
target_provision_state=states.MANAGEABLE,
|
||||
maintenance=True,
|
||||
uuid=self.node.uuid))
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
|
||||
[task.node, self.node2])
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = self._get_acquire_side_effect(
|
||||
[task, self.task2])
|
||||
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
self.assertEqual([mock.call(self.node.uuid, task.node.driver),
|
||||
mock.call(self.node2.uuid, self.node2.driver)],
|
||||
mapped_mock.call_args_list)
|
||||
self.assertEqual([mock.call(self.context, self.node.uuid),
|
||||
mock.call(self.context, self.node2.uuid)],
|
||||
acquire_mock.call_args_list)
|
||||
# First node skipped
|
||||
self.assertFalse(task.process_event.called)
|
||||
# Second node spawned
|
||||
self.task2.process_event.assert_called_with('fail')
|
||||
|
||||
def test__check_inspect_timeouts_exiting_no_worker_avail(self,
|
||||
get_nodeinfo_mock,
|
||||
mapped_mock,
|
||||
acquire_mock):
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
|
||||
[self.node, self.node2])
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = self._get_acquire_side_effect(
|
||||
[(self.task, exception.NoFreeConductorWorker()), self.task2])
|
||||
|
||||
# Exception should be nuked
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
# mapped should be only called for the first node as we should
|
||||
# have exited the loop early due to NoFreeConductorWorker
|
||||
mapped_mock.assert_called_once_with(
|
||||
self.node.uuid, self.node.driver)
|
||||
acquire_mock.assert_called_once_with(self.context,
|
||||
self.node.uuid)
|
||||
self.task.process_event.assert_called_with('fail')
|
||||
|
||||
def test__check_inspect_timeouts_exit_with_other_exception(self,
|
||||
get_nodeinfo_mock,
|
||||
mapped_mock,
|
||||
acquire_mock):
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
|
||||
[self.node, self.node2])
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = self._get_acquire_side_effect(
|
||||
[(self.task, exception.IronicException('foo')), self.task2])
|
||||
|
||||
# Should re-raise
|
||||
self.assertRaises(exception.IronicException,
|
||||
self.service._check_inspect_timeouts,
|
||||
self.context)
|
||||
|
||||
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
|
||||
# mapped should be only called for the first node as we should
|
||||
# have exited the loop early due to unknown exception
|
||||
mapped_mock.assert_called_once_with(
|
||||
self.node.uuid, self.node.driver)
|
||||
acquire_mock.assert_called_once_with(self.context,
|
||||
self.node.uuid)
|
||||
self.task.process_event.assert_called_with('fail')
|
||||
|
||||
def test__check_inspect_timeouts_worker_limit(self, get_nodeinfo_mock,
|
||||
mapped_mock, acquire_mock):
|
||||
self.config(periodic_max_workers=2, group='conductor')
|
||||
|
||||
# Use the same nodes/tasks to make life easier in the tests
|
||||
# here
|
||||
|
||||
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
|
||||
[self.node] * 3)
|
||||
mapped_mock.return_value = True
|
||||
acquire_mock.side_effect = self._get_acquire_side_effect(
|
||||
[self.task] * 3)
|
||||
|
||||
self.service._check_inspect_timeouts(self.context)
|
||||
|
||||
# Should only have ran 2.
|
||||
self.assertEqual([mock.call(self.node.uuid, self.node.driver)] * 2,
|
||||
mapped_mock.call_args_list)
|
||||
self.assertEqual([mock.call(self.context, self.node.uuid)] * 2,
|
||||
acquire_mock.call_args_list)
|
||||
process_event_call = mock.call('fail')
|
||||
self.assertEqual([process_event_call] * 2,
|
||||
self.task.process_event.call_args_list)
|
||||
|
@ -282,3 +282,9 @@ class RPCAPITestCase(base.DbTestCase):
|
||||
'call',
|
||||
version='1.21',
|
||||
driver_name='fake-driver')
|
||||
|
||||
def test_inspect_hardware(self):
|
||||
self._test_rpcapi('inspect_hardware',
|
||||
'call',
|
||||
version='1.24',
|
||||
node_id=self.fake_node['uuid'])
|
||||
|
@ -164,6 +164,32 @@ class DbNodeTestCase(base.DbTestCase):
|
||||
states.DEPLOYWAIT})
|
||||
self.assertEqual([node2.id], [r[0] for r in res])
|
||||
|
||||
@mock.patch.object(timeutils, 'utcnow')
|
||||
def test_get_nodeinfo_list_inspection(self, mock_utcnow):
|
||||
past = datetime.datetime(2000, 1, 1, 0, 0)
|
||||
next = past + datetime.timedelta(minutes=8)
|
||||
present = past + datetime.timedelta(minutes=10)
|
||||
mock_utcnow.return_value = past
|
||||
|
||||
# node with provision_updated timeout
|
||||
node1 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
|
||||
inspection_started_at=past)
|
||||
# node with None in provision_updated_at
|
||||
node2 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
|
||||
provision_state=states.INSPECTING)
|
||||
# node without timeout
|
||||
utils.create_test_node(uuid=uuidutils.generate_uuid(),
|
||||
inspection_started_at=next)
|
||||
|
||||
mock_utcnow.return_value = present
|
||||
res = self.dbapi.get_nodeinfo_list(
|
||||
filters={'inspection_started_before': 300})
|
||||
self.assertEqual([node1.id], [r[0] for r in res])
|
||||
|
||||
res = self.dbapi.get_nodeinfo_list(filters={'provision_state':
|
||||
states.INSPECTING})
|
||||
self.assertEqual([node2.id], [r[0] for r in res])
|
||||
|
||||
def test_get_node_list(self):
|
||||
uuids = []
|
||||
for i in range(1, 6):
|
||||
@ -355,6 +381,31 @@ class DbNodeTestCase(base.DbTestCase):
|
||||
node = utils.create_test_node()
|
||||
res = self.dbapi.update_node(node.id, {'extra': {'foo': 'bar'}})
|
||||
self.assertIsNone(res['provision_updated_at'])
|
||||
self.assertIsNone(res['inspection_started_at'])
|
||||
|
||||
@mock.patch.object(timeutils, 'utcnow')
|
||||
def test_update_node_inspection_started_at(self, mock_utcnow):
|
||||
mocked_time = datetime.datetime(2000, 1, 1, 0, 0)
|
||||
mock_utcnow.return_value = mocked_time
|
||||
node = utils.create_test_node(uuid=uuidutils.generate_uuid(),
|
||||
inspection_started_at=mocked_time)
|
||||
res = self.dbapi.update_node(node.id, {'provision_state': 'fake'})
|
||||
result = res['inspection_started_at']
|
||||
self.assertEqual(mocked_time,
|
||||
timeutils.normalize_time(result))
|
||||
self.assertIsNone(res['inspection_finished_at'])
|
||||
|
||||
@mock.patch.object(timeutils, 'utcnow')
|
||||
def test_update_node_inspection_finished_at(self, mock_utcnow):
|
||||
mocked_time = datetime.datetime(2000, 1, 1, 0, 0)
|
||||
mock_utcnow.return_value = mocked_time
|
||||
node = utils.create_test_node(uuid=uuidutils.generate_uuid(),
|
||||
inspection_finished_at=mocked_time)
|
||||
res = self.dbapi.update_node(node.id, {'provision_state': 'fake'})
|
||||
result = res['inspection_finished_at']
|
||||
self.assertEqual(mocked_time,
|
||||
timeutils.normalize_time(result))
|
||||
self.assertIsNone(res['inspection_started_at'])
|
||||
|
||||
def test_reserve_node(self):
|
||||
node = utils.create_test_node()
|
||||
|
@ -203,6 +203,8 @@ def get_test_node(**kw):
|
||||
'extra': kw.get('extra', {}),
|
||||
'updated_at': kw.get('updated_at'),
|
||||
'created_at': kw.get('created_at'),
|
||||
'inspection_finished_at': kw.get('inspection_finished_at'),
|
||||
'inspection_started_at': kw.get('inspection_started_at'),
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user