Merge "Generic changes for Node Inspection"

This commit is contained in:
Jenkins 2015-03-02 19:38:26 +00:00 committed by Gerrit Code Review
commit f203681441
15 changed files with 711 additions and 46 deletions

View File

@ -15,7 +15,7 @@
#rpc_conn_pool_size=30
# Qpid broker hostname. (string value)
#qpid_hostname=localhost
#qpid_hostname=ironic
# Qpid broker port. (integer value)
#qpid_port=5672
@ -76,7 +76,7 @@
# The RabbitMQ broker address where a single node is used.
# (string value)
#rabbit_host=localhost
#rabbit_host=ironic
# The RabbitMQ broker port where a single node is used.
# (integer value)
@ -604,6 +604,10 @@
# when configdrive_use_swift is True. (string value)
#configdrive_swift_container=ironic_configdrive_container
# Timeout (seconds) for waiting for node inspection. 0 -
# unlimited. (integer value)
#inspect_timeout=1800
[console]
@ -957,7 +961,7 @@
# Timeout (in seconds) for iRMC operations (integer value)
#client_timeout=60
# Sensor Data retrieval method either "ipmitool" or "scci"
# Sensor data retrieval method, either "ipmitool" or "scci"
# (string value)
#sensor_method=ipmitool

View File

@ -60,7 +60,8 @@ MIN_VER_STR = '1.1'
# v1.3: Add node.driver_internal_info
# v1.4: Add MANAGEABLE state
# v1.5: Add logical node names
MAX_VER_STR = '1.5'
# v1.6: Add INSPECT* states
MAX_VER_STR = '1.6'
MIN_VER = base.Version({base.Version.string: MIN_VER_STR},

View File

@ -72,9 +72,11 @@ def hide_driver_internal_info(obj):
def check_allow_management_verbs(verb):
# v1.4 added the MANAGEABLE state and two verbs to move nodes into
# and out of that state. Reject requests to do this in older versions
if (pecan.request.version.minor < 4 and
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]):
raise exception.NotAcceptable()
if ((pecan.request.version.minor < 4 and
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']])
or (pecan.request.version.minor < 6 and
verb == ir_states.VERBS['inspect'])):
raise exception.NotAcceptable()
def allow_logical_names():
@ -127,7 +129,8 @@ class NodePatchType(types.JsonPatchType):
'/power_state', '/provision_state', '/reservation',
'/target_power_state', '/target_provision_state',
'/provision_updated_at', '/maintenance_reason',
'/driver_internal_info']
'/driver_internal_info', '/inspection_finished_at',
'/inspection_started_at', ]
@staticmethod
def mandatory_attrs():
@ -439,6 +442,9 @@ class NodeStatesController(rest.RestController):
elif target == ir_states.DELETED:
pecan.request.rpcapi.do_node_tear_down(
pecan.request.context, rpc_node.uuid, topic)
elif target == ir_states.VERBS['inspect']:
pecan.request.rpcapi.inspect_hardware(
pecan.request.context, rpc_node.uuid, topic=topic)
elif target in (
ir_states.VERBS['manage'], ir_states.VERBS['provide']):
pecan.request.rpcapi.do_provisioning_action(
@ -511,6 +517,12 @@ class Node(base.APIBase):
provision_updated_at = datetime.datetime
"""The UTC date and time of the last provision state change"""
inspection_finished_at = datetime.datetime
"""The UTC date and time when the last inspection finished successfully."""
inspection_started_at = datetime.datetime
"""The UTC date and time of the hardware when inspection was started"""
maintenance = types.boolean
"""Indicates whether the node is in maintenance mode."""
@ -636,6 +648,7 @@ class Node(base.APIBase):
'cpus': '1'}, updated_at=time, created_at=time,
provision_updated_at=time, instance_info={},
maintenance=False, maintenance_reason=None,
inspection_finished_at=None, inspection_started_at=time,
console_enabled=False)
# NOTE(matty_dubs): The chassis_uuid getter() is based on the
# _chassis_uuid variable:

View File

@ -163,8 +163,12 @@ conductor_opts = [
default='ironic_configdrive_container',
help='Name of the Swift container to store config drive '
'data. Used when configdrive_use_swift is True.'),
]
cfg.IntOpt('inspect_timeout',
default=1800,
help='Timeout (seconds) for waiting for node inspection. '
'0 - unlimited. (integer value)'),
]
CONF = cfg.CONF
CONF.register_opts(conductor_opts, 'conductor')
@ -173,7 +177,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
"""Ironic Conductor manager main class."""
# NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's.
RPC_API_VERSION = '1.23'
RPC_API_VERSION = '1.24'
target = messaging.Target(version=RPC_API_VERSION)
@ -921,40 +925,11 @@ class ConductorManager(periodic_task.PeriodicTasks):
'provision_state': states.DEPLOYWAIT,
'maintenance': False,
'provisioned_before': callback_timeout}
columns = ['uuid', 'driver']
node_list = self.dbapi.get_nodeinfo_list(
columns=columns,
filters=filters,
sort_key='provision_updated_at',
sort_dir='asc')
workers_count = 0
for node_uuid, driver in node_list:
if not self._mapped_to_this_conductor(node_uuid, driver):
continue
try:
with task_manager.acquire(context, node_uuid) as task:
# NOTE(comstud): Recheck maintenance and provision_state
# now that we have the lock. We don't need to re-check
# updated_at unless we expect the state to have flipped
# to something else and then back to DEPLOYWAIT between
# the call to get_nodeinfo_list and now.
if (task.node.maintenance or
task.node.provision_state != states.DEPLOYWAIT):
continue
# timeout has been reached - fail the deploy
task.process_event('fail',
callback=self._spawn_worker,
call_args=(utils.cleanup_after_timeout,
task),
err_handler=provisioning_error_handler)
except exception.NoFreeConductorWorker:
break
except (exception.NodeLocked, exception.NodeNotFound):
continue
workers_count += 1
if workers_count == CONF.conductor.periodic_max_workers:
break
sort_key = 'provision_updated_at'
callback_method = utils.cleanup_after_timeout
err_handler = provisioning_error_handler
self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT,
sort_key, callback_method, err_handler)
def _do_takeover(self, task):
LOG.debug(('Conductor %(cdr)s taking over node %(node)s'),
@ -1425,6 +1400,135 @@ class ConductorManager(periodic_task.PeriodicTasks):
driver=task.node.driver, extension='management')
return task.driver.management.get_supported_boot_devices()
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
                               exception.NodeLocked,
                               exception.HardwareInspectionFailure,
                               exception.UnsupportedDriverExtension)
def inspect_hardware(self, context, node_id):
    """Inspect hardware to obtain hardware properties.

    Initiate the inspection of a node. Validations are done
    synchronously and the actual inspection work is performed in
    background (asynchronously).

    :param context: request context.
    :param node_id: node id or uuid.
    :raises: NodeLocked if node is locked by another conductor.
    :raises: UnsupportedDriverExtension if the node's driver doesn't
             support inspect.
    :raises: NoFreeConductorWorker when there is no free worker to start
             async task
    :raises: HardwareInspectionFailure when unable to get
             essential scheduling properties from hardware.
    """
    LOG.debug('RPC inspect_hardware called for node %s', node_id)
    # Exclusive (non-shared) lock: inspection will change the node's
    # provision state, so no other conductor may touch it meanwhile.
    with task_manager.acquire(context, node_id, shared=False) as task:
        node = task.node
        # Reject drivers that do not implement the inspect interface.
        if not getattr(task.driver, 'inspect', None):
            raise exception.UnsupportedDriverExtension(
                driver=task.node.driver, extension='inspect')
        try:
            # Validate synchronously so the caller gets an immediate,
            # meaningful error before any async work is spawned.
            task.driver.power.validate(task)
            task.driver.inspect.validate(task)
        except (exception.InvalidParameterValue,
                exception.MissingParameterValue) as e:
            error = (_("RPC inspect_hardware failed to validate "
                       "inspection or power info. Error: %(msg)s")
                     % {'msg': e})
            raise exception.HardwareInspectionFailure(error=error)

        try:
            # The actual inspection runs in a spawned worker thread;
            # process_event first validates that 'inspect' is a legal
            # transition from the node's current provision state.
            task.process_event('inspect',
                               callback=self._spawn_worker,
                               call_args=(_do_inspect_hardware, task,
                                          self.conductor.id),
                               err_handler=provisioning_error_handler)

        except exception.InvalidState:
            error = (_("Inspection is not possible while node "
                       "%(node)s is in state %(state)s")
                     % {'node': node.uuid,
                        'state': node.provision_state})
            raise exception.HardwareInspectionFailure(error=error)
@periodic_task.periodic_task(
    spacing=CONF.conductor.check_provision_state_interval)
def _check_inspect_timeouts(self, context):
    """Periodically checks inspect_timeout and fails upon reaching it.

    :param context: request context
    """
    callback_timeout = CONF.conductor.inspect_timeout
    # A timeout of 0 means "unlimited": skip the check entirely.
    if not callback_timeout:
        return

    filters = {'reserved': False,
               'provision_state': states.INSPECTING,
               'inspection_started_before': callback_timeout}
    sort_key = 'inspection_started_at'
    last_error = _("timeout reached while waiting for callback")
    # No callback_method: the node is simply moved to the failed state
    # with last_error set (see _fail_if_timeout_reached).
    self._fail_if_timeout_reached(context, filters, states.INSPECTING,
                                  sort_key, last_error=last_error)
def _fail_if_timeout_reached(self, context, filters, provision_state,
                             sort_key, callback_method=None,
                             err_handler=None, last_error=None):
    """Checks if the async(background) process has reached timeout.

    :param context: request context
    :param filters: criteria (as a dictionary) to get the desired
                    list of nodes.
    :param provision_state: provision_state that the node is in,
                            for the provisioning activity to have failed.
    :param sort_key: the nodes are sorted based on this key.
    :param callback_method: the callback method to be invoked in a
                            spawned thread, for a failed node. This
                            method must take a :class:`TaskManager` as
                            the first (and only required) parameter.
    :param err_handler: the error handler to be invoked if an error
                        occurs trying to spawn a thread for a failed node.
    :param last_error: the error message to be updated in node.last_error
    """
    columns = ['uuid', 'driver']
    node_list = self.dbapi.get_nodeinfo_list(columns=columns,
                                             filters=filters,
                                             sort_key=sort_key,
                                             sort_dir='asc')
    workers_count = 0
    for node_uuid, driver in node_list:
        # Only process nodes this conductor is responsible for.
        if not self._mapped_to_this_conductor(node_uuid, driver):
            continue
        try:
            with task_manager.acquire(context, node_uuid) as task:
                # Re-check state now that we hold the lock; it may have
                # changed between the DB query and acquiring the node.
                if (task.node.maintenance or
                        task.node.provision_state != provision_state):
                    continue
                # timeout has been reached - process the event 'fail'
                if callback_method:
                    task.process_event('fail',
                                       callback=self._spawn_worker,
                                       call_args=(callback_method, task),
                                       err_handler=err_handler)
                else:
                    if not last_error:
                        last_error = _("timeout reached while waiting "
                                       "for callback")
                    task.node.last_error = last_error
                    task.process_event('fail')
        except exception.NoFreeConductorWorker:
            # No workers left: stop processing entirely this round.
            break
        except (exception.NodeLocked, exception.NodeNotFound):
            # Node grabbed by someone else or deleted: skip it.
            continue
        workers_count += 1
        # Bound the amount of work done per periodic run.
        if workers_count >= CONF.conductor.periodic_max_workers:
            break
def get_vendor_passthru_metadata(route_dict):
d = {}
@ -1696,3 +1800,37 @@ def do_sync_power_state(task, count):
node.save()
return count
def _do_inspect_hardware(task, conductor_id):
    """Prepare the environment and inspect a node.

    Runs in a spawned worker thread after the 'inspect' event was
    processed; moves the node to MANAGEABLE on success, or to the
    failed state (with node.last_error set) on any error.

    :param task: a TaskManager instance with an exclusive lock on its node.
    :param conductor_id: the DB id of this conductor, recorded in
        node.conductor_affinity.
    :raises: HardwareInspectionFailure if the driver returns an
        unexpected state; re-raises any exception the driver raises.
    """
    node = task.node

    def handle_failure(e):
        # NOTE(deva): there is no need to clear conductor_affinity
        node.last_error = e
        task.process_event('fail')
        LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
                  {'node': node.uuid, 'err': e})

    try:
        new_state = task.driver.inspect.inspect_hardware(task)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            error = str(e)
            handle_failure(error)

    # Update conductor_affinity to reference this conductor's ID
    # since there may be local persistent state
    node.conductor_affinity = conductor_id

    if new_state == states.MANAGEABLE:
        task.process_event('done')
        # Pass the substitution dict as a lazy logging argument rather
        # than %-formatting it eagerly.
        LOG.info(_LI('Successfully inspected node %(node)s'),
                 {'node': node.uuid})
    elif new_state != states.INSPECTING:
        # Fixed: the original concatenation lacked a space, producing
        # "...in inspection<state>" in the error message.
        error = (_("Driver returned unexpected state in inspection "
                   "%(state)s") % {'state': new_state})
        handle_failure(error)
        raise exception.HardwareInspectionFailure(error=error)

View File

@ -66,11 +66,12 @@ class ConductorAPI(object):
| get_driver_vendor_passthru_methods
| 1.22 - Added configdrive parameter to do_node_deploy.
| 1.23 - Added do_provisioning_action
| 1.24 - Added inspect_hardware method
"""
# NOTE(rloo): This must be in sync with manager.ConductorManager's.
RPC_API_VERSION = '1.23'
RPC_API_VERSION = '1.24'
def __init__(self, topic=None):
super(ConductorAPI, self).__init__()
@ -483,3 +484,20 @@ class ConductorAPI(object):
cctxt = self.client.prepare(topic=topic or self.topic, version='1.17')
return cctxt.call(context, 'get_supported_boot_devices',
node_id=node_id)
def inspect_hardware(self, context, node_id, topic=None):
    """Signals the conductor service to perform hardware introspection.

    Prepares an RPC client context pinned to API version 1.24 (the
    version that introduced this method) and performs a synchronous
    'inspect_hardware' call on the conductor.

    :param context: request context.
    :param node_id: node id or uuid.
    :param topic: RPC topic. Defaults to self.topic.
    :raises: NodeLocked if node is locked by another conductor.
    :raises: HardwareInspectionFailure
    :raises: NoFreeConductorWorker when there is no free worker to start
             async task.
    :raises: UnsupportedDriverExtension if the node's driver doesn't
             support inspection.
    """
    prepared = self.client.prepare(topic=(topic or self.topic),
                                   version='1.24')
    return prepared.call(context, 'inspect_hardware', node_id=node_id)

View File

@ -0,0 +1,40 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""add inspection_started_at and inspection_finished_at
Revision ID: 1e1d5ace7dc6
Revises: 3ae36a5f5131
Create Date: 2015-02-26 10:46:46.861927
"""
# revision identifiers, used by Alembic.
revision = '1e1d5ace7dc6'
down_revision = '3ae36a5f5131'
from alembic import op
import sqlalchemy as sa
def upgrade():
    """Add the two nullable inspection timestamp columns to 'nodes'."""
    for column_name in ('inspection_started_at', 'inspection_finished_at'):
        op.add_column('nodes', sa.Column(column_name,
                                         sa.DateTime(),
                                         nullable=True))
def downgrade():
    """Drop the inspection timestamp columns added by upgrade()."""
    for column_name in ('inspection_started_at', 'inspection_finished_at'):
        op.drop_column('nodes', column_name)

View File

@ -191,6 +191,11 @@ class Connection(api.Connection):
limit = timeutils.utcnow() - datetime.timedelta(
seconds=filters['provisioned_before'])
query = query.filter(models.Node.provision_updated_at < limit)
if 'inspection_started_before' in filters:
limit = ((timeutils.utcnow()) -
(datetime.timedelta(
seconds=filters['inspection_started_before'])))
query = query.filter(models.Node.inspection_started_at < limit)
return query
@ -371,6 +376,17 @@ class Connection(api.Connection):
if 'provision_state' in values:
values['provision_updated_at'] = timeutils.utcnow()
if values['provision_state'] == states.INSPECTING:
values['inspection_started_at'] = timeutils.utcnow()
values['inspection_finished_at'] = None
if (ref.provision_state == states.INSPECTING and
values.get('provision_state') == states.MANAGEABLE):
values['inspection_finished_at'] = timeutils.utcnow()
values['inspection_started_at'] = None
elif (ref.provision_state == states.INSPECTING and
values.get('provision_state') == states.INSPECTFAIL):
values['inspection_started_at'] = None
ref.update(values)
return ref

View File

@ -185,6 +185,8 @@ class Node(Base):
maintenance = Column(Boolean, default=False)
maintenance_reason = Column(Text, nullable=True)
console_enabled = Column(Boolean, default=False)
inspection_finished_at = Column(DateTime, nullable=True)
inspection_started_at = Column(DateTime, nullable=True)
extra = Column(JSONEncodedDict)

View File

@ -181,4 +181,4 @@ class FakeInspect(base.InspectInterface):
pass
def inspect_hardware(self, task):
pass
return states.MANAGEABLE

View File

@ -79,6 +79,9 @@ class Node(base.IronicObject):
# that started but failed to finish.
'last_error': obj_utils.str_or_none,
'inspection_finished_at': obj_utils.datetime_or_str_or_none,
'inspection_started_at': obj_utils.datetime_or_str_or_none,
'extra': obj_utils.dict_or_none,
}

View File

@ -199,6 +199,8 @@ class TestListNodes(test_api_base.FunctionalTest):
self.assertIn('reservation', data)
self.assertIn('maintenance_reason', data)
self.assertIn('name', data)
self.assertIn('inspection_finished_at', data)
self.assertIn('inspection_started_at', data)
# never expose the chassis_id
self.assertNotIn('chassis_id', data)
@ -219,6 +221,8 @@ class TestListNodes(test_api_base.FunctionalTest):
self.assertIn('target_power_state', data['nodes'][0])
self.assertIn('target_provision_state', data['nodes'][0])
self.assertIn('provision_updated_at', data['nodes'][0])
self.assertIn('inspection_finished_at', data['nodes'][0])
self.assertIn('inspection_started_at', data['nodes'][0])
# never expose the chassis_id
self.assertNotIn('chassis_id', data['nodes'][0])
@ -1504,6 +1508,9 @@ class TestPut(test_api_base.FunctionalTest):
p = mock.patch.object(rpcapi.ConductorAPI, 'do_node_tear_down')
self.mock_dntd = p.start()
self.addCleanup(p.stop)
p = mock.patch.object(rpcapi.ConductorAPI, 'inspect_hardware')
self.mock_dnih = p.start()
self.addCleanup(p.stop)
def test_power_state(self):
response = self.put_json('/nodes/%s/states/power' % self.node.uuid,
@ -1695,6 +1702,17 @@ class TestPut(test_api_base.FunctionalTest):
states.VERBS['provide'],
'test-topic')
def test_inspect_already_in_progress(self):
node = self.node
node.provision_state = states.INSPECTING
node.target_provision_state = states.MANAGEABLE
node.reservation = 'fake-host'
node.save()
ret = self.put_json('/nodes/%s/states/provision' % node.uuid,
{'target': states.MANAGEABLE},
expect_errors=True)
self.assertEqual(409, ret.status_code) # Conflict
@mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action')
def test_manage_from_available(self, mock_dpa):
self.node.provision_state = states.AVAILABLE

View File

@ -3070,3 +3070,356 @@ class StoreConfigDriveTestCase(tests_base.TestCase):
mock_swift.return_value.get_temp_url.assert_called_once_with(
container_name, expected_obj_name, timeout)
self.assertEqual(expected_instance_info, self.node.instance_info)
@_mock_record_keepalive
class NodeInspectHardware(_ServiceSetUpMixin,
tests_db_base.DbTestCase):
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_ok(self, mock_inspect):
self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.MANAGEABLE
manager._do_inspect_hardware(task, self.service.conductor.id)
node.refresh()
self.assertEqual(states.MANAGEABLE, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
self.assertIsNone(node.last_error)
mock_inspect.assert_called_once_with(mock.ANY)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_return_inspecting(self, mock_inspect):
self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.INSPECTING
manager._do_inspect_hardware(task, self.service.conductor.id)
node.refresh()
self.assertEqual(states.INSPECTING, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
self.assertIsNone(node.last_error)
mock_inspect.assert_called_once_with(mock.ANY)
@mock.patch.object(manager, 'LOG')
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_return_other_state(self, mock_inspect, log_mock):
self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = None
self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task,
self.service.conductor.id)
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
self.assertIsNotNone(node.last_error)
mock_inspect.assert_called_once_with(mock.ANY)
self.assertTrue(log_mock.error.called)
def test__check_inspect_timeouts(self):
self._start_service()
CONF.set_override('inspect_timeout', 1, group='conductor')
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.INSPECTING,
target_provision_state=states.MANAGEABLE,
provision_updated_at=datetime.datetime(2000, 1, 1, 0, 0),
inspection_started_at=datetime.datetime(2000, 1, 1, 0, 0))
self.service._check_inspect_timeouts(self.context)
self.service._worker_pool.waitall()
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
self.assertIsNotNone(node.last_error)
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker')
def test_inspect_hardware_worker_pool_full(self, mock_spawn):
prv_state = states.MANAGEABLE
tgt_prv_state = states.NOSTATE
node = obj_utils.create_test_node(self.context,
provision_state=prv_state,
target_provision_state=tgt_prv_state,
last_error=None, driver='fake')
self._start_service()
mock_spawn.side_effect = exception.NoFreeConductorWorker()
exc = self.assertRaises(messaging.rpc.ExpectedException,
self.service.inspect_hardware,
self.context, node.uuid)
# Compare true exception hidden by @messaging.expected_exceptions
self.assertEqual(exception.NoFreeConductorWorker, exc.exc_info[0])
self.service._worker_pool.waitall()
node.refresh()
# Make sure things were rolled back
self.assertEqual(prv_state, node.provision_state)
self.assertEqual(tgt_prv_state, node.target_provision_state)
self.assertIsNotNone(node.last_error)
# Verify reservation has been cleared.
self.assertIsNone(node.reservation)
def _test_inspect_hardware_validate_fail(self, mock_validate):
mock_validate.side_effect = exception.InvalidParameterValue('error')
node = obj_utils.create_test_node(self.context, driver='fake')
exc = self.assertRaises(messaging.rpc.ExpectedException,
self.service.inspect_hardware,
self.context, node.uuid)
# Compare true exception hidden by @messaging.expected_exceptions
self.assertEqual(exception.HardwareInspectionFailure, exc.exc_info[0])
# This is a sync operation last_error should be None.
self.assertIsNone(node.last_error)
# Verify reservation has been cleared.
self.assertIsNone(node.reservation)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.validate')
def test_inspect_hardware_validate_fail(self, mock_validate):
self._test_inspect_hardware_validate_fail(mock_validate)
@mock.patch('ironic.drivers.modules.fake.FakePower.validate')
def test_inspect_hardware_power_validate_fail(self, mock_validate):
self._test_inspect_hardware_validate_fail(mock_validate)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.inspect_hardware')
def test_inspect_hardware_raises_error(self, mock_inspect):
self._start_service()
mock_inspect.side_effect = exception.HardwareInspectionFailure('test')
state = states.MANAGEABLE
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.INSPECTING,
target_provision_state=state)
task = task_manager.TaskManager(self.context, node.uuid)
self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task,
self.service.conductor.id)
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
self.assertIsNotNone(node.last_error)
self.assertTrue(mock_inspect.called)
@mock.patch.object(task_manager, 'acquire')
@mock.patch.object(manager.ConductorManager, '_mapped_to_this_conductor')
@mock.patch.object(dbapi.IMPL, 'get_nodeinfo_list')
class ManagerCheckInspectTimeoutsTestCase(_CommonMixIn,
tests_db_base.DbTestCase):
def setUp(self):
super(ManagerCheckInspectTimeoutsTestCase, self).setUp()
self.config(inspect_timeout=300, group='conductor')
self.service = manager.ConductorManager('hostname', 'test-topic')
self.service.dbapi = self.dbapi
self.node = self._create_node(provision_state=states.INSPECTING,
target_provision_state=states.MANAGEABLE)
self.task = self._create_task(node=self.node)
self.node2 = self._create_node(provision_state=states.INSPECTING,
target_provision_state=states.MANAGEABLE)
self.task2 = self._create_task(node=self.node2)
self.filters = {'reserved': False,
'inspection_started_before': 300,
'provision_state': states.INSPECTING}
self.columns = ['uuid', 'driver']
def _assert_get_nodeinfo_args(self, get_nodeinfo_mock):
get_nodeinfo_mock.assert_called_once_with(sort_dir='asc',
columns=self.columns, filters=self.filters,
sort_key='inspection_started_at')
def test__check_inspect_timeouts_disabled(self, get_nodeinfo_mock,
mapped_mock, acquire_mock):
self.config(inspect_timeout=0, group='conductor')
self.service._check_inspect_timeouts(self.context)
self.assertFalse(get_nodeinfo_mock.called)
self.assertFalse(mapped_mock.called)
self.assertFalse(acquire_mock.called)
def test__check_inspect_timeouts_not_mapped(self, get_nodeinfo_mock,
mapped_mock, acquire_mock):
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
mapped_mock.return_value = False
self.service._check_inspect_timeouts(self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
mapped_mock.assert_called_once_with(self.node.uuid, self.node.driver)
self.assertFalse(acquire_mock.called)
def test__check_inspect_timeout(self, get_nodeinfo_mock,
mapped_mock, acquire_mock):
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
mapped_mock.return_value = True
acquire_mock.side_effect = self._get_acquire_side_effect(self.task)
self.service._check_inspect_timeouts(self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
mapped_mock.assert_called_once_with(self.node.uuid, self.node.driver)
acquire_mock.assert_called_once_with(self.context, self.node.uuid)
self.task.process_event.assert_called_with('fail')
def test__check_inspect_timeouts_acquire_node_disappears(self,
get_nodeinfo_mock,
mapped_mock,
acquire_mock):
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
mapped_mock.return_value = True
acquire_mock.side_effect = exception.NodeNotFound(node='fake')
# Exception eaten
self.service._check_inspect_timeouts(self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
mapped_mock.assert_called_once_with(
self.node.uuid, self.node.driver)
acquire_mock.assert_called_once_with(self.context,
self.node.uuid)
self.assertFalse(self.task.process_event.called)
def test__check_inspect_timeouts_acquire_node_locked(self,
get_nodeinfo_mock,
mapped_mock,
acquire_mock):
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
mapped_mock.return_value = True
acquire_mock.side_effect = exception.NodeLocked(node='fake',
host='fake')
# Exception eaten
self.service._check_inspect_timeouts(self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
mapped_mock.assert_called_once_with(
self.node.uuid, self.node.driver)
acquire_mock.assert_called_once_with(self.context,
self.node.uuid)
self.assertFalse(self.task.process_event.called)
def test__check_inspect_timeouts_no_acquire_after_lock(self,
get_nodeinfo_mock,
mapped_mock,
acquire_mock):
task = self._create_task(
node_attrs=dict(provision_state=states.AVAILABLE,
uuid=self.node.uuid))
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response()
mapped_mock.return_value = True
acquire_mock.side_effect = self._get_acquire_side_effect(task)
self.service._check_inspect_timeouts(self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
mapped_mock.assert_called_once_with(
self.node.uuid, self.node.driver)
acquire_mock.assert_called_once_with(self.context,
self.node.uuid)
self.assertFalse(task.process_event.called)
def test__check_inspect_timeouts_to_maintenance_after_lock(self,
get_nodeinfo_mock,
mapped_mock,
acquire_mock):
task = self._create_task(
node_attrs=dict(provision_state=states.INSPECTING,
target_provision_state=states.MANAGEABLE,
maintenance=True,
uuid=self.node.uuid))
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
[task.node, self.node2])
mapped_mock.return_value = True
acquire_mock.side_effect = self._get_acquire_side_effect(
[task, self.task2])
self.service._check_inspect_timeouts(self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
self.assertEqual([mock.call(self.node.uuid, task.node.driver),
mock.call(self.node2.uuid, self.node2.driver)],
mapped_mock.call_args_list)
self.assertEqual([mock.call(self.context, self.node.uuid),
mock.call(self.context, self.node2.uuid)],
acquire_mock.call_args_list)
# First node skipped
self.assertFalse(task.process_event.called)
# Second node spawned
self.task2.process_event.assert_called_with('fail')
def test__check_inspect_timeouts_exiting_no_worker_avail(self,
get_nodeinfo_mock,
mapped_mock,
acquire_mock):
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
[self.node, self.node2])
mapped_mock.return_value = True
acquire_mock.side_effect = self._get_acquire_side_effect(
[(self.task, exception.NoFreeConductorWorker()), self.task2])
# Exception should be nuked
self.service._check_inspect_timeouts(self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
# mapped should be only called for the first node as we should
# have exited the loop early due to NoFreeConductorWorker
mapped_mock.assert_called_once_with(
self.node.uuid, self.node.driver)
acquire_mock.assert_called_once_with(self.context,
self.node.uuid)
self.task.process_event.assert_called_with('fail')
def test__check_inspect_timeouts_exit_with_other_exception(self,
get_nodeinfo_mock,
mapped_mock,
acquire_mock):
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
[self.node, self.node2])
mapped_mock.return_value = True
acquire_mock.side_effect = self._get_acquire_side_effect(
[(self.task, exception.IronicException('foo')), self.task2])
# Should re-raise
self.assertRaises(exception.IronicException,
self.service._check_inspect_timeouts,
self.context)
self._assert_get_nodeinfo_args(get_nodeinfo_mock)
# mapped should be only called for the first node as we should
# have exited the loop early due to unknown exception
mapped_mock.assert_called_once_with(
self.node.uuid, self.node.driver)
acquire_mock.assert_called_once_with(self.context,
self.node.uuid)
self.task.process_event.assert_called_with('fail')
def test__check_inspect_timeouts_worker_limit(self, get_nodeinfo_mock,
mapped_mock, acquire_mock):
self.config(periodic_max_workers=2, group='conductor')
# Use the same nodes/tasks to make life easier in the tests
# here
get_nodeinfo_mock.return_value = self._get_nodeinfo_list_response(
[self.node] * 3)
mapped_mock.return_value = True
acquire_mock.side_effect = self._get_acquire_side_effect(
[self.task] * 3)
self.service._check_inspect_timeouts(self.context)
# Should only have ran 2.
self.assertEqual([mock.call(self.node.uuid, self.node.driver)] * 2,
mapped_mock.call_args_list)
self.assertEqual([mock.call(self.context, self.node.uuid)] * 2,
acquire_mock.call_args_list)
process_event_call = mock.call('fail')
self.assertEqual([process_event_call] * 2,
self.task.process_event.call_args_list)

View File

@ -282,3 +282,9 @@ class RPCAPITestCase(base.DbTestCase):
'call',
version='1.21',
driver_name='fake-driver')
def test_inspect_hardware(self):
self._test_rpcapi('inspect_hardware',
'call',
version='1.24',
node_id=self.fake_node['uuid'])

View File

@ -164,6 +164,32 @@ class DbNodeTestCase(base.DbTestCase):
states.DEPLOYWAIT})
self.assertEqual([node2.id], [r[0] for r in res])
@mock.patch.object(timeutils, 'utcnow')
def test_get_nodeinfo_list_inspection(self, mock_utcnow):
past = datetime.datetime(2000, 1, 1, 0, 0)
next = past + datetime.timedelta(minutes=8)
present = past + datetime.timedelta(minutes=10)
mock_utcnow.return_value = past
# node whose inspection_started_at is past the timeout
node1 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
inspection_started_at=past)
# node with None in inspection_started_at
node2 = utils.create_test_node(uuid=uuidutils.generate_uuid(),
provision_state=states.INSPECTING)
# node without timeout
utils.create_test_node(uuid=uuidutils.generate_uuid(),
inspection_started_at=next)
mock_utcnow.return_value = present
res = self.dbapi.get_nodeinfo_list(
filters={'inspection_started_before': 300})
self.assertEqual([node1.id], [r[0] for r in res])
res = self.dbapi.get_nodeinfo_list(filters={'provision_state':
states.INSPECTING})
self.assertEqual([node2.id], [r[0] for r in res])
def test_get_node_list(self):
uuids = []
for i in range(1, 6):
@ -355,6 +381,31 @@ class DbNodeTestCase(base.DbTestCase):
node = utils.create_test_node()
res = self.dbapi.update_node(node.id, {'extra': {'foo': 'bar'}})
self.assertIsNone(res['provision_updated_at'])
self.assertIsNone(res['inspection_started_at'])
@mock.patch.object(timeutils, 'utcnow')
def test_update_node_inspection_started_at(self, mock_utcnow):
mocked_time = datetime.datetime(2000, 1, 1, 0, 0)
mock_utcnow.return_value = mocked_time
node = utils.create_test_node(uuid=uuidutils.generate_uuid(),
inspection_started_at=mocked_time)
res = self.dbapi.update_node(node.id, {'provision_state': 'fake'})
result = res['inspection_started_at']
self.assertEqual(mocked_time,
timeutils.normalize_time(result))
self.assertIsNone(res['inspection_finished_at'])
@mock.patch.object(timeutils, 'utcnow')
def test_update_node_inspection_finished_at(self, mock_utcnow):
mocked_time = datetime.datetime(2000, 1, 1, 0, 0)
mock_utcnow.return_value = mocked_time
node = utils.create_test_node(uuid=uuidutils.generate_uuid(),
inspection_finished_at=mocked_time)
res = self.dbapi.update_node(node.id, {'provision_state': 'fake'})
result = res['inspection_finished_at']
self.assertEqual(mocked_time,
timeutils.normalize_time(result))
self.assertIsNone(res['inspection_started_at'])
def test_reserve_node(self):
node = utils.create_test_node()

View File

@ -203,6 +203,8 @@ def get_test_node(**kw):
'extra': kw.get('extra', {}),
'updated_at': kw.get('updated_at'),
'created_at': kw.get('created_at'),
'inspection_finished_at': kw.get('inspection_finished_at'),
'inspection_started_at': kw.get('inspection_started_at'),
}