From f39a7c5bab1ed83dd21395dd9d05d6e869f041bb Mon Sep 17 00:00:00 2001
From: Mark Goddard
Date: Fri, 7 Aug 2015 10:19:58 -0400
Subject: [PATCH] Inspector inspection fails due to node locked error

This change solves an issue whereby inspection using ironic inspector
could fail due to a race condition between ironic and inspector. The
failure was caused by the ironic inspector driver's periodic task to
check the inspection status holding the node's lock during an API
request to inspector. This change only acquires the lock after the API
request has completed, when the inspection has finished or failed.

Change-Id: If4c5f7d4addcaf6d53073b71cd4fdb71e2a3b2bd
Closes-bug: #1482646
---
 ironic/drivers/modules/inspector.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/ironic/drivers/modules/inspector.py b/ironic/drivers/modules/inspector.py
index 1531e04e97..44dd525d38 100644
--- a/ironic/drivers/modules/inspector.py
+++ b/ironic/drivers/modules/inspector.py
@@ -130,10 +130,9 @@ class Inspector(base.InspectInterface):
 
         for node_uuid, driver in node_iter:
             try:
-                # TODO(dtantsur): we need an exclusive lock only once
-                # inspection is finished.
                 lock_purpose = 'checking hardware inspection status'
                 with task_manager.acquire(context, node_uuid,
+                                          shared=True,
                                           purpose=lock_purpose) as task:
                     _check_status(task)
             except (exception.NodeLocked, exception.NodeNotFound):
@@ -194,14 +193,24 @@ def _check_status(task):
                       node.uuid)
         return
 
-    if status.get('error'):
+    error = status.get('error')
+    finished = status.get('finished')
+    if not error and not finished:
+        return
+
+    # If the inspection has finished or failed, we need to update the node, so
+    # upgrade our lock to an exclusive one.
+    task.upgrade_lock()
+    node = task.node
+
+    if error:
         LOG.error(_LE('Inspection failed for node %(uuid)s '
                       'with error: %(err)s'),
-                  {'uuid': node.uuid, 'err': status['error']})
+                  {'uuid': node.uuid, 'err': error})
         node.last_error = (_('ironic-inspector inspection failed: %s')
-                           % status['error'])
+                           % error)
         task.process_event('fail')
-    elif status.get('finished'):
+    elif finished:
         LOG.info(_LI('Inspection finished successfully for node %s'),
                  node.uuid)
         task.process_event('done')