Fix two severe errors in the firmware caching code
First, it tries to create components even if the current version is not
known and fails with a database constraint error (because the initial
version cannot be NULL). Can be reproduced with sushy-tools before
37f118237a
Second, unexpected exceptions are not handled in the caching code, so
any of them will cause the node to get stuck in cleaning forever.
On top of that, the caching code is missing a metrics decorator.
This change does not update any unit tests because none currently exist.
Change-Id: Iaa242ca6aa6138fcdaaf63b763708e2f1e559cb0
This commit is contained in:
parent
dcea5f5a1d
commit
23745d97fe
@ -1449,8 +1449,8 @@ def node_cache_bios_settings(task, node):
|
||||
except exception.UnsupportedDriverExtension:
|
||||
LOG.warning('BIOS settings are not supported for node %s, '
|
||||
'skipping', node.uuid)
|
||||
# TODO(zshi) remove this check when classic drivers are removed
|
||||
except Exception:
|
||||
# NOTE(dtantsur): the caller expects this function to never fail
|
||||
msg = (_('Caching of bios settings failed on node %(node)s.')
|
||||
% {'node': node.uuid})
|
||||
LOG.exception(msg)
|
||||
@ -1474,6 +1474,7 @@ def node_cache_vendor(task):
|
||||
except exception.UnsupportedDriverExtension:
|
||||
return
|
||||
except Exception as exc:
|
||||
# NOTE(dtantsur): the caller expects this function to never fail
|
||||
LOG.warning('Unexpected exception when trying to detect vendor '
|
||||
'for node %(node)s. %(class)s: %(exc)s',
|
||||
{'node': task.node.uuid,
|
||||
@ -1840,6 +1841,10 @@ def node_cache_firmware_components(task):
|
||||
except exception.UnsupportedDriverExtension:
|
||||
LOG.warning('Firmware Components are not supported for node %s, '
|
||||
'skipping', task.node.uuid)
|
||||
except Exception:
|
||||
# NOTE(dtantsur): the caller expects this function to never fail
|
||||
LOG.exception('Caching of firmware components failed on node %s',
|
||||
task.node.uuid)
|
||||
|
||||
|
||||
def run_node_action(task, call, error_msg, success_msg=None, **kwargs):
|
||||
|
@ -71,6 +71,7 @@ class RedfishFirmware(base.FirmwareInterface):
|
||||
"""
|
||||
redfish_utils.parse_driver_info(task.node)
|
||||
|
||||
@METRICS.timer('RedfishFirmware.cache_firmware_components')
|
||||
def cache_firmware_components(self, task):
|
||||
"""Store or update Firmware Components on the given node.
|
||||
|
||||
@ -90,25 +91,27 @@ class RedfishFirmware(base.FirmwareInterface):
|
||||
|
||||
system = redfish_utils.get_system(task.node)
|
||||
|
||||
bios_fw = {'component': 'bios',
|
||||
'current_version': system.bios_version}
|
||||
settings.append(bios_fw)
|
||||
if system.bios_version:
|
||||
bios_fw = {'component': 'bios',
|
||||
'current_version': system.bios_version}
|
||||
settings.append(bios_fw)
|
||||
|
||||
# NOTE(iurygregory): normally we only relay on the System to
|
||||
# perform actions, but to retrieve the BMC Firmware we need to
|
||||
# access the Manager.
|
||||
try:
|
||||
manager = redfish_utils.get_manager(task.node, system)
|
||||
bmc_fw = {'component': 'bmc',
|
||||
'current_version': manager.firmware_version}
|
||||
settings.append(bmc_fw)
|
||||
if manager.firmware_version:
|
||||
bmc_fw = {'component': 'bmc',
|
||||
'current_version': manager.firmware_version}
|
||||
settings.append(bmc_fw)
|
||||
except exception.RedfishError:
|
||||
LOG.warning('No manager available to retrieve Firmware '
|
||||
'from the bmc of node %s', task.node.uuid)
|
||||
|
||||
if not settings:
|
||||
error_msg = (_('Cannot retrieve firmware for node %s.')
|
||||
% task.node.uuid)
|
||||
error_msg = (_('Cannot retrieve firmware for node %s: no '
|
||||
'supported components') % task.node.uuid)
|
||||
LOG.error(error_msg)
|
||||
raise exception.UnsupportedDriverExtension(error_msg)
|
||||
|
||||
|
8
releasenotes/notes/firmware-fail-c6f6c70220373033.yaml
Normal file
8
releasenotes/notes/firmware-fail-c6f6c70220373033.yaml
Normal file
@ -0,0 +1,8 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Nodes no longer get stuck in cleaning when the firmware components caching
|
||||
code raises an unexpected exception.
|
||||
- |
|
||||
Prevents a database constraints error on caching firmware components
|
||||
when a supported component does not have the current version.
|
Loading…
Reference in New Issue
Block a user