From a6e3a7f50cd8594520cf80fa4ef0a07646221809 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Fri, 1 Dec 2023 17:36:31 +0100 Subject: [PATCH] Handle internal server errors while configuring secure boot At least on some Dell machines, the Redfish SecureBoot resource is unavailable during configuration: GET requests return HTTP 503. Sushy does retry these, but not for long enough (the error message suggests at least 30 seconds, which would be too much to just integrate in Sushy). This change treats internal errors the same way as a mismatching "enabled" value, i.e. just waits. Change-Id: I676f48de6b6195a69ea76b4e8b45a034220db2fa --- ironic/drivers/modules/redfish/management.py | 14 +++++++++++--- .../drivers/modules/redfish/test_management.py | 3 +++ .../notes/redfish-500-fea3a8f86c0aecc7.yaml | 6 ++++++ 3 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 releasenotes/notes/redfish-500-fea3a8f86c0aecc7.yaml diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py index fd20d2b7be..0b4472f411 100644 --- a/ironic/drivers/modules/redfish/management.py +++ b/ironic/drivers/modules/redfish/management.py @@ -1225,8 +1225,16 @@ class RedfishManagement(base.ManagementInterface): def _wait_for_secure_boot(self, task, sb, state): # NOTE(dtantsur): at least Dell machines change secure boot status via # a BIOS configuration job. A reboot is needed to apply it. 
- sb.refresh(force=True) - if sb.enabled == state: + + def _try_refresh(): + try: + sb.refresh(force=True) + except sushy.exceptions.ServerSideError: + return False # sushy already does logging, just return + else: + return True + + if _try_refresh() and sb.enabled == state: return LOG.info('Rebooting node %(node)s to change secure boot state to ' @@ -1244,7 +1252,7 @@ class RedfishManagement(base.ManagementInterface): {'node': task.node.uuid, 'value': state, 'current': sb.enabled}) time.sleep(BOOT_MODE_CONFIG_INTERVAL) - sb.refresh(force=True) + _try_refresh() if sb.enabled != state: msg = (_('Timeout reached while waiting for secure boot state ' diff --git a/ironic/tests/unit/drivers/modules/redfish/test_management.py b/ironic/tests/unit/drivers/modules/redfish/test_management.py index 6a6778ea46..02a0ac7a9c 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_management.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_management.py @@ -1745,6 +1745,9 @@ class RedfishManagementTestCase(db_base.DbTestCase): def side_effect(force): nonlocal attempts attempts -= 1 + if attempts >= 2: + raise sushy.exceptions.ServerSideError( + "POST", 'img-url', mock.MagicMock()) if attempts <= 0: fake_sb.enabled = True diff --git a/releasenotes/notes/redfish-500-fea3a8f86c0aecc7.yaml b/releasenotes/notes/redfish-500-fea3a8f86c0aecc7.yaml new file mode 100644 index 0000000000..078ce6ec48 --- /dev/null +++ b/releasenotes/notes/redfish-500-fea3a8f86c0aecc7.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + When configuring secure boot via Redfish, internal server errors are now + retried for a longer period than by default, accounting for the SecureBoot + resource unavailability during configuration on some hardware.