diff --git a/ironic/drivers/modules/redfish/utils.py b/ironic/drivers/modules/redfish/utils.py index a590494ae1..eed3911789 100644 --- a/ironic/drivers/modules/redfish/utils.py +++ b/ironic/drivers/modules/redfish/utils.py @@ -240,6 +240,16 @@ class SessionCache(object): # NOTE(etingof): perhaps this session token is no good if isinstance(exc_val, sushy.exceptions.ConnectionError): self.__class__._sessions.pop(self._session_key, None) + # NOTE(TheJulia): A hard access error has surfaced, we + # likely need to eliminate the session. + if isinstance(exc_val, sushy.exceptions.AccessError): + self.__class__._sessions.pop(self._session_key, None) + # NOTE(TheJulia): Something very bad has happened, such + # as the session is out of date, and refresh of the SessionService + # failed resulting in an AttributeError surfacing. + # https://storyboard.openstack.org/#!/story/2009719 + if isinstance(exc_val, AttributeError): + self.__class__._sessions.pop(self._session_key, None) @classmethod def _expire_oldest_session(cls): @@ -364,6 +374,23 @@ def _get_connection(node, lambda_fun, *args): 'auth_type': driver_info['auth_type'], 'node': node.uuid, 'error': e}) raise exception.RedfishConnectionError(node=node.uuid, error=e) + except sushy.exceptions.AccessError as e: + LOG.warning('For node %(node)s, we receieved an authentication ' + 'access error from address %(address)s with auth_type ' + '%(auth_type)s. The client will not be re-used upon ' + 'the next re-attempt. Please ensure your using the ' + 'correct credentials. Error: %(error)s', + {'address': driver_info['address'], + 'auth_type': driver_info['auth_type'], + 'node': node.uuid, 'error': e}) + raise exception.RedfishError(node=node.uuid, error=e) + except AttributeError as e: + LOG.warning('For node %(node)s, we receieved at AttributeError ' + 'when attempting to utilize the client. A new ' + 'client session shall be used upon the next attempt.' + 'Error: %(error)s', + {'node': node.uuid, 'error': e}) + raise exception.RedfishError(node=node.uuid, error=e) try: return _get_cached_connection(lambda_fun, *args) diff --git a/ironic/tests/unit/drivers/modules/redfish/test_utils.py b/ironic/tests/unit/drivers/modules/redfish/test_utils.py index 9bf59532fb..ca8aba9da8 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_utils.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_utils.py @@ -426,8 +426,9 @@ class RedfishUtilsSystemTestCase(db_base.DbTestCase): # Redfish specific configurations self.config(connection_attempts=3, group='redfish') - fake_conn = mock_sushy.return_value + fake_conn = mock.Mock() fake_conn.get_system.side_effect = sushy.exceptions.ConnectionError() + mock_sushy.return_value = fake_conn self.assertRaises(exception.RedfishConnectionError, redfish_utils.get_system, self.node) @@ -477,3 +478,72 @@ class RedfishUtilsSystemTestCase(db_base.DbTestCase): redfish_utils.wait_until_get_system_ready, self.node) self.assertEqual(fake_conn.get_system.call_count, 2) + + @mock.patch.object(time, 'sleep', lambda seconds: None) + @mock.patch.object(sushy, 'Sushy', autospec=True) + @mock.patch('ironic.drivers.modules.redfish.utils.' + 'SessionCache._sessions', {}) + def test_get_system_resource_access_error_retry(self, mock_sushy): + + # Sushy access errors HTTP Errors + class fake_response(object): + status_code = 401 + body = None + + def json(): + return {} + + fake_conn = mock_sushy.return_value + fake_system = mock.Mock() + fake_conn.get_system.side_effect = iter( + [ + sushy.exceptions.AccessError( + method='GET', + url='http://path/to/url', + response=fake_response), + fake_system, + ]) + + self.assertRaises(exception.RedfishError, + redfish_utils.get_system, self.node) + # Retry, as in next power sync perhaps + client = redfish_utils.get_system(self.node) + client('foo') + + expected_get_system_calls = [ + mock.call(self.parsed_driver_info['system_id']), + mock.call(self.parsed_driver_info['system_id']), + ] + fake_conn.get_system.assert_has_calls(expected_get_system_calls) + fake_system.assert_called_with('foo') + self.assertEqual(fake_conn.get_system.call_count, 2) + + @mock.patch.object(time, 'sleep', lambda seconds: None) + @mock.patch.object(sushy, 'Sushy', autospec=True) + @mock.patch('ironic.drivers.modules.redfish.utils.' + 'SessionCache._sessions', {}) + def test_get_system_resource_attribute_error(self, mock_sushy): + + fake_conn = mock_sushy.return_value + fake_system = mock.Mock() + fake_conn.get_system.side_effect = iter( + [ + AttributeError, + fake_system, + ]) + # We need to check for AttributeError explicitly as + # otherwise we break existing tests if we try to catch + # it explicitly. + self.assertRaises(exception.RedfishError, + redfish_utils.get_system, self.node) + # Retry, as in next power sync perhaps + client = redfish_utils.get_system(self.node) + client('bar') + expected_get_system_calls = [ + mock.call(self.parsed_driver_info['system_id']), + mock.call(self.parsed_driver_info['system_id']), + ] + + fake_conn.get_system.assert_has_calls(expected_get_system_calls) + fake_system.assert_called_once_with('bar') + self.assertEqual(fake_conn.get_system.call_count, 2) diff --git a/releasenotes/notes/redfish-connection-cache-pool-accesserror-743e39a2f017b990.yaml b/releasenotes/notes/redfish-connection-cache-pool-accesserror-743e39a2f017b990.yaml new file mode 100644 index 0000000000..5bca75335e --- /dev/null +++ b/releasenotes/notes/redfish-connection-cache-pool-accesserror-743e39a2f017b990.yaml @@ -0,0 +1,13 @@ +--- +fixes: + - | + Fixes connection caching issues with Redfish BMCs where AccessErrors were + previously not disqualifying the cached connection from being re-used. + Ironic will now explicitly open a new connection instead of using the + previous connection in the cache. Under normal circumstances, the + ``sushy`` redfish library would detect and refresh sessions, + however a prior case exists where it may not detect a failure and contain + cached session credential data which is ultimately invalid, blocking + future access to the BMC via Redfish until the cache entry expired or + the ``ironic-conductor`` service was restarted. For more information + please see `story 2009719 `_.