From 3436c193c58ac7eb386d1af223f69cd999feafad Mon Sep 17 00:00:00 2001 From: Dan Prince Date: Mon, 31 Aug 2015 18:27:17 -0400 Subject: [PATCH] Add retry options to iBoot power driver This patch adds two new iBoot specific options to facilitate a max_retry and retry_interval around the internal conn.switch() and conn.get_relays() functions. This should help avoid provisioning failures if there is any sort of network blip or failure that may occasionally cause an iboot command to fail. Change-Id: I3b7813ae56dd3b18008f814cd6272d801dd6f274 Closes-bug: #1490760 --- etc/ironic/ironic.conf.sample | 14 +++++ ironic/drivers/modules/iboot.py | 88 ++++++++++++++++++++++++------ ironic/tests/drivers/test_iboot.py | 51 ++++++++++++++--- 3 files changed, 127 insertions(+), 26 deletions(-) diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index d50dfcf98a..cb3e84e609 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -884,6 +884,20 @@ #auth_strategy=keystone +[iboot] + +# +# Options defined in ironic.drivers.modules.iboot +# + +# Maximum retries for iBoot operations (integer value) +#max_retry=3 + +# Time between retry attempts for iBoot operations (integer +# value) +#retry_interval=1 + + [ilo] # diff --git a/ironic/drivers/modules/iboot.py b/ironic/drivers/modules/iboot.py index 328854a59e..e41668ea2c 100644 --- a/ironic/drivers/modules/iboot.py +++ b/ironic/drivers/modules/iboot.py @@ -19,7 +19,9 @@ Ironic iBoot PDU power manager. """ +from oslo_config import cfg from oslo_log import log as logging +from oslo_service import loopingcall from oslo_utils import importutils from ironic.common import exception @@ -31,6 +33,20 @@ from ironic.drivers import base iboot = importutils.try_import('iboot') +opts = [ + cfg.IntOpt('max_retry', + default=3, + help=_('Maximum retries for iBoot operations')), + cfg.IntOpt('retry_interval', + default=1, + help=_('Time between retry attempts for iBoot operations')), +] + +CONF = cfg.CONF +opt_group = cfg.OptGroup(name='iboot', + title='Options for the iBoot power driver') +CONF.register_group(opt_group) +CONF.register_opts(opts, opt_group) LOG = logging.getLogger(__name__) @@ -95,30 +111,66 @@ def _get_connection(driver_info): def _switch(driver_info, enabled): conn = _get_connection(driver_info) relay_id = driver_info['relay_id'] - return conn.switch(relay_id, enabled) + + def _wait_for_switch(mutable): + if mutable['retries'] > CONF.iboot.max_retry: + LOG.warning(_LW( + 'Reached maximum number of attempts ( %(attempts)d ) to set ' + 'power state for node %(node)s to "%(op)s"'), + {'attempts': mutable['retries'], 'node': driver_info['uuid'], + 'op': states.POWER_ON if enabled else states.POWER_OFF}) + raise loopingcall.LoopingCallDone() + + try: + mutable['retries'] += 1 + mutable['response'] = conn.switch(relay_id, enabled) + if mutable['response']: + raise loopingcall.LoopingCallDone() + except (TypeError, IndexError): + LOG.warning(_LW("Cannot call set power state for node '%(node)s' " + "at relay '%(relay)s'. iBoot switch() failed."), + {'node': driver_info['uuid'], 'relay': relay_id}) + + mutable = {'response': False, 'retries': 0} + timer = loopingcall.FixedIntervalLoopingCall(_wait_for_switch, + mutable) + timer.start(interval=CONF.iboot.retry_interval).wait() + return mutable['response'] def _power_status(driver_info): conn = _get_connection(driver_info) relay_id = driver_info['relay_id'] - try: - response = conn.get_relays() - status = response[relay_id - 1] - except TypeError: - msg = (_("Cannot get power status for node '%(node)s'. iBoot " - "get_relays() failed.") % {'node': driver_info['uuid']}) - LOG.error(msg) - raise exception.IBootOperationError(message=msg) - except IndexError: - LOG.warning(_LW("Cannot get power status for node '%(node)s' at relay " - "'%(relay)s'. iBoot get_relays() failed."), - {'node': driver_info['uuid'], 'relay': relay_id}) - return states.ERROR - if status: - return states.POWER_ON - else: - return states.POWER_OFF + def _wait_for_power_status(mutable): + + if mutable['retries'] > CONF.iboot.max_retry: + LOG.warning(_LW( + 'Reached maximum number of attempts ( %(attempts)d ) to get ' + 'power state for node %(node)s'), + {'attempts': mutable['retries'], 'node': driver_info['uuid']}) + raise loopingcall.LoopingCallDone() + + try: + mutable['retries'] += 1 + response = conn.get_relays() + status = response[relay_id - 1] + if status: + mutable['state'] = states.POWER_ON + else: + mutable['state'] = states.POWER_OFF + raise loopingcall.LoopingCallDone() + except (TypeError, IndexError): + LOG.warning(_LW("Cannot get power state for node '%(node)s' at " + "relay '%(relay)s'. iBoot get_relays() failed."), + {'node': driver_info['uuid'], 'relay': relay_id}) + + mutable = {'state': states.ERROR, 'retries': 0} + + timer = loopingcall.FixedIntervalLoopingCall(_wait_for_power_status, + mutable) + timer.start(interval=CONF.iboot.retry_interval).wait() + return mutable['state'] class IBootPower(base.PowerInterface): diff --git a/ironic/tests/drivers/test_iboot.py b/ironic/tests/drivers/test_iboot.py index 1de4200adc..0b1c9ed7d3 100644 --- a/ironic/tests/drivers/test_iboot.py +++ b/ironic/tests/drivers/test_iboot.py @@ -37,6 +37,11 @@ INFO_DICT = db_utils.get_test_iboot_info() class IBootPrivateMethodTestCase(db_base.DbTestCase): + def setUp(self): + super(IBootPrivateMethodTestCase, self).setUp() + self.config(max_retry=0, group='iboot') + self.config(retry_interval=0, group='iboot') + def test__parse_driver_info_good(self): node = obj_utils.create_test_node( self.context, @@ -169,10 +174,8 @@ class IBootPrivateMethodTestCase(db_base.DbTestCase): driver_info=INFO_DICT) info = iboot._parse_driver_info(node) - self.assertRaises(exception.IBootOperationError, - iboot._power_status, - info) - + status = iboot._power_status(info) + self.assertEqual(states.ERROR, status) mock_get_conn.assert_called_once_with(info) mock_connection.get_relays.assert_called_once_with() @@ -189,10 +192,8 @@ class IBootPrivateMethodTestCase(db_base.DbTestCase): driver_info=INFO_DICT) info = iboot._parse_driver_info(node) - self.assertRaises(exception.IBootOperationError, - iboot._power_status, - info) - + status = iboot._power_status(info) + self.assertEqual(states.ERROR, status) mock_get_conn.assert_called_once_with(info) mock_connection.get_relays.assert_called_once_with() @@ -231,6 +232,26 @@ class IBootPrivateMethodTestCase(db_base.DbTestCase): mock_get_conn.assert_called_once_with(info) mock_connection.get_relays.assert_called_once_with() + @mock.patch.object(iboot, '_get_connection', autospec=True) + def test__power_status_retries(self, mock_get_conn): + self.config(max_retry=1, group='iboot') + + mock_connection = mock.MagicMock(spec_set=['get_relays']) + side_effect = TypeError("Surprise!") + mock_connection.get_relays.side_effect = side_effect + + mock_get_conn.return_value = mock_connection + node = obj_utils.create_test_node( + self.context, + driver='fake_iboot', + driver_info=INFO_DICT) + info = iboot._parse_driver_info(node) + + status = iboot._power_status(info) + self.assertEqual(states.ERROR, status) + mock_get_conn.assert_called_once_with(info) + self.assertEqual(2, mock_connection.get_relays.call_count) + class IBootDriverTestCase(db_base.DbTestCase): @@ -318,6 +339,20 @@ class IBootDriverTestCase(db_base.DbTestCase): self.assertEqual(manager.mock_calls, expected) + @mock.patch.object(iboot, '_power_status', autospec=True) + @mock.patch.object(iboot, '_get_connection', autospec=True) + def test__switch_retries(self, mock_get_conn, mock_power_status): + self.config(max_retry=1, group='iboot') + mock_power_status.return_value = states.POWER_ON + + mock_connection = mock.MagicMock(spec_set=['switch']) + side_effect = TypeError("Surprise!") + mock_connection.switch.side_effect = side_effect + mock_get_conn.return_value = mock_connection + + iboot._switch(self.info, False) + self.assertEqual(2, mock_connection.switch.call_count) + @mock.patch.object(iboot, '_power_status', autospec=True) def test_get_power_state(self, mock_power_status): mock_power_status.return_value = states.POWER_ON