diff --git a/ironic/conductor/task_manager.py b/ironic/conductor/task_manager.py index b4089d6187..7ed3eab3c4 100644 --- a/ironic/conductor/task_manager.py +++ b/ironic/conductor/task_manager.py @@ -283,7 +283,7 @@ class TaskManager(object): reserve_node() - def upgrade_lock(self, purpose=None): + def upgrade_lock(self, purpose=None, retry=None): """Upgrade a shared lock to an exclusive lock. Also reloads node object from the database. @@ -291,11 +291,16 @@ class TaskManager(object): when provided with one. :param purpose: optionally change the purpose of the lock + :param retry: whether to retry locking if it fails, the class-level + value is used by default :raises: NodeLocked if an exclusive lock remains on the node after "node_locked_retry_attempts" """ if purpose is not None: self._purpose = purpose + if retry is not None: + self._retry = retry + if self.shared: LOG.debug('Upgrading shared lock on node %(uuid)s for %(purpose)s ' 'to an exclusive one (shared lock was held %(time).2f ' diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index 00b66952f1..ee1af290d4 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -626,7 +626,7 @@ class HeartbeatMixin(object): return try: - task.upgrade_lock() + task.upgrade_lock(retry=False) except exception.NodeLocked: LOG.warning('Node %s is currently locked, skipping heartbeat ' 'processing (will retry on the next heartbeat)', diff --git a/ironic/tests/unit/conductor/test_task_manager.py b/ironic/tests/unit/conductor/test_task_manager.py index 8a57d7deaa..e49982d593 100644 --- a/ironic/tests/unit/conductor/test_task_manager.py +++ b/ironic/tests/unit/conductor/test_task_manager.py @@ -200,6 +200,28 @@ class TaskManagerTestCase(db_base.DbTestCase): reserve_mock.assert_called_once_with(self.context, self.host, 'fake-node-id') + def test_excl_lock_upgade_exception_no_retries( + self, get_voltgt_mock, get_volconn_mock, get_portgroups_mock, + get_ports_mock, build_driver_mock, + reserve_mock, release_mock, node_get_mock): + retry_attempts = 3 + self.config(node_locked_retry_attempts=retry_attempts, + group='conductor') + + node_get_mock.return_value = self.node + # Fail on the first lock attempt, succeed on the second. + reserve_mock.side_effect = [exception.NodeLocked(node='foo', + host='foo'), + self.node] + + task = task_manager.TaskManager(self.context, 'fake-node-id', + shared=True) + self.assertRaises(exception.NodeLocked, + task.upgrade_lock, retry=False) + + reserve_mock.assert_called_once_with(self.context, self.host, + 'fake-node-id') + def test_excl_lock_reserve_exception( self, get_voltgt_mock, get_volconn_mock, get_portgroups_mock, get_ports_mock, build_driver_mock, diff --git a/releasenotes/notes/no-heartbeat-retries-d6837684e7257249.yaml b/releasenotes/notes/no-heartbeat-retries-d6837684e7257249.yaml new file mode 100644 index 0000000000..4932fb66b5 --- /dev/null +++ b/releasenotes/notes/no-heartbeat-retries-d6837684e7257249.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Agent heartbeat implementation will no longer retry attempts to acquire + an exclusive lock. This is done to avoid several attempts to get a lock + in a busy environment with shorter heartbeat period.