From b07ebc4123d08f81e67dc52e7c0cce0292711317 Mon Sep 17 00:00:00 2001 From: Derek Higgins Date: Tue, 2 Jul 2024 17:36:44 +0100 Subject: [PATCH] Set node "alive" when inspection finished To make the node fast trackable as soon as inspection finishes, in addition add a wait for the agent to callback should it not be available when fast track is attempted. Closes-Bug: #2078820 Change-Id: I8a95fc08cf355b7b745a565e3a05c9dc0875a63e --- ironic/conductor/cleaning.py | 6 ++++++ ironic/conductor/deployments.py | 5 +++++ ironic/conductor/utils.py | 21 ++++++++++++++----- ironic/tests/unit/conductor/test_utils.py | 12 +++++++++++ ...-inspection-finished-1ec74828852eaeef.yaml | 7 +++++++ 5 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 releasenotes/notes/set-node-alive-when-inspection-finished-1ec74828852eaeef.yaml diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py index 8f8f0020b2..a7f75be686 100644 --- a/ironic/conductor/cleaning.py +++ b/ironic/conductor/cleaning.py @@ -95,6 +95,12 @@ def do_node_clean(task, clean_steps=None, disable_ramdisk=False): 'out-of-band only cleaning has been requested for node ' '%s', node.uuid) prepare_result = None + except exception.AgentConnectionFailed: + LOG.info('Agent is not yet running on node %(node)s, waiting for' + ' agent to come up for fast track', {'node': node.uuid}) + target_state = states.MANAGEABLE if manual_clean else None + task.process_event('wait', target_state=target_state) + return except Exception as e: msg = (_('Failed to prepare node %(node)s for cleaning: %(e)s') % {'node': node.uuid, 'e': e}) diff --git a/ironic/conductor/deployments.py b/ironic/conductor/deployments.py index a14898ede4..f9c4f463d8 100644 --- a/ironic/conductor/deployments.py +++ b/ironic/conductor/deployments.py @@ -187,6 +187,11 @@ def do_node_deploy(task, conductor_id=None, configdrive=None, try: task.driver.deploy.prepare(task) + except exception.AgentConnectionFailed: + LOG.info('Agent is not yet running 
on node %(node)s, waiting for agent' + ' to come up for fast track', {'node': node.uuid}) + task.process_event('wait') + return except exception.IronicException as e: with excutils.save_and_reraise_exception(): utils.deploying_error_handler( diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 1bed945390..af44db2988 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -1150,13 +1150,18 @@ def fast_track_able(task): def value_within_timeout(value, timeout): """Checks if the time is within the previous timeout seconds from now. - :param value: a string representing date and time or None. + :param value: a datetime or string representing date and time or None. :param timeout: timeout in seconds. """ # use native datetime objects for conversion and compare # slightly odd because py2 compatibility :( - last = datetime.datetime.strptime(value or '1970-01-01T00:00:00.000000', - "%Y-%m-%dT%H:%M:%S.%f") + if isinstance(value, datetime.datetime): + # Converts to a offset-naive datetime(as created by timeutils.utcnow()) + last = value.replace(tzinfo=None) + else: + defaultdt = '1970-01-01T00:00:00.000000' + last = datetime.datetime.strptime(value or defaultdt, + '%Y-%m-%dT%H:%M:%S.%f') # If we found nothing, we assume that the time is essentially epoch. time_delta = datetime.timedelta(seconds=timeout) last_valid = timeutils.utcnow() - time_delta @@ -1173,14 +1178,20 @@ def agent_is_alive(node, timeout=None): :param node: A node object. :param timeout: Heartbeat timeout, defaults to `fast_track_timeout`. 
""" + + timeout = timeout or CONF.deploy.fast_track_timeout + if node.power_state == states.POWER_ON and \ + node.inspection_finished_at and \ + value_within_timeout(node.inspection_finished_at, timeout): + return True + # If no agent_url is present then we have powered down since the # last agent heartbeat if not node.driver_internal_info.get('agent_url'): return False return value_within_timeout( - node.driver_internal_info.get('agent_last_heartbeat'), - timeout or CONF.deploy.fast_track_timeout) + node.driver_internal_info.get('agent_last_heartbeat'), timeout) def is_fast_track(task): diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index f40e2c6d4b..8d2839afd8 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -2238,6 +2238,18 @@ class FastTrackTestCase(db_base.DbTestCase): self.context, self.node.uuid, shared=False) as task: self.assertFalse(conductor_utils.is_fast_track(task)) + def test_is_fast_track_inspected_no_heartbeat(self, mock_get_power): + mock_get_power.return_value = states.POWER_ON + self.node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + uuid=uuidutils.generate_uuid(), + inspection_finished_at=timeutils.utcnow(), + power_state=states.POWER_ON + ) + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + self.assertTrue(conductor_utils.is_fast_track(task)) + def test_is_fast_track_powered_after_heartbeat(self, mock_get_power): mock_get_power.return_value = states.POWER_ON with task_manager.acquire( diff --git a/releasenotes/notes/set-node-alive-when-inspection-finished-1ec74828852eaeef.yaml b/releasenotes/notes/set-node-alive-when-inspection-finished-1ec74828852eaeef.yaml new file mode 100644 index 0000000000..275455967d --- /dev/null +++ b/releasenotes/notes/set-node-alive-when-inspection-finished-1ec74828852eaeef.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Set node "alive" and make it fast 
trackable + as soon as inspection is finished. In addition, + wait for the agent to call back if it is + not available when fast track is attempted.