From c253ebfef348969f6719efdf5c77bdf06896721f Mon Sep 17 00:00:00 2001
From: "James E. Blair"
Date: Wed, 24 Apr 2024 11:03:50 -0700
Subject: [PATCH] Add min-retention-time to metastatic driver

This adds the ability to configure the minimum retention time for the
backing node of a metastatic resource.  This is useful for cloud
resources with minimum billing intervals (e.g., you are billed for 24
hours of an instance even if you use it for less).

Change-Id: Ibb0588f244fc6697950f0401145e0ec5ad2482c3
---
 doc/source/metastatic.rst                     | 10 +++
 nodepool/driver/metastatic/adapter.py         |  9 ++-
 nodepool/driver/metastatic/config.py          |  5 +-
 nodepool/tests/fixtures/metastatic.yaml       | 13 ++++
 nodepool/tests/unit/test_driver_metastatic.py | 69 ++++++++++++++++++-
 ...static-min-retention-94451b1bdc9c0f44.yaml |  7 ++
 6 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 releasenotes/notes/metastatic-min-retention-94451b1bdc9c0f44.yaml

diff --git a/doc/source/metastatic.rst b/doc/source/metastatic.rst
index fbcbd5416..dc0110f33 100644
--- a/doc/source/metastatic.rst
+++ b/doc/source/metastatic.rst
@@ -193,6 +193,16 @@ itself, which is "meta".
          arrive for this label before deleting the backing node.  Set
          this value to the amount of time in seconds to wait.
 
+      .. attr:: min-retention-time
+         :type: int
+
+         If this value is set, the backing node will not be
+         deleted unless this amount of time (in seconds) has
+         passed since the backing node was launched.  For backing
+         node resources with minimum billing times, this can be
+         used to ensure that the backing node is retained for at
+         least the minimum billing interval.
+
       .. attr:: host-key-checking
         :type: bool
         :default: False
diff --git a/nodepool/driver/metastatic/adapter.py b/nodepool/driver/metastatic/adapter.py
index ee59df59e..7aefdac75 100644
--- a/nodepool/driver/metastatic/adapter.py
+++ b/nodepool/driver/metastatic/adapter.py
@@ -218,6 +218,7 @@ class BackingNodeRecord:
         self.request_id = None
         self.allocated_nodes = [None for x in range(slot_count)]
         self.failed = False
+        self.launched = time.time()
         self.last_used = time.time()
 
     def hasAvailableSlot(self):
@@ -296,14 +297,18 @@ class MetastaticAdapter(statemachine.Adapter):
                     label_config = self.provider._getLabel(bnr.label_name)
                     if label_config:
                         grace_time = label_config.grace_time
+                        min_time = label_config.min_retention_time
                     else:
                         # The label doesn't exist in our config any more,
                         # it must have been removed.
                        grace_time = 0
+                        min_time = 0
                     if bnr.failed:
                         grace_time = 0
+                        min_time = 0
                     if (bnr.isEmpty() and
-                        now - bnr.last_used > grace_time):
+                        now - bnr.last_used > grace_time and
+                        now - bnr.launched > min_time):
                         self.log.info("Backing node %s has been idle for "
                                       "%s seconds, releasing",
                                       bnr.node_id, now - bnr.last_used)
@@ -373,6 +378,7 @@ class MetastaticAdapter(statemachine.Adapter):
                     user_data['slots'])
                 backing_node_record.node_id = node.id
                 backing_node_record.failed = user_data.get('failed', False)
+                backing_node_record.launched = user_data.get('launched', 0)
                 self.log.info("Found backing node %s for %s",
                               node.id, user_data['label'])
                 self._addBackingNode(user_data['label'],
@@ -454,6 +460,7 @@ class MetastaticAdapter(statemachine.Adapter):
                 'label': bnr.label_name,
                 'slots': bnr.slot_count,
                 'failed': bnr.failed,
+                'launched': bnr.launched,
             })
 
     def _checkBackingNodeRequests(self):
diff --git a/nodepool/driver/metastatic/config.py b/nodepool/driver/metastatic/config.py
index 1f245ab71..4ab9ab88d 100644
--- a/nodepool/driver/metastatic/config.py
+++ b/nodepool/driver/metastatic/config.py
@@ -45,6 +45,7 @@ class MetastaticLabel(ConfigValue):
         self.cloud_image = MetastaticCloudImage()
         self.max_parallel_jobs = label.get('max-parallel-jobs', 1)
         self.grace_time = label.get('grace-time', 60)
+        self.min_retention_time = label.get('min-retention-time', 0)
         self.host_key_checking = label.get('host-key-checking',
                                            self.pool.host_key_checking)
 
@@ -55,6 +56,7 @@ class MetastaticLabel(ConfigValue):
             v.Required('backing-label'): str,
             'max-parallel-jobs': int,
             'grace-time': int,
+            'min-retention-time': int,
             'host-key-checking': bool,
         }
 
@@ -63,7 +65,8 @@ class MetastaticLabel(ConfigValue):
         return (
             self.backing_label == other.backing_label and
             self.max_parallel_jobs == other.max_parallel_jobs and
-            self.grace_time == other.grace_time
+            self.grace_time == other.grace_time and
+            self.min_retention_time == other.min_retention_time
         )
 
 
diff --git a/nodepool/tests/fixtures/metastatic.yaml b/nodepool/tests/fixtures/metastatic.yaml
index b3127839e..343562869 100644
--- a/nodepool/tests/fixtures/metastatic.yaml
+++ b/nodepool/tests/fixtures/metastatic.yaml
@@ -21,6 +21,10 @@ labels:
     min-ready: 0
   - name: user-label
     min-ready: 0
+  - name: backing-label-min-retention
+    min-ready: 0
+  - name: user-label-min-retention
+    min-ready: 0
 
 providers:
   # The backing node provider: a cloud
@@ -42,6 +46,10 @@ providers:
             cloud-image: fake-image
             min-ram: 8192
             flavor-name: 'Fake'
+          - name: backing-label-min-retention
+            cloud-image: fake-image
+            min-ram: 8192
+            flavor-name: 'Fake'
 
   - name: meta-provider
     driver: metastatic
@@ -59,3 +67,8 @@ providers:
             max-parallel-jobs: 2
             grace-time: 2
             host-key-checking: true
+          - name: user-label-min-retention
+            backing-label: backing-label-min-retention
+            grace-time: 2
+            min-retention-time: 300
+            host-key-checking: true
diff --git a/nodepool/tests/unit/test_driver_metastatic.py b/nodepool/tests/unit/test_driver_metastatic.py
index 231ca4076..feaa7358e 100644
--- a/nodepool/tests/unit/test_driver_metastatic.py
+++ b/nodepool/tests/unit/test_driver_metastatic.py
@@ -35,11 +35,11 @@ class TestDriverMetastatic(tests.DBTestCase):
         StateMachineProvider.MINIMUM_SLEEP = 0.1
         StateMachineProvider.MAXIMUM_SLEEP = 1
 
-    def _requestNode(self):
+    def _requestNode(self, label='user-label'):
         req = zk.NodeRequest()
         req.state = zk.REQUESTED
         req.tenant_name = 'tenant-1'
-        req.node_types.append('user-label')
+        req.node_types.append(label)
         self.zk.storeNodeRequest(req)
 
         req = self.waitForNodeRequest(req)
@@ -307,3 +307,68 @@ class TestDriverMetastatic(tests.DBTestCase):
         self.waitForNodeDeletion(bn1)
         nodes = self._getNodes()
         self.assertEqual(nodes, [])
+
+    def test_metastatic_min_retention(self):
+        # Test that the metastatic driver honors min-retention
+        configfile = self.setup_config('metastatic.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        self.startPool(pool)
+        manager = pool.getProviderManager('fake-provider')
+
+        pool_worker = pool.getPoolWorkers('meta-provider')[0]
+        pool_config = pool_worker.getPoolConfig()
+        self.assertEqual(pool_config.max_servers, 10)
+        self.assertEqual(pool_config.priority, 1)
+
+        manager.adapter._client.create_image(name="fake-image")
+
+        # Request a node, verify that there is a backing node, and it
+        # has the same connection info
+        node1 = self._requestNode('user-label-min-retention')
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 2)
+        self.assertEqual(nodes[0], node1)
+        self.assertNotEqual(nodes[1], node1)
+        bn1 = nodes[1]
+        self.assertEqual(bn1.provider, 'fake-provider')
+        self.assertEqual(bn1.interface_ip, node1.interface_ip)
+        self.assertEqual(bn1.python_path, node1.python_path)
+        self.assertEqual('auto', node1.python_path)
+        self.assertEqual(bn1.shell_type, node1.shell_type)
+        self.assertEqual(bn1.cloud, node1.cloud)
+        self.assertEqual(None, node1.shell_type)
+        self.assertEqual(bn1.host_keys, node1.host_keys)
+        self.assertEqual(['ssh-rsa FAKEKEY'], node1.host_keys)
+        self.assertEqual(bn1.id, node1.driver_data['backing_node'])
+        self.assertEqual(bn1.attributes, {
+            'backattr': 'back',
+            'testattr': 'backing',
+        })
+        self.assertEqual(node1.attributes, {
+            'backattr': 'back',
+            'metaattr': 'meta',
+            'testattr': 'metastatic',
+        })
+
+        # Delete node, verify that backing node still exists
+        node1.state = zk.DELETING
+        self.zk.storeNode(node1)
+        self.waitForNodeDeletion(node1)
+
+        # This has the side effect of deleting the backing node when
+        # idle, but it should not in this case because the
+        # min-retention time is not met.
+        meta_manager = pool.getProviderManager('meta-provider')
+        meta_manager.adapter.listResources()
+        nodes = self._getNodes()
+        self.assertEqual(nodes, [bn1])
+        self.assertEqual(nodes[0].state, zk.IN_USE)
+
+        # Falsify the launch time to trigger cleanup:
+        bnr = meta_manager.adapter.backing_node_records[
+            'user-label-min-retention'][0]
+        bnr.launched = 0
+
+        meta_manager.adapter.listResources()
+        nodes = self._getNodes()
+        self.waitForNodeDeletion(bn1)
diff --git a/releasenotes/notes/metastatic-min-retention-94451b1bdc9c0f44.yaml b/releasenotes/notes/metastatic-min-retention-94451b1bdc9c0f44.yaml
new file mode 100644
index 000000000..6a3d905e1
--- /dev/null
+++ b/releasenotes/notes/metastatic-min-retention-94451b1bdc9c0f44.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    The metastatic driver now supports a
+    :attr:`providers.[metastatic].pools.labels.min-retention-time`
+    attribute to set the minimum retention time for a backing node
+    (useful for cloud resources with minimum billing intervals).
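
As an illustration of how a deployment might use the new option, here is a
minimal configuration sketch; the provider, pool, and label names are
hypothetical, and the values follow the documentation and test fixture added
above, with ``min-retention-time`` set to 86400 seconds to cover the 24-hour
billing interval mentioned in the commit message::

    providers:
      - name: meta-provider
        driver: metastatic
        pools:
          - name: main
            max-servers: 10
            labels:
              - name: user-label
                # Label of the real cloud node that backs this metastatic label.
                backing-label: backing-label
                # Up to two metastatic nodes share one backing node.
                max-parallel-jobs: 2
                # Seconds a backing node may sit idle before release is considered.
                grace-time: 600
                # Never release the backing node before 24 hours have elapsed,
                # even if it has been idle longer than grace-time.
                min-retention-time: 86400

Per the adapter change above, both conditions must hold before the backing
node is released: it must have been idle longer than ``grace-time`` and
launched more than ``min-retention-time`` seconds ago.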