diff --git a/doc/source/metastatic.rst b/doc/source/metastatic.rst
index dc0110f33..7412a9843 100644
--- a/doc/source/metastatic.rst
+++ b/doc/source/metastatic.rst
@@ -203,6 +203,18 @@ itself, which is "meta".
       used to ensure that the backing node is retained for at
       least the minimum billing interval.
 
+   .. attr:: max-age
+      :type: int
+
+      If this value is set, the backing node will be removed
+      from service after this amount of time (in seconds) has
+      passed since the backing node was launched. After a
+      backing node reaches this point, any existing jobs will
+      be permitted to run to completion, but no new metastatic
+      nodes will be created with that backing node, and once
+      all metastatic nodes using it have been deleted, the
+      backing node will be deleted.
+
    .. attr:: host-key-checking
       :type: bool
      :default: False
diff --git a/nodepool/driver/metastatic/adapter.py b/nodepool/driver/metastatic/adapter.py
index 7aefdac75..6c71cd024 100644
--- a/nodepool/driver/metastatic/adapter.py
+++ b/nodepool/driver/metastatic/adapter.py
@@ -298,6 +298,15 @@ class MetastaticAdapter(statemachine.Adapter):
                 if label_config:
                     grace_time = label_config.grace_time
                     min_time = label_config.min_retention_time
+                    if label_config.max_age:
+                        if now - bnr.launched > label_config.max_age:
+                            # Mark it as failed; even though it
+                            # hasn't really failed, the lifecycle
+                            # is the same: do not allocate any
+                            # more jobs to this node but let any
+                            # remaining ones finish, then delete
+                            # ASAP.
+                            bnr.failed = True
                 else:
                     # The label doesn't exist in our config any more,
                     # it must have been removed.
diff --git a/nodepool/driver/metastatic/config.py b/nodepool/driver/metastatic/config.py
index 4ab9ab88d..af1291576 100644
--- a/nodepool/driver/metastatic/config.py
+++ b/nodepool/driver/metastatic/config.py
@@ -46,8 +46,12 @@ class MetastaticLabel(ConfigValue):
         self.max_parallel_jobs = label.get('max-parallel-jobs', 1)
         self.grace_time = label.get('grace-time', 60)
         self.min_retention_time = label.get('min-retention-time', 0)
+        self.max_age = label.get('max-age', None)
         self.host_key_checking = label.get('host-key-checking',
                                            self.pool.host_key_checking)
+        if self.max_age and self.max_age < self.min_retention_time:
+            raise Exception("The max_age must be greater than or "
+                            "equal to the min_retention_time")
 
     @staticmethod
     def getSchema():
@@ -57,6 +61,7 @@ class MetastaticLabel(ConfigValue):
             'max-parallel-jobs': int,
             'grace-time': int,
             'min-retention-time': int,
+            'max-age': int,
             'host-key-checking': bool,
         }
 
@@ -66,7 +71,8 @@ class MetastaticLabel(ConfigValue):
             self.backing_label == other.backing_label and
             self.max_parallel_jobs == other.max_parallel_jobs and
             self.grace_time == other.grace_time and
-            self.min_retention_time == other.min_retention_time
+            self.min_retention_time == other.min_retention_time and
+            self.max_age == other.max_age
         )
 
 
diff --git a/nodepool/tests/fixtures/metastatic.yaml b/nodepool/tests/fixtures/metastatic.yaml
index 343562869..3a5b0e766 100644
--- a/nodepool/tests/fixtures/metastatic.yaml
+++ b/nodepool/tests/fixtures/metastatic.yaml
@@ -66,6 +66,7 @@ providers:
             backing-label: backing-label
             max-parallel-jobs: 2
             grace-time: 2
+            max-age: 300
             host-key-checking: true
           - name: user-label-min-retention
             backing-label: backing-label-min-retention
diff --git a/nodepool/tests/unit/test_driver_metastatic.py b/nodepool/tests/unit/test_driver_metastatic.py
index feaa7358e..d53f57d2f 100644
--- a/nodepool/tests/unit/test_driver_metastatic.py
+++ b/nodepool/tests/unit/test_driver_metastatic.py
@@ -372,3 +372,47 @@ class TestDriverMetastatic(tests.DBTestCase):
         meta_manager.adapter.listResources()
         nodes = self._getNodes()
         self.waitForNodeDeletion(bn1)
+
+    def test_metastatic_max_age(self):
+        # Test the max-age option
+        configfile = self.setup_config('metastatic.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        self.startPool(pool)
+        manager = pool.getProviderManager('fake-provider')
+        manager.adapter._client.create_image(name="fake-image")
+
+        # Launch one metastatic node on a backing node
+        node1 = self._requestNode()
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 2)
+        bn1 = nodes[1]
+        self.assertEqual(bn1.provider, 'fake-provider')
+        self.assertEqual(bn1.id, node1.driver_data['backing_node'])
+
+        # Create a second node and verify it uses the same backing node.
+        node2 = self._requestNode()
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 3)
+        self.assertEqual(bn1.id, node2.driver_data['backing_node'])
+
+        # Delete the second node.
+        node2.state = zk.DELETING
+        self.zk.storeNode(node2)
+        self.waitForNodeDeletion(node2)
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 2)
+
+        # Falsify the launch time so that the node is older than
+        # max_age (300).
+        meta_manager = pool.getProviderManager('meta-provider')
+        bnr = meta_manager.adapter.backing_node_records['user-label'][0]
+        bnr.launched = 0
+
+        # This has the side effect of marking the backing node as failed.
+        meta_manager.adapter.listResources()
+
+        # Create another node and verify it gets a new backing node.
+        node3 = self._requestNode()
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 4)
+        self.assertNotEqual(bn1.id, node3.driver_data['backing_node'])
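For illustration only, not part of the patch: a minimal provider configuration sketch, based on the test fixture above, showing how the new max-age option sits alongside the existing timing options. The provider, pool, and label names are the fixture's placeholders, and the commented values are assumptions chosen for the example.

    providers:
      - name: meta-provider
        driver: metastatic
        pools:
          - name: main
            labels:
              - name: user-label
                backing-label: backing-label
                max-parallel-jobs: 2
                grace-time: 2           # seconds to keep an idle backing node before cleanup
                min-retention-time: 0   # must not exceed max-age, or config validation raises
                max-age: 300            # retire the backing node 300 seconds after launch

With this sketch, a backing node older than 300 seconds is no longer given new metastatic nodes; existing ones finish, and the backing node is then deleted.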