From 09b85516e5e6d1c2f84093db871448999f281bd5 Mon Sep 17 00:00:00 2001 From: Tudor Date: Thu, 12 Sep 2024 14:25:06 +0200 Subject: [PATCH] Node instances: label modifiers (aws spot, fleet) An instance property has been added to keep track of certain modifiers that change the behavior of node requests. The modifiers, delivered via config labels, are meant to track AWS node requests that use spot instances or the fleet API. Tracking these labels will provide better reporting of node usage within Zuul jobs and allow jobs to react to certain node types. The node_properties property is expected to be accessible in Zuul under the `nodepool` host facts. Change-Id: I2d91f8d336738fc65ddaa26f3de6db5cba09ac0d --- nodepool/driver/aws/adapter.py | 18 ++++++++++++++---- nodepool/driver/statemachine.py | 4 ++++ nodepool/tests/unit/test_driver_aws.py | 6 +++++- nodepool/tests/unit/test_zk.py | 2 ++ nodepool/zk/zookeeper.py | 6 +++++- 5 files changed, 30 insertions(+), 6 deletions(-) diff --git a/nodepool/driver/aws/adapter.py b/nodepool/driver/aws/adapter.py index d670950b5..40809fd48 100644 --- a/nodepool/driver/aws/adapter.py +++ b/nodepool/driver/aws/adapter.py @@ -223,7 +223,7 @@ GIB = 1024 ** 3 class AwsInstance(statemachine.Instance): - def __init__(self, provider, instance, host, quota): + def __init__(self, provider, instance, host, quota, label): super().__init__() self.external_id = dict() if instance: @@ -249,6 +249,14 @@ class AwsInstance(statemachine.Instance): self.interface_ip = (self.public_ipv4 or self.public_ipv6 or self.private_ipv4 or self.private_ipv6) + if label: + # `fleet` contains the parameters used to call the fleet API, in + # a dictionary form. 
The interesting point is whether the fleet + # API was used, so convert it to bool + self.node_properties['fleet'] = bool(label.fleet) + # `use_spot` is already bool, can directly be used as a flag + self.node_properties['spot'] = label.use_spot + def getQuotaInformation(self): return self.quota @@ -420,8 +428,10 @@ class AwsCreateStateMachine(statemachine.StateMachine): self.complete = True self.quota = self.adapter._getQuotaForLabel( self.label, self.instance['InstanceType']) - return AwsInstance(self.adapter.provider, self.instance, - self.host, self.quota) + return AwsInstance( + self.adapter.provider, self.instance, self.host, self.quota, + self.label + ) class EBSSnapshotUploader(ImageUploader): @@ -758,7 +768,7 @@ class AwsAdapter(statemachine.Adapter): continue quota.add(self._getQuotaForVolume(volume)) - yield AwsInstance(self.provider, instance, None, quota) + yield AwsInstance(self.provider, instance, None, quota, None) def getQuotaLimits(self): # Get the instance and volume types that this provider handles diff --git a/nodepool/driver/statemachine.py b/nodepool/driver/statemachine.py index 243fae604..3af7527b1 100644 --- a/nodepool/driver/statemachine.py +++ b/nodepool/driver/statemachine.py @@ -165,6 +165,7 @@ class StateMachineNodeLauncher(stats.StatsReporter): node.az = instance.az node.driver_data = instance.driver_data node.slot = instance.slot + node.node_properties = instance.node_properties # If we did not know the resource information before # launching, update it now. 
@@ -1053,6 +1054,9 @@ class Instance: self.metadata = {} self.driver_data = None self.slot = None + # Holds flags coming from label(s) that modify the node request, + # such as `spot` instance for AWS, `fleet` API or a metastatic node + self.node_properties = {} def __repr__(self): state = [] diff --git a/nodepool/tests/unit/test_driver_aws.py b/nodepool/tests/unit/test_driver_aws.py index 794f18a82..66e7dcf0b 100644 --- a/nodepool/tests/unit/test_driver_aws.py +++ b/nodepool/tests/unit/test_driver_aws.py @@ -742,7 +742,7 @@ class TestDriverAws(tests.DBTestCase): instance['InstanceType'] = 'test' provider = Dummy() provider.region_name = 'us-west-2' - awsi = AwsInstance(provider, instance, None, None) + awsi = AwsInstance(provider, instance, None, None, None) self.assertEqual(awsi.public_ipv4, '1.2.3.4') self.assertEqual(awsi.private_ipv4, '10.0.0.1') self.assertEqual(awsi.public_ipv6, 'fe80::dead:beef') @@ -1324,6 +1324,8 @@ class TestDriverAws(tests.DBTestCase): # moto doesn't provide the spot_instance_request_id # self.assertIsNotNone(instance.spot_instance_request_id) + self.assertTrue(node.node_properties['spot']) + def test_aws_dedicated_host(self): req = self.requestNode('aws/aws-dedicated-host.yaml', 'ubuntu') for _ in iterate_timeout(60, Exception, @@ -1550,6 +1552,8 @@ class TestDriverAws(tests.DBTestCase): ['InstanceType'], ('t3.nano', 't3.micro', 't3.small', 't3.medium')) + self.assertTrue(node.node_properties['fleet']) + self.assertTrue(node.node_properties['spot']) node.state = zk.USED self.zk.storeNode(node) self.waitForNodeDeletion(node) diff --git a/nodepool/tests/unit/test_zk.py b/nodepool/tests/unit/test_zk.py index c5dab9458..a6a4d0b4a 100644 --- a/nodepool/tests/unit/test_zk.py +++ b/nodepool/tests/unit/test_zk.py @@ -1097,6 +1097,7 @@ class TestZKModel(tests.BaseTestCase): o.hold_job = 'hold job' o.host_keys = ['key1', 'key2'] o.attributes = {'executor-zone': 'vpn'} + o.node_properties = {} d = o.toDict() self.assertNotIn('id', d) @@ -1120,6 
+1121,7 @@ class TestZKModel(tests.BaseTestCase): self.assertEqual(d['hold_job'], o.hold_job) self.assertEqual(d['host_keys'], o.host_keys) self.assertEqual(d['attributes'], o.attributes) + self.assertEqual(d['node_properties'], o.node_properties) def test_Node_fromDict(self): now = int(time.time()) diff --git a/nodepool/zk/zookeeper.py b/nodepool/zk/zookeeper.py index 4a3339c48..89607193c 100644 --- a/nodepool/zk/zookeeper.py +++ b/nodepool/zk/zookeeper.py @@ -560,6 +560,7 @@ class Node(BaseModel): self.tenant_name = None self.driver_data = None self.requestor = None + self.node_properties = {} def __repr__(self): d = self.toDict() @@ -604,7 +605,8 @@ class Node(BaseModel): self.python_path == other.python_path and self.tenant_name == other.tenant_name and self.driver_data == other.driver_data and - self.requestor == other.requestor) + self.requestor == other.requestor and + self.node_properties == other.node_properties) else: return False @@ -660,6 +662,7 @@ class Node(BaseModel): d['tenant_name'] = self.tenant_name d['driver_data'] = self.driver_data d['requestor'] = self.requestor + d['node_properties'] = self.node_properties return d @staticmethod @@ -728,6 +731,7 @@ class Node(BaseModel): self.tenant_name = d.get('tenant_name') self.driver_data = d.get('driver_data') self.requestor = d.get('requestor') + self.node_properties = d.get('node_properties') class NodepoolTreeCache(abc.ABC):