Node instances: label modifiers (aws spot, fleet)

An instance property has been added to keep track of certain
modifiers that change the behavior of node requests. The modifiers,
delivered via config labels, record whether an AWS node was
requested as a spot instance or through the fleet API.

Tracking these labels will provide better reporting of node
usage within Zuul jobs and will allow jobs to react to
certain node types.

The node_properties property is expected to be accessible in Zuul
under the `nodepool` host facts.

Change-Id: I2d91f8d336738fc65ddaa26f3de6db5cba09ac0d
This commit is contained in:
Tudor 2024-09-12 14:25:06 +02:00 committed by Tudor Tabacel
parent d29a014699
commit 09b85516e5
5 changed files with 30 additions and 6 deletions

View File

@ -223,7 +223,7 @@ GIB = 1024 ** 3
class AwsInstance(statemachine.Instance):
def __init__(self, provider, instance, host, quota):
def __init__(self, provider, instance, host, quota, label):
super().__init__()
self.external_id = dict()
if instance:
@ -249,6 +249,14 @@ class AwsInstance(statemachine.Instance):
self.interface_ip = (self.public_ipv4 or self.public_ipv6 or
self.private_ipv4 or self.private_ipv6)
if label:
# `fleet` contains the parameters used to call the fleet API, in
# a dictionary form. The interesting point is if fleet API was
# used or not, so bool-it
self.node_properties['fleet'] = bool(label.fleet)
# `use_spot` is already bool, can directly be used as a flag
self.node_properties['spot'] = label.use_spot
def getQuotaInformation(self):
return self.quota
@ -420,8 +428,10 @@ class AwsCreateStateMachine(statemachine.StateMachine):
self.complete = True
self.quota = self.adapter._getQuotaForLabel(
self.label, self.instance['InstanceType'])
return AwsInstance(self.adapter.provider, self.instance,
self.host, self.quota)
return AwsInstance(
self.adapter.provider, self.instance, self.host, self.quota,
self.label
)
class EBSSnapshotUploader(ImageUploader):
@ -758,7 +768,7 @@ class AwsAdapter(statemachine.Adapter):
continue
quota.add(self._getQuotaForVolume(volume))
yield AwsInstance(self.provider, instance, None, quota)
yield AwsInstance(self.provider, instance, None, quota, None)
def getQuotaLimits(self):
# Get the instance and volume types that this provider handles

View File

@ -165,6 +165,7 @@ class StateMachineNodeLauncher(stats.StatsReporter):
node.az = instance.az
node.driver_data = instance.driver_data
node.slot = instance.slot
node.node_properties = instance.node_properties
# If we did not know the resource information before
# launching, update it now.
@ -1053,6 +1054,9 @@ class Instance:
self.metadata = {}
self.driver_data = None
self.slot = None
# Holds flags coming from label(s) that modify the node request,
# such as `spot` instance for AWS, `fleet` API or a metastatic node
self.node_properties = {}
def __repr__(self):
state = []

View File

@ -742,7 +742,7 @@ class TestDriverAws(tests.DBTestCase):
instance['InstanceType'] = 'test'
provider = Dummy()
provider.region_name = 'us-west-2'
awsi = AwsInstance(provider, instance, None, None)
awsi = AwsInstance(provider, instance, None, None, None)
self.assertEqual(awsi.public_ipv4, '1.2.3.4')
self.assertEqual(awsi.private_ipv4, '10.0.0.1')
self.assertEqual(awsi.public_ipv6, 'fe80::dead:beef')
@ -1324,6 +1324,8 @@ class TestDriverAws(tests.DBTestCase):
# moto doesn't provide the spot_instance_request_id
# self.assertIsNotNone(instance.spot_instance_request_id)
self.assertTrue(node.node_properties['spot'])
def test_aws_dedicated_host(self):
req = self.requestNode('aws/aws-dedicated-host.yaml', 'ubuntu')
for _ in iterate_timeout(60, Exception,
@ -1550,6 +1552,8 @@ class TestDriverAws(tests.DBTestCase):
['InstanceType'],
('t3.nano', 't3.micro', 't3.small', 't3.medium'))
self.assertTrue(node.node_properties['fleet'])
self.assertTrue(node.node_properties['spot'])
node.state = zk.USED
self.zk.storeNode(node)
self.waitForNodeDeletion(node)

View File

@ -1097,6 +1097,7 @@ class TestZKModel(tests.BaseTestCase):
o.hold_job = 'hold job'
o.host_keys = ['key1', 'key2']
o.attributes = {'executor-zone': 'vpn'}
o.node_properties = {}
d = o.toDict()
self.assertNotIn('id', d)
@ -1120,6 +1121,7 @@ class TestZKModel(tests.BaseTestCase):
self.assertEqual(d['hold_job'], o.hold_job)
self.assertEqual(d['host_keys'], o.host_keys)
self.assertEqual(d['attributes'], o.attributes)
self.assertEqual(d['node_properties'], o.node_properties)
def test_Node_fromDict(self):
now = int(time.time())

View File

@ -560,6 +560,7 @@ class Node(BaseModel):
self.tenant_name = None
self.driver_data = None
self.requestor = None
self.node_properties = {}
def __repr__(self):
d = self.toDict()
@ -604,7 +605,8 @@ class Node(BaseModel):
self.python_path == other.python_path and
self.tenant_name == other.tenant_name and
self.driver_data == other.driver_data and
self.requestor == other.requestor)
self.requestor == other.requestor and
self.node_properties == other.node_properties)
else:
return False
@ -660,6 +662,7 @@ class Node(BaseModel):
d['tenant_name'] = self.tenant_name
d['driver_data'] = self.driver_data
d['requestor'] = self.requestor
d['node_properties'] = self.node_properties
return d
@staticmethod
@ -728,6 +731,7 @@ class Node(BaseModel):
self.tenant_name = d.get('tenant_name')
self.driver_data = d.get('driver_data')
self.requestor = d.get('requestor')
self.node_properties = d.get('node_properties')
class NodepoolTreeCache(abc.ABC):