Merge "AWS multi quota support"
This commit is contained in:
commit
123a32f922
@ -109,6 +109,42 @@ Selecting the ``aws`` driver adds the following options to the
|
||||
until that instance is reported as "active". If the timeout is
|
||||
exceeded, the node launch is aborted and the instance deleted.
|
||||
|
||||
.. attr:: max-cores
|
||||
:type: int
|
||||
:default: unlimited
|
||||
|
||||
Maximum number of cores usable from this provider's pools by default.
|
||||
|
||||
.. attr:: max-servers
|
||||
:type: int
|
||||
:default: unlimited
|
||||
|
||||
Maximum number of servers spawnable from this provider's pools by default.
|
||||
|
||||
.. attr:: max-ram
|
||||
:type: int
|
||||
:default: unlimited
|
||||
|
||||
Maximum RAM (in MiB) usable from this provider's pools by default.
|
||||
|
||||
.. attr:: max-resources
|
||||
:type: dict
|
||||
:default: unlimited
|
||||
|
||||
A dictionary of other quota resource limits. AWS has quotas
|
||||
for certain instance types. These may be specified here to
|
||||
limit Nodepool's usage.
|
||||
|
||||
The following example limits the number of high-memory
|
||||
instance cores:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
max-resources:
|
||||
'L-43DA4232': 224
|
||||
|
||||
See `instance quotas`_ for more information.
|
||||
|
||||
.. attr:: launch-retries
|
||||
:default: 3
|
||||
|
||||
@ -379,6 +415,42 @@ Selecting the ``aws`` driver adds the following options to the
|
||||
A dictionary of key-value pairs that will be stored with the node data
|
||||
in ZooKeeper. The keys and values can be any arbitrary string.
|
||||
|
||||
.. attr:: max-cores
|
||||
:type: int
|
||||
|
||||
Maximum number of cores usable from this pool. Defaults to
|
||||
:attr:`providers.[aws].max-cores`.
|
||||
|
||||
.. attr:: max-servers
|
||||
:type: int
|
||||
|
||||
Maximum number of servers spawnable from this pool. Defaults to
|
||||
:attr:`providers.[aws].max-servers`.
|
||||
|
||||
.. attr:: max-ram
|
||||
:type: int
|
||||
|
||||
Maximum RAM (in MiB) usable from this pool. Defaults to
|
||||
:attr:`providers.[aws].max-ram`.
|
||||
|
||||
.. attr:: max-resources
|
||||
:type: dict
|
||||
|
||||
A dictionary of other quota resource limits. AWS has quotas
|
||||
for certain instance types. These may be specified here to
|
||||
limit Nodepool's usage. Defaults to
|
||||
:attr:`providers.[aws].max-resources`.
|
||||
|
||||
The following example limits the number of high-memory
|
||||
instance cores:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
max-resources:
|
||||
'L-43DA4232': 224
|
||||
|
||||
See `instance quotas`_ for more information.
|
||||
|
||||
.. attr:: subnet-id
|
||||
|
||||
If provided, specifies the subnet to assign to the primary network
|
||||
@ -538,3 +610,4 @@ Selecting the ``aws`` driver adds the following options to the
|
||||
.. _`Boto configuration`: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
|
||||
.. _`Boto describe images`: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_images
|
||||
.. _`VM Import/Export service role`: https://docs.aws.amazon.com/vm-import/latest/userguide/vmie_prereqs.html#vmimport-role
|
||||
.. _`instance quotas`: https://us-west-1.console.aws.amazon.com/servicequotas/home/services/ec2/quotas
|
||||
|
@ -548,8 +548,13 @@ Options
|
||||
max-servers: 10
|
||||
max-cores: 200
|
||||
max-ram: 16565
|
||||
'L-43DA4232': 224
|
||||
|
||||
Each entry is a dictionary with the following keys.
|
||||
Each entry is a dictionary with the following keys. Any other keys
|
||||
are interpreted as driver-specific resource limits (otherwise
|
||||
specified as ``max-resources`` in the provider configuration). The
|
||||
only driver that currently supports additional resource limits is
|
||||
AWS.
|
||||
|
||||
.. attr:: tenant-name
|
||||
:type: str
|
||||
|
@ -67,6 +67,7 @@ class ConfigValidator:
|
||||
'max-cores': int,
|
||||
'max-ram': int,
|
||||
'max-servers': int,
|
||||
str: int,
|
||||
}
|
||||
|
||||
top_level = {
|
||||
|
@ -255,19 +255,14 @@ class Config(ConfigValue):
|
||||
if not tenant_resource_limits_cfg:
|
||||
return
|
||||
for resource_limit in tenant_resource_limits_cfg:
|
||||
tenant_name = resource_limit['tenant-name']
|
||||
max_cores = resource_limit.get('max-cores')
|
||||
max_ram = resource_limit.get('max-ram')
|
||||
max_servers = resource_limit.get('max-servers')
|
||||
|
||||
resource_limit = resource_limit.copy()
|
||||
tenant_name = resource_limit.pop('tenant-name')
|
||||
limits = {}
|
||||
if max_cores:
|
||||
limits['cores'] = max_cores
|
||||
if max_servers:
|
||||
limits['instances'] = max_servers
|
||||
if max_ram:
|
||||
limits['ram'] = max_ram
|
||||
|
||||
limits['cores'] = resource_limit.pop('max-cores', math.inf)
|
||||
limits['instances'] = resource_limit.pop('max-servers', math.inf)
|
||||
limits['ram'] = resource_limit.pop('max-ram', math.inf)
|
||||
for k, v in resource_limit.items():
|
||||
limits[k] = v
|
||||
self.tenant_resource_limits[tenant_name] = limits
|
||||
|
||||
|
||||
|
@ -42,6 +42,41 @@ def tag_list_to_dict(taglist):
|
||||
return {t["Key"]: t["Value"] for t in taglist}
|
||||
|
||||
|
||||
# This is a map of instance types to quota codes. There does not
|
||||
# appear to be an automated way to determine what quota code to use
|
||||
# for an instance type, therefore this list was manually created by
|
||||
# visiting
|
||||
# https://us-west-1.console.aws.amazon.com/servicequotas/home/services/ec2/quotas
|
||||
# and filtering by "Instances". An example description is "Running
|
||||
# On-Demand P instances" which we can infer means we should use that
|
||||
# quota code for instance types starting with the letter "p". All
|
||||
# instance type names follow the format "([a-z\-]+)\d", so we can
|
||||
# match the first letters (up to the first number) of the instance
|
||||
# type name with the letters in the quota name. The prefix "u-" for
|
||||
# "Running On-Demand High Memory instances" was determined from
|
||||
# https://aws.amazon.com/ec2/instance-types/high-memory/
|
||||
|
||||
QUOTA_CODES = {
|
||||
'a': 'L-1216C47A',
|
||||
'c': 'L-1216C47A',
|
||||
'd': 'L-1216C47A',
|
||||
'h': 'L-1216C47A',
|
||||
'i': 'L-1216C47A',
|
||||
'm': 'L-1216C47A',
|
||||
'r': 'L-1216C47A',
|
||||
't': 'L-1216C47A',
|
||||
'z': 'L-1216C47A',
|
||||
'dl': 'L-6E869C2A',
|
||||
'f': 'L-74FC7D96',
|
||||
'g': 'L-DB2E81BA',
|
||||
'vt': 'L-DB2E81BA',
|
||||
'u-': 'L-43DA4232', # 'high memory'
|
||||
'inf': 'L-1945791B',
|
||||
'p': 'L-417A185B',
|
||||
'x': 'L-7295265B',
|
||||
}
|
||||
|
||||
|
||||
class AwsInstance(statemachine.Instance):
|
||||
def __init__(self, instance, quota):
|
||||
super().__init__()
|
||||
@ -293,15 +328,28 @@ class AwsAdapter(statemachine.Adapter):
|
||||
yield AwsInstance(instance, quota)
|
||||
|
||||
def getQuotaLimits(self):
|
||||
with self.non_mutating_rate_limiter:
|
||||
self.log.debug("Getting quota limits")
|
||||
response = self.aws_quotas.get_service_quota(
|
||||
ServiceCode='ec2',
|
||||
QuotaCode='L-1216C47A'
|
||||
)
|
||||
cores = response['Quota']['Value']
|
||||
return QuotaInformation(cores=cores,
|
||||
default=math.inf)
|
||||
# Get the instance types that this provider handles
|
||||
instance_types = set()
|
||||
for pool in self.provider.pools.values():
|
||||
for label in pool.labels.values():
|
||||
instance_types.add(label.instance_type)
|
||||
args = dict(default=math.inf)
|
||||
for instance_type in instance_types:
|
||||
code = self._getQuotaCodeForInstanceType(instance_type)
|
||||
if code in args:
|
||||
continue
|
||||
if not code:
|
||||
self.log.warning("Unknown quota code for instance type: %s",
|
||||
instance_type)
|
||||
continue
|
||||
with self.non_mutating_rate_limiter:
|
||||
self.log.debug("Getting quota limits for %s", code)
|
||||
response = self.aws_quotas.get_service_quota(
|
||||
ServiceCode='ec2',
|
||||
QuotaCode=code,
|
||||
)
|
||||
args[code] = response['Quota']['Value']
|
||||
return QuotaInformation(**args)
|
||||
|
||||
def getQuotaForLabel(self, label):
|
||||
return self._getQuotaForInstanceType(label.instance_type)
|
||||
@ -454,13 +502,27 @@ class AwsAdapter(statemachine.Adapter):
|
||||
# Return the first and only task
|
||||
return task
|
||||
|
||||
instance_key_re = re.compile(r'([a-z\-]+)\d.*')
|
||||
|
||||
def _getQuotaCodeForInstanceType(self, instance_type):
|
||||
m = self.instance_key_re.match(instance_type)
|
||||
if m:
|
||||
key = m.group(1)
|
||||
return QUOTA_CODES.get(key)
|
||||
|
||||
def _getQuotaForInstanceType(self, instance_type):
|
||||
itype = self._getInstanceType(instance_type)
|
||||
cores = itype['InstanceTypes'][0]['VCpuInfo']['DefaultCores']
|
||||
ram = itype['InstanceTypes'][0]['MemoryInfo']['SizeInMiB']
|
||||
return QuotaInformation(cores=cores,
|
||||
ram=ram,
|
||||
instances=1)
|
||||
code = self._getQuotaCodeForInstanceType(instance_type)
|
||||
# We include cores twice: one to match the overall cores quota
|
||||
# (which may be set as a tenant resource limit), and a second
|
||||
# time as the specific AWS quota code which is for a specific
|
||||
# instance type.
|
||||
args = dict(cores=cores, ram=ram, instances=1)
|
||||
if code:
|
||||
args[code] = cores
|
||||
return QuotaInformation(**args)
|
||||
|
||||
@cachetools.func.lru_cache(maxsize=None)
|
||||
def _getInstanceType(self, instance_type):
|
||||
|
@ -15,6 +15,9 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from collections import defaultdict
|
||||
import math
|
||||
|
||||
import voluptuous as v
|
||||
|
||||
from nodepool.driver import ConfigPool
|
||||
@ -203,6 +206,13 @@ class AwsPool(ConfigPool):
|
||||
'use-internal-ip', self.provider.use_internal_ip)
|
||||
self.host_key_checking = pool_config.get(
|
||||
'host-key-checking', self.provider.host_key_checking)
|
||||
self.max_servers = pool_config.get(
|
||||
'max-servers', self.provider.max_servers)
|
||||
self.max_cores = pool_config.get('max-cores', self.provider.max_cores)
|
||||
self.max_ram = pool_config.get('max-ram', self.provider.max_ram)
|
||||
self.max_resources = self.provider.max_resources.copy()
|
||||
for k, val in pool_config.get('max-resources', {}).items():
|
||||
self.max_resources[k] = val
|
||||
|
||||
@staticmethod
|
||||
def getSchema():
|
||||
@ -218,6 +228,9 @@ class AwsPool(ConfigPool):
|
||||
'public-ipv4': bool,
|
||||
'public-ipv6': bool,
|
||||
'host-key-checking': bool,
|
||||
'max-cores': int,
|
||||
'max-ram': int,
|
||||
'max-resources': {str: int},
|
||||
})
|
||||
return pool
|
||||
|
||||
@ -263,6 +276,12 @@ class AwsProviderConfig(ProviderConfig):
|
||||
self.image_type = self.provider.get('image-format', 'raw')
|
||||
self.image_name_format = '{image_name}-{timestamp}'
|
||||
self.post_upload_hook = self.provider.get('post-upload-hook')
|
||||
self.max_servers = self.provider.get('max-servers', math.inf)
|
||||
self.max_cores = self.provider.get('max-cores', math.inf)
|
||||
self.max_ram = self.provider.get('max-ram', math.inf)
|
||||
self.max_resources = defaultdict(lambda: math.inf)
|
||||
for k, val in self.provider.get('max-resources', {}).items():
|
||||
self.max_resources[k] = val
|
||||
|
||||
self.cloud_images = {}
|
||||
for image in self.provider.get('cloud-images', []):
|
||||
@ -305,6 +324,10 @@ class AwsProviderConfig(ProviderConfig):
|
||||
'launch-retries': int,
|
||||
'object-storage': object_storage,
|
||||
'image-format': v.Any('ova', 'vhd', 'vhdx', 'vmdk', 'raw'),
|
||||
'max-servers': int,
|
||||
'max-cores': int,
|
||||
'max-ram': int,
|
||||
'max-resources': {str: int},
|
||||
})
|
||||
return v.Schema(provider)
|
||||
|
||||
|
@ -123,6 +123,10 @@ class StateMachineNodeLauncher(stats.StatsReporter):
|
||||
self.node.shell_type = image.shell_type
|
||||
self.node.connection_port = image.connection_port
|
||||
self.node.connection_type = image.connection_type
|
||||
qi = self.manager.quotaNeededByLabel(label.name, self.handler.pool)
|
||||
if qi:
|
||||
self.node.resources = qi.get_resources()
|
||||
|
||||
self.zk.storeNode(self.node)
|
||||
|
||||
# Windows computer names can be no more than 15 chars long.
|
||||
@ -386,11 +390,14 @@ class StateMachineHandler(NodeRequestHandler):
|
||||
|
||||
# Now calculate pool specific quota. Values indicating no quota default
|
||||
# to math.inf representing infinity that can be calculated with.
|
||||
pool_quota = QuotaInformation(
|
||||
args = dict(
|
||||
cores=getattr(self.pool, 'max_cores', None),
|
||||
instances=self.pool.max_servers,
|
||||
ram=getattr(self.pool, 'max_ram', None),
|
||||
default=math.inf)
|
||||
default=math.inf,
|
||||
)
|
||||
args.update(getattr(self.pool, 'max_resources', {}))
|
||||
pool_quota = QuotaInformation(**args)
|
||||
pool_quota.subtract(needed_quota)
|
||||
return pool_quota.non_negative()
|
||||
|
||||
@ -403,6 +410,7 @@ class StateMachineHandler(NodeRequestHandler):
|
||||
:return: True if there is enough quota, False otherwise
|
||||
'''
|
||||
needed_quota = self.manager.quotaNeededByLabel(ntype, self.pool)
|
||||
self.log.debug("Needed quota: %s", needed_quota)
|
||||
|
||||
# Calculate remaining quota which is calculated as:
|
||||
# quota = <total nodepool quota> - <used quota> - <quota for node>
|
||||
@ -418,11 +426,14 @@ class StateMachineHandler(NodeRequestHandler):
|
||||
|
||||
# Now calculate pool specific quota. Values indicating no quota default
|
||||
# to math.inf representing infinity that can be calculated with.
|
||||
pool_quota = QuotaInformation(
|
||||
args = dict(
|
||||
cores=getattr(self.pool, 'max_cores', None),
|
||||
instances=self.pool.max_servers,
|
||||
ram=getattr(self.pool, 'max_ram', None),
|
||||
default=math.inf)
|
||||
default=math.inf,
|
||||
)
|
||||
args.update(getattr(self.pool, 'max_resources', {}))
|
||||
pool_quota = QuotaInformation(**args)
|
||||
pool_quota.subtract(
|
||||
self.manager.estimatedNodepoolQuotaUsed(self.pool))
|
||||
self.log.debug("Current pool quota: %s" % pool_quota)
|
||||
|
@ -1,4 +1,5 @@
|
||||
# Copyright (C) 2018 Red Hat
|
||||
# Copyright 2022 Acme Gating, LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@ -179,7 +180,7 @@ class NodeDeleter(threading.Thread):
|
||||
|
||||
class QuotaInformation:
|
||||
|
||||
def __init__(self, cores=None, instances=None, ram=None, default=0):
|
||||
def __init__(self, cores=None, instances=None, ram=None, default=0, **kw):
|
||||
'''
|
||||
Initializes the quota information with some values. None values will
|
||||
be initialized with default which will be typically 0 or math.inf
|
||||
@ -202,6 +203,9 @@ class QuotaInformation:
|
||||
'ram': self._get_default(ram, default),
|
||||
}
|
||||
}
|
||||
for k, v in kw.items():
|
||||
self.quota['compute'][k] = v
|
||||
self.default = default
|
||||
|
||||
@staticmethod
|
||||
def construct_from_flavor(flavor):
|
||||
@ -225,9 +229,14 @@ class QuotaInformation:
|
||||
return value if value is not None else default
|
||||
|
||||
def _add_subtract(self, other, add=True):
|
||||
for category in other.quota.keys():
|
||||
self.quota.setdefault(category, {})
|
||||
for resource in other.quota[category].keys():
|
||||
self.quota[category].setdefault(resource, self.default)
|
||||
for category in self.quota.keys():
|
||||
for resource in self.quota[category].keys():
|
||||
second_value = other.quota.get(category, {}).get(resource, 0)
|
||||
second_value = other.quota.get(category, {}).get(
|
||||
resource, other.default)
|
||||
if add:
|
||||
self.quota[category][resource] += second_value
|
||||
else:
|
||||
|
46
nodepool/tests/fixtures/aws/aws-limits.yaml
vendored
Normal file
46
nodepool/tests/fixtures/aws/aws-limits.yaml
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
zookeeper-servers:
|
||||
- host: {zookeeper_host}
|
||||
port: {zookeeper_port}
|
||||
chroot: {zookeeper_chroot}
|
||||
|
||||
zookeeper-tls:
|
||||
ca: {zookeeper_ca}
|
||||
cert: {zookeeper_cert}
|
||||
key: {zookeeper_key}
|
||||
|
||||
tenant-resource-limits:
|
||||
- tenant-name: tenant-1
|
||||
max-cores: 1024
|
||||
'L-43DA4232': 224 # high mem cores
|
||||
|
||||
labels:
|
||||
- name: standard
|
||||
- name: high
|
||||
|
||||
providers:
|
||||
- name: ec2-us-west-2
|
||||
driver: aws
|
||||
region-name: us-west-2
|
||||
cloud-images:
|
||||
- name: ubuntu1404
|
||||
image-id: ami-1e749f67
|
||||
username: ubuntu
|
||||
pools:
|
||||
- name: main
|
||||
max-servers: 10
|
||||
subnet-id: {subnet_id}
|
||||
security-group-id: {security_group_id}
|
||||
node-attributes:
|
||||
key1: value1
|
||||
key2: value2
|
||||
max-resources:
|
||||
'L-1216C47A': 1 # standard cores
|
||||
labels:
|
||||
- name: standard
|
||||
cloud-image: ubuntu1404
|
||||
instance-type: t3.medium
|
||||
key-name: zuul
|
||||
- name: high
|
||||
cloud-image: ubuntu1404
|
||||
instance-type: u-6tb1.112xlarge
|
||||
key-name: zuul
|
43
nodepool/tests/fixtures/aws/aws-quota.yaml
vendored
Normal file
43
nodepool/tests/fixtures/aws/aws-quota.yaml
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
zookeeper-servers:
|
||||
- host: {zookeeper_host}
|
||||
port: {zookeeper_port}
|
||||
chroot: {zookeeper_chroot}
|
||||
|
||||
zookeeper-tls:
|
||||
ca: {zookeeper_ca}
|
||||
cert: {zookeeper_cert}
|
||||
key: {zookeeper_key}
|
||||
|
||||
tenant-resource-limits:
|
||||
- tenant-name: tenant-1
|
||||
max-cores: 1024
|
||||
|
||||
labels:
|
||||
- name: standard
|
||||
- name: high
|
||||
|
||||
providers:
|
||||
- name: ec2-us-west-2
|
||||
driver: aws
|
||||
region-name: us-west-2
|
||||
cloud-images:
|
||||
- name: ubuntu1404
|
||||
image-id: ami-1e749f67
|
||||
username: ubuntu
|
||||
pools:
|
||||
- name: main
|
||||
max-servers: 10
|
||||
subnet-id: {subnet_id}
|
||||
security-group-id: {security_group_id}
|
||||
node-attributes:
|
||||
key1: value1
|
||||
key2: value2
|
||||
labels:
|
||||
- name: standard
|
||||
cloud-image: ubuntu1404
|
||||
instance-type: t3.medium
|
||||
key-name: zuul
|
||||
- name: high
|
||||
cloud-image: ubuntu1404
|
||||
instance-type: u-6tb1.112xlarge
|
||||
key-name: zuul
|
@ -114,7 +114,8 @@ class TestDriverAws(tests.DBTestCase):
|
||||
kw['security_group_id'] = self.security_group_id
|
||||
return super().setup_config(*args, **kw)
|
||||
|
||||
def patchProvider(self, nodepool, provider_name='ec2-us-west-2'):
|
||||
def patchProvider(self, nodepool, provider_name='ec2-us-west-2',
|
||||
quotas=None):
|
||||
for _ in iterate_timeout(
|
||||
30, Exception, 'wait for provider'):
|
||||
try:
|
||||
@ -138,10 +139,13 @@ class TestDriverAws(tests.DBTestCase):
|
||||
_fake_create_instances
|
||||
|
||||
# moto does not mock service-quotas, so we do it ourselves:
|
||||
def _fake_get_service_quota(*args, **kwargs):
|
||||
def _fake_get_service_quota(ServiceCode, QuotaCode, *args, **kwargs):
|
||||
# This is a simple fake that only returns the number
|
||||
# of cores.
|
||||
return {'Quota': {'Value': 100}}
|
||||
if quotas is None:
|
||||
return {'Quota': {'Value': 100}}
|
||||
else:
|
||||
return {'Quota': {'Value': quotas.get(QuotaCode)}}
|
||||
provider_manager.adapter.aws_quotas.get_service_quota =\
|
||||
_fake_get_service_quota
|
||||
|
||||
@ -204,6 +208,149 @@ class TestDriverAws(tests.DBTestCase):
|
||||
for node in nodes:
|
||||
self.waitForNodeDeletion(node)
|
||||
|
||||
def test_aws_multi_quota(self):
|
||||
# Test multiple instance type quotas (standard and high-mem)
|
||||
configfile = self.setup_config('aws/aws-quota.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
self.patchProvider(pool, quotas={
|
||||
'L-1216C47A': 1,
|
||||
'L-43DA4232': 224,
|
||||
})
|
||||
|
||||
# Create a high-memory node request.
|
||||
req1 = zk.NodeRequest()
|
||||
req1.state = zk.REQUESTED
|
||||
req1.node_types.append('high')
|
||||
self.zk.storeNodeRequest(req1)
|
||||
self.log.debug("Waiting for request %s", req1.id)
|
||||
req1 = self.waitForNodeRequest(req1)
|
||||
node1 = self.assertSuccess(req1)
|
||||
|
||||
# Create a second high-memory node request; this should be
|
||||
# over quota so it won't be fulfilled.
|
||||
req2 = zk.NodeRequest()
|
||||
req2.state = zk.REQUESTED
|
||||
req2.node_types.append('high')
|
||||
self.zk.storeNodeRequest(req2)
|
||||
self.log.debug("Waiting for request %s", req2.id)
|
||||
req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
|
||||
|
||||
# Make sure we're paused while we attempt to fulfill the
|
||||
# second request.
|
||||
pool_worker = pool.getPoolWorkers('ec2-us-west-2')
|
||||
for _ in iterate_timeout(30, Exception, 'paused handler'):
|
||||
if pool_worker[0].paused_handler:
|
||||
break
|
||||
|
||||
# Release the first node so that the second can be fulfilled.
|
||||
node1.state = zk.USED
|
||||
self.zk.storeNode(node1)
|
||||
self.waitForNodeDeletion(node1)
|
||||
|
||||
# Make sure the second high node exists now.
|
||||
req2 = self.waitForNodeRequest(req2)
|
||||
self.assertSuccess(req2)
|
||||
|
||||
# Create a standard node request which should succeed even
|
||||
# though we're at quota for high-mem (but not standard).
|
||||
req3 = zk.NodeRequest()
|
||||
req3.state = zk.REQUESTED
|
||||
req3.node_types.append('standard')
|
||||
self.zk.storeNodeRequest(req3)
|
||||
self.log.debug("Waiting for request %s", req3.id)
|
||||
req3 = self.waitForNodeRequest(req3)
|
||||
self.assertSuccess(req3)
|
||||
|
||||
def test_aws_multi_pool_limits(self):
|
||||
# Test multiple instance type quotas (standard and high-mem)
|
||||
# with pool resource limits
|
||||
configfile = self.setup_config('aws/aws-limits.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
self.patchProvider(pool, quotas={
|
||||
'L-1216C47A': 1000,
|
||||
'L-43DA4232': 1000,
|
||||
})
|
||||
|
||||
# Create a standard node request.
|
||||
req1 = zk.NodeRequest()
|
||||
req1.state = zk.REQUESTED
|
||||
req1.node_types.append('standard')
|
||||
self.zk.storeNodeRequest(req1)
|
||||
self.log.debug("Waiting for request %s", req1.id)
|
||||
req1 = self.waitForNodeRequest(req1)
|
||||
node1 = self.assertSuccess(req1)
|
||||
|
||||
# Create a second standard node request; this should be
|
||||
# over the pool's max-resources limit so it won't be fulfilled.
|
||||
req2 = zk.NodeRequest()
|
||||
req2.state = zk.REQUESTED
|
||||
req2.node_types.append('standard')
|
||||
self.zk.storeNodeRequest(req2)
|
||||
self.log.debug("Waiting for request %s", req2.id)
|
||||
req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
|
||||
|
||||
# Make sure we're paused while we attempt to fulfill the
|
||||
# second request.
|
||||
pool_worker = pool.getPoolWorkers('ec2-us-west-2')
|
||||
for _ in iterate_timeout(30, Exception, 'paused handler'):
|
||||
if pool_worker[0].paused_handler:
|
||||
break
|
||||
|
||||
# Release the first node so that the second can be fulfilled.
|
||||
node1.state = zk.USED
|
||||
self.zk.storeNode(node1)
|
||||
self.waitForNodeDeletion(node1)
|
||||
|
||||
# Make sure the second standard node exists now.
|
||||
req2 = self.waitForNodeRequest(req2)
|
||||
self.assertSuccess(req2)
|
||||
|
||||
def test_aws_multi_tenant_limits(self):
|
||||
# Test multiple instance type quotas (standard and high-mem)
|
||||
# with tenant resource limits
|
||||
configfile = self.setup_config('aws/aws-limits.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
self.patchProvider(pool, quotas={
|
||||
'L-1216C47A': 1000,
|
||||
'L-43DA4232': 1000,
|
||||
})
|
||||
|
||||
# Create a high node request.
|
||||
req1 = zk.NodeRequest()
|
||||
req1.state = zk.REQUESTED
|
||||
req1.tenant_name = 'tenant-1'
|
||||
req1.node_types.append('high')
|
||||
self.zk.storeNodeRequest(req1)
|
||||
self.log.debug("Waiting for request %s", req1.id)
|
||||
req1 = self.waitForNodeRequest(req1)
|
||||
self.assertSuccess(req1)
|
||||
|
||||
# Create a second high node request; this should be
|
||||
# over quota so it won't be fulfilled.
|
||||
req2 = zk.NodeRequest()
|
||||
req2.state = zk.REQUESTED
|
||||
req2.tenant_name = 'tenant-1'
|
||||
req2.node_types.append('high')
|
||||
self.zk.storeNodeRequest(req2)
|
||||
req2 = self.waitForNodeRequest(req2, (zk.REQUESTED,))
|
||||
|
||||
# Create a standard node request which should succeed even
|
||||
# though we're at quota for high-mem (but not standard).
|
||||
req3 = zk.NodeRequest()
|
||||
req3.state = zk.REQUESTED
|
||||
req3.tenant_name = 'tenant-1'
|
||||
req3.node_types.append('standard')
|
||||
self.zk.storeNodeRequest(req3)
|
||||
self.log.debug("Waiting for request %s", req3.id)
|
||||
req3 = self.waitForNodeRequest(req3)
|
||||
self.assertSuccess(req3)
|
||||
|
||||
# Assert that the second request is still being deferred
|
||||
req2 = self.waitForNodeRequest(req2, (zk.REQUESTED,))
|
||||
|
||||
def test_aws_node(self):
|
||||
req = self.requestNode('aws/aws.yaml', 'ubuntu1404')
|
||||
node = self.assertSuccess(req)
|
||||
|
68
nodepool/tests/unit/test_utils.py
Normal file
68
nodepool/tests/unit/test_utils.py
Normal file
@ -0,0 +1,68 @@
|
||||
# Copyright 2022 Acme Gating, LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import copy
|
||||
import math
|
||||
|
||||
from nodepool import tests
|
||||
from nodepool.driver.utils import QuotaInformation
|
||||
|
||||
|
||||
class TestQutoInformation(tests.BaseTestCase):
|
||||
def test_subtract(self):
|
||||
provider = QuotaInformation(cores=8, ram=8192, default=math.inf)
|
||||
needed = QuotaInformation(cores=2, instances=1)
|
||||
expected = QuotaInformation(cores=6, instances=math.inf, ram=8192)
|
||||
|
||||
remain = copy.deepcopy(provider)
|
||||
remain.subtract(needed)
|
||||
|
||||
self.assertEqual(expected.quota, remain.quota)
|
||||
|
||||
def test_add(self):
|
||||
label1 = QuotaInformation(cores=8, ram=8192)
|
||||
label2 = QuotaInformation(cores=2, instances=1)
|
||||
|
||||
needed = copy.deepcopy(label1)
|
||||
needed.add(label2)
|
||||
expected = QuotaInformation(cores=10, instances=1, ram=8192)
|
||||
self.assertEqual(expected.quota, needed.quota)
|
||||
|
||||
def test_extra(self):
|
||||
# Test extra quota fields
|
||||
|
||||
# We call them red_, blue_, green_
|
||||
# cores here. They are arbitrary names other than the
|
||||
# standard cores, ram, instances.
|
||||
label1 = QuotaInformation(cores=8, ram=8192,
|
||||
red_cores=8, green_cores=8)
|
||||
label2 = QuotaInformation(cores=2, instances=1, blue_cores=2)
|
||||
|
||||
needed = copy.deepcopy(label1)
|
||||
needed.add(label2)
|
||||
expected = QuotaInformation(cores=10, instances=1, ram=8192,
|
||||
red_cores=8, blue_cores=2,
|
||||
green_cores=8)
|
||||
self.assertEqual(expected.quota, needed.quota)
|
||||
|
||||
provider = QuotaInformation(cores=8, ram=8192, default=math.inf,
|
||||
green_cores=16)
|
||||
expected = QuotaInformation(cores=-2, instances=math.inf, ram=0,
|
||||
red_cores=math.inf, blue_cores=math.inf,
|
||||
green_cores=8)
|
||||
|
||||
remain = copy.deepcopy(provider)
|
||||
remain.subtract(needed)
|
||||
|
||||
self.assertEqual(expected.quota, remain.quota)
|
7
releasenotes/notes/aws-multi-quota-fbddefb56d0694a4.yaml
Normal file
7
releasenotes/notes/aws-multi-quota-fbddefb56d0694a4.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
The AWS driver now supports multiple quotas for specific instance
|
||||
types. This support is automatic, but also includes corresponding
|
||||
enhancements to provider, pool, and tenant limits configured in
|
||||
Nodepool.
|
Loading…
Reference in New Issue
Block a user