Merge "AWS multi quota support"
commit 123a32f922
@@ -109,6 +109,42 @@ Selecting the ``aws`` driver adds the following options to the
       until that instance is reported as "active". If the timeout is
       exceeded, the node launch is aborted and the instance deleted.

+   .. attr:: max-cores
+      :type: int
+      :default: unlimited
+
+      Maximum number of cores usable from this provider's pools by default.
+
+   .. attr:: max-servers
+      :type: int
+      :default: unlimited
+
+      Maximum number of servers spawnable from this provider's pools by default.
+
+   .. attr:: max-ram
+      :type: int
+      :default: unlimited
+
+      Maximum RAM usable from this provider's pools by default.
+
+   .. attr:: max-resources
+      :type: dict
+      :default: unlimited
+
+      A dictionary of other quota resource limits. AWS has quotas
+      for certain instance types. These may be specified here to
+      limit Nodepool's usage.
+
+      The following example limits the number of high-memory
+      instance cores:
+
+      .. code-block:: yaml
+
+         max-resources:
+           'L-43DA4232': 224
+
+      See `instance quotas`_ for more information.
+
    .. attr:: launch-retries
       :default: 3

@@ -379,6 +415,42 @@ Selecting the ``aws`` driver adds the following options to the
       A dictionary of key-value pairs that will be stored with the node data
       in ZooKeeper. The keys and values can be any arbitrary string.

+   .. attr:: max-cores
+      :type: int
+
+      Maximum number of cores usable from this pool. Defaults to
+      :attr:`providers.[aws].max-cores`.
+
+   .. attr:: max-servers
+      :type: int
+
+      Maximum number of servers spawnable from this pool. Defaults to
+      :attr:`providers.[aws].max-servers`.
+
+   .. attr:: max-ram
+      :type: int
+
+      Maximum RAM usable from this pool. Defaults to
+      :attr:`providers.[aws].max-ram`.
+
+   .. attr:: max-resources
+      :type: dict
+
+      A dictionary of other quota resource limits. AWS has quotas
+      for certain instance types. These may be specified here to
+      limit Nodepool's usage. Defaults to
+      :attr:`providers.[aws].max-resources`.
+
+      The following example limits the number of high-memory
+      instance cores:
+
+      .. code-block:: yaml
+
+         max-resources:
+           'L-43DA4232': 224
+
+      See `instance quotas`_ for more information.
+
    .. attr:: subnet-id

       If provided, specifies the subnet to assign to the primary network
@@ -538,3 +610,4 @@ Selecting the ``aws`` driver adds the following options to the
 .. _`Boto configuration`: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
 .. _`Boto describe images`: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_images
 .. _`VM Import/Export service role`: https://docs.aws.amazon.com/vm-import/latest/userguide/vmie_prereqs.html#vmimport-role
+.. _`instance quotas`: https://us-west-1.console.aws.amazon.com/servicequotas/home/services/ec2/quotas
@@ -548,8 +548,13 @@ Options
         max-servers: 10
         max-cores: 200
         max-ram: 16565
+        'L-43DA4232': 224

-   Each entry is a dictionary with the following keys.
+   Each entry is a dictionary with the following keys. Any other keys
+   are interpreted as driver-specific resource limits (otherwise
+   specified as ``max-resources`` in the provider configuration). The
+   only driver that currently supports additional resource limits is
+   AWS.

    .. attr:: tenant-name
       :type: str
@@ -67,6 +67,7 @@ class ConfigValidator:
             'max-cores': int,
             'max-ram': int,
             'max-servers': int,
+            str: int,
         }

         top_level = {
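The ``str: int`` entry is what lets arbitrary, driver-specific quota codes pass validation. A minimal voluptuous sketch of that behavior (illustrative only, not the full Nodepool schema; explicit keys are matched before the generic ``str`` key):

    import voluptuous as v

    # Any string key not listed explicitly must map to an int.
    tenant_resource_limit = v.Schema({
        'tenant-name': str,
        'max-cores': int,
        'max-ram': int,
        'max-servers': int,
        str: int,
    })

    tenant_resource_limit({'tenant-name': 'tenant-1',
                           'max-cores': 1024,
                           'L-43DA4232': 224})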
@@ -255,19 +255,14 @@ class Config(ConfigValue):
         if not tenant_resource_limits_cfg:
             return
         for resource_limit in tenant_resource_limits_cfg:
-            tenant_name = resource_limit['tenant-name']
-            max_cores = resource_limit.get('max-cores')
-            max_ram = resource_limit.get('max-ram')
-            max_servers = resource_limit.get('max-servers')
+            resource_limit = resource_limit.copy()
+            tenant_name = resource_limit.pop('tenant-name')

             limits = {}
-            if max_cores:
-                limits['cores'] = max_cores
-            if max_servers:
-                limits['instances'] = max_servers
-            if max_ram:
-                limits['ram'] = max_ram
+            limits['cores'] = resource_limit.pop('max-cores', math.inf)
+            limits['instances'] = resource_limit.pop('max-servers', math.inf)
+            limits['ram'] = resource_limit.pop('max-ram', math.inf)
+            for k, v in resource_limit.items():
+                limits[k] = v

             self.tenant_resource_limits[tenant_name] = limits

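For illustration, here is what the rewritten loop produces for one tenant-resource-limits entry; the values come from the test fixture added later in this change, and the snippet is a standalone sketch rather than the Config class itself:

    import math

    entry = {'tenant-name': 'tenant-1', 'max-cores': 1024, 'L-43DA4232': 224}

    entry = entry.copy()
    tenant_name = entry.pop('tenant-name')
    limits = {
        'cores': entry.pop('max-cores', math.inf),
        'instances': entry.pop('max-servers', math.inf),
        'ram': entry.pop('max-ram', math.inf),
    }
    limits.update(entry)  # remaining keys are driver-specific quota codes

    # limits == {'cores': 1024, 'instances': inf, 'ram': inf, 'L-43DA4232': 224}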
@@ -42,6 +42,41 @@ def tag_list_to_dict(taglist):
     return {t["Key"]: t["Value"] for t in taglist}


+# This is a map of instance types to quota codes. There does not
+# appear to be an automated way to determine what quota code to use
+# for an instance type, therefore this list was manually created by
+# visiting
+# https://us-west-1.console.aws.amazon.com/servicequotas/home/services/ec2/quotas
+# and filtering by "Instances". An example description is "Running
+# On-Demand P instances" which we can infer means we should use that
+# quota code for instance types starting with the letter "p". All
+# instance type names follow the format "([a-z\-]+)\d", so we can
+# match the first letters (up to the first number) of the instance
+# type name with the letters in the quota name. The prefix "u-" for
+# "Running On-Demand High Memory instances" was determined from
+# https://aws.amazon.com/ec2/instance-types/high-memory/
+
+QUOTA_CODES = {
+    'a': 'L-1216C47A',
+    'c': 'L-1216C47A',
+    'd': 'L-1216C47A',
+    'h': 'L-1216C47A',
+    'i': 'L-1216C47A',
+    'm': 'L-1216C47A',
+    'r': 'L-1216C47A',
+    't': 'L-1216C47A',
+    'z': 'L-1216C47A',
+    'dl': 'L-6E869C2A',
+    'f': 'L-74FC7D96',
+    'g': 'L-DB2E81BA',
+    'vt': 'L-DB2E81BA',
+    'u-': 'L-43DA4232',  # 'high memory'
+    'inf': 'L-1945791B',
+    'p': 'L-417A185B',
+    'x': 'L-7295265B',
+}
+
+
 class AwsInstance(statemachine.Instance):
     def __init__(self, instance, quota):
         super().__init__()
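The prefix matching described in the comment can be checked in isolation; this standalone sketch uses the same regex the adapter introduces below and a trimmed copy of the table:

    import re

    QUOTA_CODES = {'t': 'L-1216C47A', 'u-': 'L-43DA4232', 'p': 'L-417A185B'}  # subset
    instance_key_re = re.compile(r'([a-z\-]+)\d.*')

    for itype in ('t3.medium', 'u-6tb1.112xlarge', 'p4d.24xlarge'):
        prefix = instance_key_re.match(itype).group(1)
        print(itype, '->', prefix, '->', QUOTA_CODES[prefix])

    # t3.medium        -> t  -> L-1216C47A  (standard instances)
    # u-6tb1.112xlarge -> u- -> L-43DA4232  (high memory)
    # p4d.24xlarge     -> p  -> L-417A185B  (P instances)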
@@ -293,15 +328,28 @@ class AwsAdapter(statemachine.Adapter):
             yield AwsInstance(instance, quota)

     def getQuotaLimits(self):
-        with self.non_mutating_rate_limiter:
-            self.log.debug("Getting quota limits")
-            response = self.aws_quotas.get_service_quota(
-                ServiceCode='ec2',
-                QuotaCode='L-1216C47A'
-            )
-            cores = response['Quota']['Value']
-            return QuotaInformation(cores=cores,
-                                    default=math.inf)
+        # Get the instance types that this provider handles
+        instance_types = set()
+        for pool in self.provider.pools.values():
+            for label in pool.labels.values():
+                instance_types.add(label.instance_type)
+        args = dict(default=math.inf)
+        for instance_type in instance_types:
+            code = self._getQuotaCodeForInstanceType(instance_type)
+            if code in args:
+                continue
+            if not code:
+                self.log.warning("Unknown quota code for instance type: %s",
+                                 instance_type)
+                continue
+            with self.non_mutating_rate_limiter:
+                self.log.debug("Getting quota limits for %s", code)
+                response = self.aws_quotas.get_service_quota(
+                    ServiceCode='ec2',
+                    QuotaCode=code,
+                )
+                args[code] = response['Quota']['Value']
+        return QuotaInformation(**args)

     def getQuotaForLabel(self, label):
         return self._getQuotaForInstanceType(label.instance_type)
@@ -454,13 +502,27 @@ class AwsAdapter(statemachine.Adapter):
         # Return the first and only task
         return task

+    instance_key_re = re.compile(r'([a-z\-]+)\d.*')
+
+    def _getQuotaCodeForInstanceType(self, instance_type):
+        m = self.instance_key_re.match(instance_type)
+        if m:
+            key = m.group(1)
+            return QUOTA_CODES.get(key)
+
     def _getQuotaForInstanceType(self, instance_type):
         itype = self._getInstanceType(instance_type)
         cores = itype['InstanceTypes'][0]['VCpuInfo']['DefaultCores']
         ram = itype['InstanceTypes'][0]['MemoryInfo']['SizeInMiB']
-        return QuotaInformation(cores=cores,
-                                ram=ram,
-                                instances=1)
+        code = self._getQuotaCodeForInstanceType(instance_type)
+        # We include cores twice: once to match the overall cores quota
+        # (which may be set as a tenant resource limit), and a second
+        # time as the specific AWS quota code for this particular
+        # instance type.
+        args = dict(cores=cores, ram=ram, instances=1)
+        if code:
+            args[code] = cores
+        return QuotaInformation(**args)

     @cachetools.func.lru_cache(maxsize=None)
     def _getInstanceType(self, instance_type):
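The effect is that a label's quota now carries its core count under two names. A rough sketch (the core and RAM numbers are invented, not real instance data):

    from nodepool.driver.utils import QuotaInformation

    cores, ram = 224, 6291456   # hypothetical values for a high-memory type
    code = 'L-43DA4232'         # quota code matched from the 'u-' prefix

    label_quota = QuotaInformation(cores=cores, ram=ram, instances=1,
                                   **{code: cores})
    # The same cores count against the generic 'cores' limit and against
    # the instance-type-specific quota code.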
@@ -15,6 +15,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from collections import defaultdict
+import math
+
 import voluptuous as v

 from nodepool.driver import ConfigPool
@@ -203,6 +206,13 @@ class AwsPool(ConfigPool):
                 'use-internal-ip', self.provider.use_internal_ip)
             self.host_key_checking = pool_config.get(
                 'host-key-checking', self.provider.host_key_checking)
+            self.max_servers = pool_config.get(
+                'max-servers', self.provider.max_servers)
+            self.max_cores = pool_config.get('max-cores', self.provider.max_cores)
+            self.max_ram = pool_config.get('max-ram', self.provider.max_ram)
+            self.max_resources = self.provider.max_resources.copy()
+            for k, val in pool_config.get('max-resources', {}).items():
+                self.max_resources[k] = val

     @staticmethod
     def getSchema():
@@ -218,6 +228,9 @@ class AwsPool(ConfigPool):
             'public-ipv4': bool,
             'public-ipv6': bool,
             'host-key-checking': bool,
+            'max-cores': int,
+            'max-ram': int,
+            'max-resources': {str: int},
         })
         return pool

@@ -263,6 +276,12 @@ class AwsProviderConfig(ProviderConfig):
         self.image_type = self.provider.get('image-format', 'raw')
         self.image_name_format = '{image_name}-{timestamp}'
         self.post_upload_hook = self.provider.get('post-upload-hook')
+        self.max_servers = self.provider.get('max-servers', math.inf)
+        self.max_cores = self.provider.get('max-cores', math.inf)
+        self.max_ram = self.provider.get('max-ram', math.inf)
+        self.max_resources = defaultdict(lambda: math.inf)
+        for k, val in self.provider.get('max-resources', {}).items():
+            self.max_resources[k] = val

         self.cloud_images = {}
         for image in self.provider.get('cloud-images', []):
@@ -305,6 +324,10 @@ class AwsProviderConfig(ProviderConfig):
             'launch-retries': int,
             'object-storage': object_storage,
             'image-format': v.Any('ova', 'vhd', 'vhdx', 'vmdk', 'raw'),
+            'max-servers': int,
+            'max-cores': int,
+            'max-ram': int,
+            'max-resources': {str: int},
         })
         return v.Schema(provider)

@@ -123,6 +123,10 @@ class StateMachineNodeLauncher(stats.StatsReporter):
         self.node.shell_type = image.shell_type
         self.node.connection_port = image.connection_port
         self.node.connection_type = image.connection_type
+        qi = self.manager.quotaNeededByLabel(label.name, self.handler.pool)
+        if qi:
+            self.node.resources = qi.get_resources()
+
         self.zk.storeNode(self.node)

         # Windows computer names can be no more than 15 chars long.
@@ -386,11 +390,14 @@ class StateMachineHandler(NodeRequestHandler):

         # Now calculate pool specific quota. Values indicating no quota default
         # to math.inf representing infinity that can be calculated with.
-        pool_quota = QuotaInformation(
+        args = dict(
             cores=getattr(self.pool, 'max_cores', None),
             instances=self.pool.max_servers,
             ram=getattr(self.pool, 'max_ram', None),
-            default=math.inf)
+            default=math.inf,
+        )
+        args.update(getattr(self.pool, 'max_resources', {}))
+        pool_quota = QuotaInformation(**args)
         pool_quota.subtract(needed_quota)
         return pool_quota.non_negative()

@@ -403,6 +410,7 @@ class StateMachineHandler(NodeRequestHandler):
         :return: True if there is enough quota, False otherwise
         '''
         needed_quota = self.manager.quotaNeededByLabel(ntype, self.pool)
+        self.log.debug("Needed quota: %s", needed_quota)

         # Calculate remaining quota which is calculated as:
         # quota = <total nodepool quota> - <used quota> - <quota for node>
@@ -418,11 +426,14 @@ class StateMachineHandler(NodeRequestHandler):

         # Now calculate pool specific quota. Values indicating no quota default
         # to math.inf representing infinity that can be calculated with.
-        pool_quota = QuotaInformation(
+        args = dict(
             cores=getattr(self.pool, 'max_cores', None),
             instances=self.pool.max_servers,
             ram=getattr(self.pool, 'max_ram', None),
-            default=math.inf)
+            default=math.inf,
+        )
+        args.update(getattr(self.pool, 'max_resources', {}))
+        pool_quota = QuotaInformation(**args)
         pool_quota.subtract(
             self.manager.estimatedNodepoolQuotaUsed(self.pool))
         self.log.debug("Current pool quota: %s" % pool_quota)
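Together with the pool configuration changes, the handler's quota check now honors per-code limits. A rough illustration using the limits from the aws-limits.yaml fixture below (the "used" values are invented):

    import math

    from nodepool.driver.utils import QuotaInformation

    args = dict(cores=None, instances=10, ram=None, default=math.inf)
    args.update({'L-1216C47A': 1})          # pool max-resources
    pool_quota = QuotaInformation(**args)

    # A hypothetical running node that already consumed the one standard core:
    used = QuotaInformation(cores=2, ram=4096, instances=1, **{'L-1216C47A': 2})
    pool_quota.subtract(used)

    print(pool_quota.non_negative())        # False: the per-code limit is exhausted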
@@ -1,4 +1,5 @@
 # Copyright (C) 2018 Red Hat
+# Copyright 2022 Acme Gating, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -179,7 +180,7 @@ class NodeDeleter(threading.Thread):

 class QuotaInformation:

-    def __init__(self, cores=None, instances=None, ram=None, default=0):
+    def __init__(self, cores=None, instances=None, ram=None, default=0, **kw):
         '''
         Initializes the quota information with some values. None values will
         be initialized with default which will be typically 0 or math.inf
@@ -202,6 +203,9 @@ class QuotaInformation:
                 'ram': self._get_default(ram, default),
             }
         }
+        for k, v in kw.items():
+            self.quota['compute'][k] = v
+        self.default = default

     @staticmethod
     def construct_from_flavor(flavor):
@@ -225,9 +229,14 @@ class QuotaInformation:
         return value if value is not None else default

     def _add_subtract(self, other, add=True):
+        for category in other.quota.keys():
+            self.quota.setdefault(category, {})
+            for resource in other.quota[category].keys():
+                self.quota[category].setdefault(resource, self.default)
         for category in self.quota.keys():
             for resource in self.quota[category].keys():
-                second_value = other.quota.get(category, {}).get(resource, 0)
+                second_value = other.quota.get(category, {}).get(
+                    resource, other.default)
                 if add:
                     self.quota[category][resource] += second_value
                 else:
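With the default-aware lookup above, a resource that one side never mentions is filled in from that side's own default rather than 0. A small sketch:

    import math

    from nodepool.driver.utils import QuotaInformation

    provider = QuotaInformation(cores=8, default=math.inf)
    label = QuotaInformation(cores=2, instances=1, **{'L-43DA4232': 2})

    provider.subtract(label)
    # provider.quota['compute'] ->
    #   {'cores': 6, 'instances': inf, 'ram': inf, 'L-43DA4232': inf}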
nodepool/tests/fixtures/aws/aws-limits.yaml (new file, 46 lines)
@@ -0,0 +1,46 @@
zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

zookeeper-tls:
  ca: {zookeeper_ca}
  cert: {zookeeper_cert}
  key: {zookeeper_key}

tenant-resource-limits:
  - tenant-name: tenant-1
    max-cores: 1024
    'L-43DA4232': 224  # high mem cores

labels:
  - name: standard
  - name: high

providers:
  - name: ec2-us-west-2
    driver: aws
    region-name: us-west-2
    cloud-images:
      - name: ubuntu1404
        image-id: ami-1e749f67
        username: ubuntu
    pools:
      - name: main
        max-servers: 10
        subnet-id: {subnet_id}
        security-group-id: {security_group_id}
        node-attributes:
          key1: value1
          key2: value2
        max-resources:
          'L-1216C47A': 1  # standard cores
        labels:
          - name: standard
            cloud-image: ubuntu1404
            instance-type: t3.medium
            key-name: zuul
          - name: high
            cloud-image: ubuntu1404
            instance-type: u-6tb1.112xlarge
            key-name: zuul
nodepool/tests/fixtures/aws/aws-quota.yaml (new file, 43 lines)
@@ -0,0 +1,43 @@
zookeeper-servers:
  - host: {zookeeper_host}
    port: {zookeeper_port}
    chroot: {zookeeper_chroot}

zookeeper-tls:
  ca: {zookeeper_ca}
  cert: {zookeeper_cert}
  key: {zookeeper_key}

tenant-resource-limits:
  - tenant-name: tenant-1
    max-cores: 1024

labels:
  - name: standard
  - name: high

providers:
  - name: ec2-us-west-2
    driver: aws
    region-name: us-west-2
    cloud-images:
      - name: ubuntu1404
        image-id: ami-1e749f67
        username: ubuntu
    pools:
      - name: main
        max-servers: 10
        subnet-id: {subnet_id}
        security-group-id: {security_group_id}
        node-attributes:
          key1: value1
          key2: value2
        labels:
          - name: standard
            cloud-image: ubuntu1404
            instance-type: t3.medium
            key-name: zuul
          - name: high
            cloud-image: ubuntu1404
            instance-type: u-6tb1.112xlarge
            key-name: zuul
@@ -114,7 +114,8 @@ class TestDriverAws(tests.DBTestCase):
         kw['security_group_id'] = self.security_group_id
         return super().setup_config(*args, **kw)

-    def patchProvider(self, nodepool, provider_name='ec2-us-west-2'):
+    def patchProvider(self, nodepool, provider_name='ec2-us-west-2',
+                      quotas=None):
         for _ in iterate_timeout(
                 30, Exception, 'wait for provider'):
             try:
@@ -138,10 +139,13 @@ class TestDriverAws(tests.DBTestCase):
                 _fake_create_instances

             # moto does not mock service-quotas, so we do it ourselves:
-            def _fake_get_service_quota(*args, **kwargs):
+            def _fake_get_service_quota(ServiceCode, QuotaCode, *args, **kwargs):
                 # This is a simple fake that only returns the number
                 # of cores.
-                return {'Quota': {'Value': 100}}
+                if quotas is None:
+                    return {'Quota': {'Value': 100}}
+                else:
+                    return {'Quota': {'Value': quotas.get(QuotaCode)}}
             provider_manager.adapter.aws_quotas.get_service_quota =\
                 _fake_get_service_quota

@@ -204,6 +208,149 @@ class TestDriverAws(tests.DBTestCase):
         for node in nodes:
             self.waitForNodeDeletion(node)

+    def test_aws_multi_quota(self):
+        # Test multiple instance type quotas (standard and high-mem)
+        configfile = self.setup_config('aws/aws-quota.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.patchProvider(pool, quotas={
+            'L-1216C47A': 1,
+            'L-43DA4232': 224,
+        })
+
+        # Create a high-memory node request.
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('high')
+        self.zk.storeNodeRequest(req1)
+        self.log.debug("Waiting for request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1)
+        node1 = self.assertSuccess(req1)
+
+        # Create a second high-memory node request; this should be
+        # over quota so it won't be fulfilled.
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.node_types.append('high')
+        self.zk.storeNodeRequest(req2)
+        self.log.debug("Waiting for request %s", req2.id)
+        req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
+
+        # Make sure we're paused while we attempt to fulfill the
+        # second request.
+        pool_worker = pool.getPoolWorkers('ec2-us-west-2')
+        for _ in iterate_timeout(30, Exception, 'paused handler'):
+            if pool_worker[0].paused_handler:
+                break
+
+        # Release the first node so that the second can be fulfilled.
+        node1.state = zk.USED
+        self.zk.storeNode(node1)
+        self.waitForNodeDeletion(node1)
+
+        # Make sure the second high node exists now.
+        req2 = self.waitForNodeRequest(req2)
+        self.assertSuccess(req2)
+
+        # Create a standard node request which should succeed even
+        # though we're at quota for high-mem (but not standard).
+        req3 = zk.NodeRequest()
+        req3.state = zk.REQUESTED
+        req3.node_types.append('standard')
+        self.zk.storeNodeRequest(req3)
+        self.log.debug("Waiting for request %s", req3.id)
+        req3 = self.waitForNodeRequest(req3)
+        self.assertSuccess(req3)
+
+    def test_aws_multi_pool_limits(self):
+        # Test multiple instance type quotas (standard and high-mem)
+        # with pool resource limits
+        configfile = self.setup_config('aws/aws-limits.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.patchProvider(pool, quotas={
+            'L-1216C47A': 1000,
+            'L-43DA4232': 1000,
+        })
+
+        # Create a standard node request.
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('standard')
+        self.zk.storeNodeRequest(req1)
+        self.log.debug("Waiting for request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1)
+        node1 = self.assertSuccess(req1)
+
+        # Create a second standard node request; this should be
+        # over max-cores so it won't be fulfilled.
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.node_types.append('standard')
+        self.zk.storeNodeRequest(req2)
+        self.log.debug("Waiting for request %s", req2.id)
+        req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
+
+        # Make sure we're paused while we attempt to fulfill the
+        # second request.
+        pool_worker = pool.getPoolWorkers('ec2-us-west-2')
+        for _ in iterate_timeout(30, Exception, 'paused handler'):
+            if pool_worker[0].paused_handler:
+                break
+
+        # Release the first node so that the second can be fulfilled.
+        node1.state = zk.USED
+        self.zk.storeNode(node1)
+        self.waitForNodeDeletion(node1)
+
+        # Make sure the second standard node exists now.
+        req2 = self.waitForNodeRequest(req2)
+        self.assertSuccess(req2)
+
+    def test_aws_multi_tenant_limits(self):
+        # Test multiple instance type quotas (standard and high-mem)
+        # with tenant resource limits
+        configfile = self.setup_config('aws/aws-limits.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.patchProvider(pool, quotas={
+            'L-1216C47A': 1000,
+            'L-43DA4232': 1000,
+        })
+
+        # Create a high node request.
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.tenant_name = 'tenant-1'
+        req1.node_types.append('high')
+        self.zk.storeNodeRequest(req1)
+        self.log.debug("Waiting for request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1)
+        self.assertSuccess(req1)
+
+        # Create a second high node request; this should be
+        # over quota so it won't be fulfilled.
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.tenant_name = 'tenant-1'
+        req2.node_types.append('high')
+        self.zk.storeNodeRequest(req2)
+        req2 = self.waitForNodeRequest(req2, (zk.REQUESTED,))
+
+        # Create a standard node request which should succeed even
+        # though we're at quota for high-mem (but not standard).
+        req3 = zk.NodeRequest()
+        req3.state = zk.REQUESTED
+        req3.tenant_name = 'tenant-1'
+        req3.node_types.append('standard')
+        self.zk.storeNodeRequest(req3)
+        self.log.debug("Waiting for request %s", req3.id)
+        req3 = self.waitForNodeRequest(req3)
+        self.assertSuccess(req3)
+
+        # Assert that the second request is still being deferred
+        req2 = self.waitForNodeRequest(req2, (zk.REQUESTED,))
+
     def test_aws_node(self):
         req = self.requestNode('aws/aws.yaml', 'ubuntu1404')
         node = self.assertSuccess(req)
nodepool/tests/unit/test_utils.py (new file, 68 lines)
@@ -0,0 +1,68 @@
# Copyright 2022 Acme Gating, LLC
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import copy
import math

from nodepool import tests
from nodepool.driver.utils import QuotaInformation


class TestQutoInformation(tests.BaseTestCase):
    def test_subtract(self):
        provider = QuotaInformation(cores=8, ram=8192, default=math.inf)
        needed = QuotaInformation(cores=2, instances=1)
        expected = QuotaInformation(cores=6, instances=math.inf, ram=8192)

        remain = copy.deepcopy(provider)
        remain.subtract(needed)

        self.assertEqual(expected.quota, remain.quota)

    def test_add(self):
        label1 = QuotaInformation(cores=8, ram=8192)
        label2 = QuotaInformation(cores=2, instances=1)

        needed = copy.deepcopy(label1)
        needed.add(label2)
        expected = QuotaInformation(cores=10, instances=1, ram=8192)
        self.assertEqual(expected.quota, needed.quota)

    def test_extra(self):
        # Test extra quota fields

        # We call them red_, blue_, green_
        # cores here. They are arbitrary names other than the
        # standard cores, ram, instances.
        label1 = QuotaInformation(cores=8, ram=8192,
                                  red_cores=8, green_cores=8)
        label2 = QuotaInformation(cores=2, instances=1, blue_cores=2)

        needed = copy.deepcopy(label1)
        needed.add(label2)
        expected = QuotaInformation(cores=10, instances=1, ram=8192,
                                    red_cores=8, blue_cores=2,
                                    green_cores=8)
        self.assertEqual(expected.quota, needed.quota)

        provider = QuotaInformation(cores=8, ram=8192, default=math.inf,
                                    green_cores=16)
        expected = QuotaInformation(cores=-2, instances=math.inf, ram=0,
                                    red_cores=math.inf, blue_cores=math.inf,
                                    green_cores=8)

        remain = copy.deepcopy(provider)
        remain.subtract(needed)

        self.assertEqual(expected.quota, remain.quota)
releasenotes/notes/aws-multi-quota-fbddefb56d0694a4.yaml (new file, 7 lines)
@@ -0,0 +1,7 @@
---
features:
  - |
    The AWS driver now supports multiple quotas for specific instance
    types. This support is automatic, but also includes corresponding
    enhancements to provider, pool, and tenant limits configured in
    Nodepool.