Merge "AWS multi quota support"

2022-07-29 17:01:09 +00:00 · 2022-07-29 17:01:09 +00:00 · 123a32f922
commit 123a32f922
parent 4c947487d1 207d8ac63c
13 changed files with 524 additions and 34 deletions
--- a/doc/source/aws.rst
+++ b/doc/source/aws.rst
@ -109,6 +109,42 @@ Selecting the ``aws`` driver adds the following options to the
      until that instance is reported as "active".  If the timeout is
      exceeded, the node launch is aborted and the instance deleted.

+   .. attr:: max-cores
+      :type: int
+      :default: unlimited
+
+      Maximum number of cores usable from this provider's pools by default.
+
+   .. attr:: max-servers
+      :type: int
+      :default: unlimited
+
+      Maximum number of servers spawnable from this provider's pools by default.
+
+   .. attr:: max-ram
+      :type: int
+      :default: unlimited
+
+      Maximum RAM usable from this provider's pools by default.
+
+   .. attr:: max-resources
+      :type: dict
+      :default: unlimited
+
+      A dictionary of other quota resource limits.  AWS has quotas
+      for certain instance types.  These may be specified here to
+      limit Nodepool's usage.
+
+      The following example limits the number of high-memory
+      instance cores:
+
+      .. code-block:: yaml
+
+         max-resources:
+           'L-43DA4232': 224
+
+      See `instance quotas`_ for more information.
+
   .. attr:: launch-retries
      :default: 3

@ -379,6 +415,42 @@ Selecting the ``aws`` driver adds the following options to the
         A dictionary of key-value pairs that will be stored with the node data
         in ZooKeeper. The keys and values can be any arbitrary string.

+      .. attr:: max-cores
+         :type: int
+
+         Maximum number of cores usable from this pool.  Defaults to
+         :attr:`providers.[aws].max-cores`.
+
+      .. attr:: max-servers
+         :type: int
+
+         Maximum number of servers spawnable from this pool.  Defaults to
+         :attr:`providers.[aws].max-servers`.
+
+      .. attr:: max-ram
+         :type: int
+
+         Maximum RAM usable from this pool.  Defaults to
+         :attr:`providers.[aws].max-ram`.
+
+      .. attr:: max-resources
+         :type: dict
+
+         A dictionary of other quota resource limits.  AWS has quotas
+         for certain instance types.  These may be specified here to
+         limit Nodepool's usage.  Defaults to
+         :attr:`providers.[aws].max-resources`.
+
+         The following example limits the number of high-memory
+         instance cores:
+
+         .. code-block:: yaml
+
+            max-resources:
+              'L-43DA4232': 224
+
+         See `instance quotas`_ for more information.
+
      .. attr:: subnet-id

         If provided, specifies the subnet to assign to the primary network
@ -538,3 +610,4 @@ Selecting the ``aws`` driver adds the following options to the
 .. _`Boto configuration`: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html
 .. _`Boto describe images`: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.Client.describe_images
 .. _`VM Import/Export service role`: https://docs.aws.amazon.com/vm-import/latest/userguide/vmie_prereqs.html#vmimport-role
+.. _`instance quotas`: https://us-west-1.console.aws.amazon.com/servicequotas/home/services/ec2/quotas
--- a/doc/source/configuration.rst
+++ b/doc/source/configuration.rst
@ -548,8 +548,13 @@ Options
          max-servers: 10
          max-cores: 200
          max-ram: 16565
+          'L-43DA4232': 224

-   Each entry is a dictionary with the following keys.
+   Each entry is a dictionary with the following keys.  Any other keys
+   are interpreted as driver-specific resource limits (otherwise
+   specified as ``max-resources`` in the provider configuration).  The
+   only driver that currently supports additional resource limits is
+   AWS.

   .. attr:: tenant-name
      :type: str
--- a/nodepool/cmd/config_validator.py
+++ b/nodepool/cmd/config_validator.py
@ -67,6 +67,7 @@ class ConfigValidator:
            'max-cores': int,
            'max-ram': int,
            'max-servers': int,
+            str: int,
        }

        top_level = {
--- a/nodepool/config.py
+++ b/nodepool/config.py
@ -255,19 +255,14 @@ class Config(ConfigValue):
        if not tenant_resource_limits_cfg:
            return
        for resource_limit in tenant_resource_limits_cfg:
-            tenant_name = resource_limit['tenant-name']
-            max_cores = resource_limit.get('max-cores')
-            max_ram = resource_limit.get('max-ram')
-            max_servers = resource_limit.get('max-servers')
-
+            resource_limit = resource_limit.copy()
+            tenant_name = resource_limit.pop('tenant-name')
            limits = {}
-            if max_cores:
-                limits['cores'] = max_cores
-            if max_servers:
-                limits['instances'] = max_servers
-            if max_ram:
-                limits['ram'] = max_ram
-
+            limits['cores'] = resource_limit.pop('max-cores', math.inf)
+            limits['instances'] = resource_limit.pop('max-servers', math.inf)
+            limits['ram'] = resource_limit.pop('max-ram', math.inf)
+            for k, v in resource_limit.items():
+                limits[k] = v
            self.tenant_resource_limits[tenant_name] = limits


--- a/nodepool/driver/aws/adapter.py
+++ b/nodepool/driver/aws/adapter.py
@ -42,6 +42,41 @@ def tag_list_to_dict(taglist):
    return {t["Key"]: t["Value"] for t in taglist}


+# This is a map of instance types to quota codes.  There does not
+# appear to be an automated way to determine what quota code to use
+# for an instance type, therefore this list was manually created by
+# visiting
+# https://us-west-1.console.aws.amazon.com/servicequotas/home/services/ec2/quotas
+# and filtering by "Instances".  An example description is "Running
+# On-Demand P instances" which we can infer means we should use that
+# quota code for instance types starting with the letter "p".  All
+# instance type names follow the format "([a-z\-]+)\d", so we can
+# match the first letters (up to the first number) of the instance
+# type name with the letters in the quota name.  The prefix "u-" for
+# "Running On-Demand High Memory instances" was determined from
+# https://aws.amazon.com/ec2/instance-types/high-memory/
+
+QUOTA_CODES = {
+    'a': 'L-1216C47A',
+    'c': 'L-1216C47A',
+    'd': 'L-1216C47A',
+    'h': 'L-1216C47A',
+    'i': 'L-1216C47A',
+    'm': 'L-1216C47A',
+    'r': 'L-1216C47A',
+    't': 'L-1216C47A',
+    'z': 'L-1216C47A',
+    'dl': 'L-6E869C2A',
+    'f': 'L-74FC7D96',
+    'g': 'L-DB2E81BA',
+    'vt': 'L-DB2E81BA',
+    'u-': 'L-43DA4232',  # 'high memory'
+    'inf': 'L-1945791B',
+    'p': 'L-417A185B',
+    'x': 'L-7295265B',
+}
+
+
 class AwsInstance(statemachine.Instance):
    def __init__(self, instance, quota):
        super().__init__()
@ -293,15 +328,28 @@ class AwsAdapter(statemachine.Adapter):
            yield AwsInstance(instance, quota)

    def getQuotaLimits(self):
-        with self.non_mutating_rate_limiter:
-            self.log.debug("Getting quota limits")
-            response = self.aws_quotas.get_service_quota(
-                ServiceCode='ec2',
-                QuotaCode='L-1216C47A'
-            )
-            cores = response['Quota']['Value']
-        return QuotaInformation(cores=cores,
-                                default=math.inf)
+        # Get the instance types that this provider handles
+        instance_types = set()
+        for pool in self.provider.pools.values():
+            for label in pool.labels.values():
+                instance_types.add(label.instance_type)
+        args = dict(default=math.inf)
+        for instance_type in instance_types:
+            code = self._getQuotaCodeForInstanceType(instance_type)
+            if code in args:
+                continue
+            if not code:
+                self.log.warning("Unknown quota code for instance type: %s",
+                                 instance_type)
+                continue
+            with self.non_mutating_rate_limiter:
+                self.log.debug("Getting quota limits for %s", code)
+                response = self.aws_quotas.get_service_quota(
+                    ServiceCode='ec2',
+                    QuotaCode=code,
+                )
+                args[code] = response['Quota']['Value']
+        return QuotaInformation(**args)

    def getQuotaForLabel(self, label):
        return self._getQuotaForInstanceType(label.instance_type)
@ -454,13 +502,27 @@ class AwsAdapter(statemachine.Adapter):
                    # Return the first and only task
                    return task

+    instance_key_re = re.compile(r'([a-z\-]+)\d.*')
+
+    def _getQuotaCodeForInstanceType(self, instance_type):
+        m = self.instance_key_re.match(instance_type)
+        if m:
+            key = m.group(1)
+            return QUOTA_CODES.get(key)
+
    def _getQuotaForInstanceType(self, instance_type):
        itype = self._getInstanceType(instance_type)
        cores = itype['InstanceTypes'][0]['VCpuInfo']['DefaultCores']
        ram = itype['InstanceTypes'][0]['MemoryInfo']['SizeInMiB']
-        return QuotaInformation(cores=cores,
-                                ram=ram,
-                                instances=1)
+        code = self._getQuotaCodeForInstanceType(instance_type)
+        # We include cores twice: once to match the overall cores quota
+        # (which may be set as a tenant resource limit), and a second
+        # time as the specific AWS quota code which is for a specific
+        # instance type.
+        args = dict(cores=cores, ram=ram, instances=1)
+        if code:
+            args[code] = cores
+        return QuotaInformation(**args)

    @cachetools.func.lru_cache(maxsize=None)
    def _getInstanceType(self, instance_type):
--- a/nodepool/driver/aws/config.py
+++ b/nodepool/driver/aws/config.py
@ -15,6 +15,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from collections import defaultdict
+import math
+
 import voluptuous as v

 from nodepool.driver import ConfigPool
@ -203,6 +206,13 @@ class AwsPool(ConfigPool):
            'use-internal-ip', self.provider.use_internal_ip)
        self.host_key_checking = pool_config.get(
            'host-key-checking', self.provider.host_key_checking)
+        self.max_servers = pool_config.get(
+            'max-servers', self.provider.max_servers)
+        self.max_cores = pool_config.get('max-cores', self.provider.max_cores)
+        self.max_ram = pool_config.get('max-ram', self.provider.max_ram)
+        self.max_resources = self.provider.max_resources.copy()
+        for k, val in pool_config.get('max-resources', {}).items():
+            self.max_resources[k] = val

    @staticmethod
    def getSchema():
@ -218,6 +228,9 @@ class AwsPool(ConfigPool):
            'public-ipv4': bool,
            'public-ipv6': bool,
            'host-key-checking': bool,
+            'max-cores': int,
+            'max-ram': int,
+            'max-resources': {str: int},
        })
        return pool

@ -263,6 +276,12 @@ class AwsProviderConfig(ProviderConfig):
        self.image_type = self.provider.get('image-format', 'raw')
        self.image_name_format = '{image_name}-{timestamp}'
        self.post_upload_hook = self.provider.get('post-upload-hook')
+        self.max_servers = self.provider.get('max-servers', math.inf)
+        self.max_cores = self.provider.get('max-cores', math.inf)
+        self.max_ram = self.provider.get('max-ram', math.inf)
+        self.max_resources = defaultdict(lambda: math.inf)
+        for k, val in self.provider.get('max-resources', {}).items():
+            self.max_resources[k] = val

        self.cloud_images = {}
        for image in self.provider.get('cloud-images', []):
@ -305,6 +324,10 @@ class AwsProviderConfig(ProviderConfig):
            'launch-retries': int,
            'object-storage': object_storage,
            'image-format': v.Any('ova', 'vhd', 'vhdx', 'vmdk', 'raw'),
+            'max-servers': int,
+            'max-cores': int,
+            'max-ram': int,
+            'max-resources': {str: int},
        })
        return v.Schema(provider)

--- a/nodepool/driver/statemachine.py
+++ b/nodepool/driver/statemachine.py
@ -123,6 +123,10 @@ class StateMachineNodeLauncher(stats.StatsReporter):
        self.node.shell_type = image.shell_type
        self.node.connection_port = image.connection_port
        self.node.connection_type = image.connection_type
+        qi = self.manager.quotaNeededByLabel(label.name, self.handler.pool)
+        if qi:
+            self.node.resources = qi.get_resources()
+
        self.zk.storeNode(self.node)

        # Windows computer names can be no more than 15 chars long.
@ -386,11 +390,14 @@ class StateMachineHandler(NodeRequestHandler):

        # Now calculate pool specific quota. Values indicating no quota default
        # to math.inf representing infinity that can be calculated with.
-        pool_quota = QuotaInformation(
+        args = dict(
            cores=getattr(self.pool, 'max_cores', None),
            instances=self.pool.max_servers,
            ram=getattr(self.pool, 'max_ram', None),
-            default=math.inf)
+            default=math.inf,
+        )
+        args.update(getattr(self.pool, 'max_resources', {}))
+        pool_quota = QuotaInformation(**args)
        pool_quota.subtract(needed_quota)
        return pool_quota.non_negative()

@ -403,6 +410,7 @@ class StateMachineHandler(NodeRequestHandler):
        :return: True if there is enough quota, False otherwise
        '''
        needed_quota = self.manager.quotaNeededByLabel(ntype, self.pool)
+        self.log.debug("Needed quota: %s", needed_quota)

        # Calculate remaining quota which is calculated as:
        # quota = <total nodepool quota> - <used quota> - <quota for node>
@ -418,11 +426,14 @@ class StateMachineHandler(NodeRequestHandler):

        # Now calculate pool specific quota. Values indicating no quota default
        # to math.inf representing infinity that can be calculated with.
-        pool_quota = QuotaInformation(
+        args = dict(
            cores=getattr(self.pool, 'max_cores', None),
            instances=self.pool.max_servers,
            ram=getattr(self.pool, 'max_ram', None),
-            default=math.inf)
+            default=math.inf,
+        )
+        args.update(getattr(self.pool, 'max_resources', {}))
+        pool_quota = QuotaInformation(**args)
        pool_quota.subtract(
            self.manager.estimatedNodepoolQuotaUsed(self.pool))
        self.log.debug("Current pool quota: %s" % pool_quota)
--- a/nodepool/driver/utils.py
+++ b/nodepool/driver/utils.py
@ -1,4 +1,5 @@
 # Copyright (C) 2018 Red Hat
+# Copyright 2022 Acme Gating, LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@ -179,7 +180,7 @@ class NodeDeleter(threading.Thread):

 class QuotaInformation:

-    def __init__(self, cores=None, instances=None, ram=None, default=0):
+    def __init__(self, cores=None, instances=None, ram=None, default=0, **kw):
        '''
        Initializes the quota information with some values. None values will
        be initialized with default which will be typically 0 or math.inf
@ -202,6 +203,9 @@ class QuotaInformation:
                'ram': self._get_default(ram, default),
            }
        }
+        for k, v in kw.items():
+            self.quota['compute'][k] = v
+        self.default = default

    @staticmethod
    def construct_from_flavor(flavor):
@ -225,9 +229,14 @@ class QuotaInformation:
        return value if value is not None else default

    def _add_subtract(self, other, add=True):
+        for category in other.quota.keys():
+            self.quota.setdefault(category, {})
+            for resource in other.quota[category].keys():
+                self.quota[category].setdefault(resource, self.default)
        for category in self.quota.keys():
            for resource in self.quota[category].keys():
-                second_value = other.quota.get(category, {}).get(resource, 0)
+                second_value = other.quota.get(category, {}).get(
+                    resource, other.default)
                if add:
                    self.quota[category][resource] += second_value
                else:
--- a/nodepool/tests/fixtures/aws/aws-limits.yaml
+++ b/nodepool/tests/fixtures/aws/aws-limits.yaml
@ -0,0 +1,46 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+tenant-resource-limits:
+  - tenant-name: tenant-1
+    max-cores: 1024
+    'L-43DA4232': 224  # high mem cores
+
+labels:
+  - name: standard
+  - name: high
+
+providers:
+  - name: ec2-us-west-2
+    driver: aws
+    region-name: us-west-2
+    cloud-images:
+      - name: ubuntu1404
+        image-id: ami-1e749f67
+        username: ubuntu
+    pools:
+      - name: main
+        max-servers: 10
+        subnet-id: {subnet_id}
+        security-group-id: {security_group_id}
+        node-attributes:
+          key1: value1
+          key2: value2
+        max-resources:
+          'L-1216C47A': 1  # standard cores
+        labels:
+          - name: standard
+            cloud-image: ubuntu1404
+            instance-type: t3.medium
+            key-name: zuul
+          - name: high
+            cloud-image: ubuntu1404
+            instance-type: u-6tb1.112xlarge
+            key-name: zuul
--- a/nodepool/tests/fixtures/aws/aws-quota.yaml
+++ b/nodepool/tests/fixtures/aws/aws-quota.yaml
@ -0,0 +1,43 @@
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+tenant-resource-limits:
+  - tenant-name: tenant-1
+    max-cores: 1024
+
+labels:
+  - name: standard
+  - name: high
+
+providers:
+  - name: ec2-us-west-2
+    driver: aws
+    region-name: us-west-2
+    cloud-images:
+      - name: ubuntu1404
+        image-id: ami-1e749f67
+        username: ubuntu
+    pools:
+      - name: main
+        max-servers: 10
+        subnet-id: {subnet_id}
+        security-group-id: {security_group_id}
+        node-attributes:
+          key1: value1
+          key2: value2
+        labels:
+          - name: standard
+            cloud-image: ubuntu1404
+            instance-type: t3.medium
+            key-name: zuul
+          - name: high
+            cloud-image: ubuntu1404
+            instance-type: u-6tb1.112xlarge
+            key-name: zuul
--- a/nodepool/tests/unit/test_driver_aws.py
+++ b/nodepool/tests/unit/test_driver_aws.py
@ -114,7 +114,8 @@ class TestDriverAws(tests.DBTestCase):
        kw['security_group_id'] = self.security_group_id
        return super().setup_config(*args, **kw)

-    def patchProvider(self, nodepool, provider_name='ec2-us-west-2'):
+    def patchProvider(self, nodepool, provider_name='ec2-us-west-2',
+                      quotas=None):
        for _ in iterate_timeout(
                30, Exception, 'wait for provider'):
            try:
@ -138,10 +139,13 @@ class TestDriverAws(tests.DBTestCase):
            _fake_create_instances

        # moto does not mock service-quotas, so we do it ourselves:
-        def _fake_get_service_quota(*args, **kwargs):
+        def _fake_get_service_quota(ServiceCode, QuotaCode, *args, **kwargs):
            # This is a simple fake that only returns the number
            # of cores.
-            return {'Quota': {'Value': 100}}
+            if quotas is None:
+                return {'Quota': {'Value': 100}}
+            else:
+                return {'Quota': {'Value': quotas.get(QuotaCode)}}
        provider_manager.adapter.aws_quotas.get_service_quota =\
            _fake_get_service_quota

@ -204,6 +208,149 @@ class TestDriverAws(tests.DBTestCase):
        for node in nodes:
            self.waitForNodeDeletion(node)

+    def test_aws_multi_quota(self):
+        # Test multiple instance type quotas (standard and high-mem)
+        configfile = self.setup_config('aws/aws-quota.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.patchProvider(pool, quotas={
+            'L-1216C47A': 1,
+            'L-43DA4232': 224,
+        })
+
+        # Create a high-memory node request.
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('high')
+        self.zk.storeNodeRequest(req1)
+        self.log.debug("Waiting for request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1)
+        node1 = self.assertSuccess(req1)
+
+        # Create a second high-memory node request; this should be
+        # over quota so it won't be fulfilled.
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.node_types.append('high')
+        self.zk.storeNodeRequest(req2)
+        self.log.debug("Waiting for request %s", req2.id)
+        req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
+
+        # Make sure we're paused while we attempt to fulfill the
+        # second request.
+        pool_worker = pool.getPoolWorkers('ec2-us-west-2')
+        for _ in iterate_timeout(30, Exception, 'paused handler'):
+            if pool_worker[0].paused_handler:
+                break
+
+        # Release the first node so that the second can be fulfilled.
+        node1.state = zk.USED
+        self.zk.storeNode(node1)
+        self.waitForNodeDeletion(node1)
+
+        # Make sure the second high node exists now.
+        req2 = self.waitForNodeRequest(req2)
+        self.assertSuccess(req2)
+
+        # Create a standard node request which should succeed even
+        # though we're at quota for high-mem (but not standard).
+        req3 = zk.NodeRequest()
+        req3.state = zk.REQUESTED
+        req3.node_types.append('standard')
+        self.zk.storeNodeRequest(req3)
+        self.log.debug("Waiting for request %s", req3.id)
+        req3 = self.waitForNodeRequest(req3)
+        self.assertSuccess(req3)
+
+    def test_aws_multi_pool_limits(self):
+        # Test multiple instance type quotas (standard and high-mem)
+        # with pool resource limits
+        configfile = self.setup_config('aws/aws-limits.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.patchProvider(pool, quotas={
+            'L-1216C47A': 1000,
+            'L-43DA4232': 1000,
+        })
+
+        # Create a standard node request.
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('standard')
+        self.zk.storeNodeRequest(req1)
+        self.log.debug("Waiting for request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1)
+        node1 = self.assertSuccess(req1)
+
+        # Create a second standard node request; this should be over the
+        # pool's 'L-1216C47A' max-resources limit so it won't be fulfilled.
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.node_types.append('standard')
+        self.zk.storeNodeRequest(req2)
+        self.log.debug("Waiting for request %s", req2.id)
+        req2 = self.waitForNodeRequest(req2, (zk.PENDING,))
+
+        # Make sure we're paused while we attempt to fulfill the
+        # second request.
+        pool_worker = pool.getPoolWorkers('ec2-us-west-2')
+        for _ in iterate_timeout(30, Exception, 'paused handler'):
+            if pool_worker[0].paused_handler:
+                break
+
+        # Release the first node so that the second can be fulfilled.
+        node1.state = zk.USED
+        self.zk.storeNode(node1)
+        self.waitForNodeDeletion(node1)
+
+        # Make sure the second standard node exists now.
+        req2 = self.waitForNodeRequest(req2)
+        self.assertSuccess(req2)
+
+    def test_aws_multi_tenant_limits(self):
+        # Test multiple instance type quotas (standard and high-mem)
+        # with tenant resource limits
+        configfile = self.setup_config('aws/aws-limits.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        self.patchProvider(pool, quotas={
+            'L-1216C47A': 1000,
+            'L-43DA4232': 1000,
+        })
+
+        # Create a high node request.
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.tenant_name = 'tenant-1'
+        req1.node_types.append('high')
+        self.zk.storeNodeRequest(req1)
+        self.log.debug("Waiting for request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1)
+        self.assertSuccess(req1)
+
+        # Create a second high node request; this should be
+        # over quota so it won't be fulfilled.
+        req2 = zk.NodeRequest()
+        req2.state = zk.REQUESTED
+        req2.tenant_name = 'tenant-1'
+        req2.node_types.append('high')
+        self.zk.storeNodeRequest(req2)
+        req2 = self.waitForNodeRequest(req2, (zk.REQUESTED,))
+
+        # Create a standard node request which should succeed even
+        # though we're at quota for high-mem (but not standard).
+        req3 = zk.NodeRequest()
+        req3.state = zk.REQUESTED
+        req3.tenant_name = 'tenant-1'
+        req3.node_types.append('standard')
+        self.zk.storeNodeRequest(req3)
+        self.log.debug("Waiting for request %s", req3.id)
+        req3 = self.waitForNodeRequest(req3)
+        self.assertSuccess(req3)
+
+        # Assert that the second request is still being deferred
+        req2 = self.waitForNodeRequest(req2, (zk.REQUESTED,))
+
    def test_aws_node(self):
        req = self.requestNode('aws/aws.yaml', 'ubuntu1404')
        node = self.assertSuccess(req)
--- a/nodepool/tests/unit/test_utils.py
+++ b/nodepool/tests/unit/test_utils.py
@ -0,0 +1,68 @@
+# Copyright 2022 Acme Gating, LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import copy
+import math
+
+from nodepool import tests
+from nodepool.driver.utils import QuotaInformation
+
+
+class TestQutoInformation(tests.BaseTestCase):
+    def test_subtract(self):
+        provider = QuotaInformation(cores=8, ram=8192, default=math.inf)
+        needed = QuotaInformation(cores=2, instances=1)
+        expected = QuotaInformation(cores=6, instances=math.inf, ram=8192)
+
+        remain = copy.deepcopy(provider)
+        remain.subtract(needed)
+
+        self.assertEqual(expected.quota, remain.quota)
+
+    def test_add(self):
+        label1 = QuotaInformation(cores=8, ram=8192)
+        label2 = QuotaInformation(cores=2, instances=1)
+
+        needed = copy.deepcopy(label1)
+        needed.add(label2)
+        expected = QuotaInformation(cores=10, instances=1, ram=8192)
+        self.assertEqual(expected.quota, needed.quota)
+
+    def test_extra(self):
+        # Test extra quota fields
+
+        # We call them red_, blue_, green_
+        # cores here.  They are arbitrary names other than the
+        # standard cores, ram, instances.
+        label1 = QuotaInformation(cores=8, ram=8192,
+                                  red_cores=8, green_cores=8)
+        label2 = QuotaInformation(cores=2, instances=1, blue_cores=2)
+
+        needed = copy.deepcopy(label1)
+        needed.add(label2)
+        expected = QuotaInformation(cores=10, instances=1, ram=8192,
+                                    red_cores=8, blue_cores=2,
+                                    green_cores=8)
+        self.assertEqual(expected.quota, needed.quota)
+
+        provider = QuotaInformation(cores=8, ram=8192, default=math.inf,
+                                    green_cores=16)
+        expected = QuotaInformation(cores=-2, instances=math.inf, ram=0,
+                                    red_cores=math.inf, blue_cores=math.inf,
+                                    green_cores=8)
+
+        remain = copy.deepcopy(provider)
+        remain.subtract(needed)
+
+        self.assertEqual(expected.quota, remain.quota)
--- a/releasenotes/notes/aws-multi-quota-fbddefb56d0694a4.yaml
+++ b/releasenotes/notes/aws-multi-quota-fbddefb56d0694a4.yaml
@ -0,0 +1,7 @@
+---
+features:
+  - |
+    The AWS driver now supports multiple quotas for specific instance
+    types.  This support is automatic, but also includes corresponding
+    enhancements to provider, pool, and tenant limits configured in
+    Nodepool.