From ea35fd5152d5ddaa63287ef1efb851216a18c829 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Wed, 11 May 2022 14:41:27 -0700 Subject: [PATCH] Add provider/pool priority support This lets users configure providers which should fulfill requests before other providers. This facilitates using a less expensive cloud before using a more expensive one. The default priority is 100, to facilitate either raising above or lowering below the default (while using only positive integers in order to avoid confusion). Change-Id: I969ea821e10a7773a0a8d135a4f13407319362ee --- doc/source/aws.rst | 14 +++ doc/source/azure.rst | 14 +++ doc/source/configuration.rst | 13 +++ doc/source/gce.rst | 14 +++ doc/source/ibmvpc.rst | 14 +++ doc/source/kubernetes.rst | 14 +++ doc/source/metastatic.rst | 14 +++ doc/source/openshift-pods.rst | 14 +++ doc/source/openshift.rst | 14 +++ doc/source/openstack.rst | 14 +++ doc/source/static.rst | 14 +++ nodepool/driver/__init__.py | 7 +- nodepool/driver/ibmvpc/config.py | 7 +- nodepool/launcher.py | 54 +++++++++--- nodepool/tests/fixtures/priority.yaml | 87 +++++++++++++++++++ nodepool/tests/unit/test_launcher.py | 44 ++++++++++ nodepool/zk/components.py | 5 ++ .../notes/priority-cb2f0ad1829fbaaf.yaml | 9 ++ 18 files changed, 349 insertions(+), 17 deletions(-) create mode 100644 nodepool/tests/fixtures/priority.yaml create mode 100644 releasenotes/notes/priority-cb2f0ad1829fbaaf.yaml diff --git a/doc/source/aws.rst b/doc/source/aws.rst index 4f0fe51f3..7541b649d 100644 --- a/doc/source/aws.rst +++ b/doc/source/aws.rst @@ -353,6 +353,20 @@ Selecting the ``aws`` driver adds the following options to the A unique name within the provider for this pool of resources. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/doc/source/azure.rst b/doc/source/azure.rst index 5bfcefd99..12be72877 100644 --- a/doc/source/azure.rst +++ b/doc/source/azure.rst @@ -484,6 +484,20 @@ section of the configuration. A unique name within the provider for this pool of resources. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index def0b9c9a..435eebdff 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -459,6 +459,19 @@ Options thread, this can be useful for limiting the number of threads used by the nodepool-launcher daemon. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider (a lesser number is a higher + priority). 
Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting provides the default for each provider pool, but + the value can be overridden in the pool configuration. + .. attr:: driver :type: string :default: openstack diff --git a/doc/source/gce.rst index dd66f6a40..59a58801e 100644 --- a/doc/source/gce.rst +++ b/doc/source/gce.rst @@ -195,6 +195,20 @@ section of the configuration. A unique name within the provider for this pool of resources. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/doc/source/ibmvpc.rst index 435ac3976..7a6768d9b 100644 --- a/doc/source/ibmvpc.rst +++ b/doc/source/ibmvpc.rst @@ -567,6 +567,20 @@ section of the configuration. A unique name within the provider for this pool of resources. The name may not contain underscores. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: zone Name of the IBM Cloud region zone to interact with (e.g., diff --git a/doc/source/kubernetes.rst index 83ea387cb..416774a84 100644 --- a/doc/source/kubernetes.rst +++ b/doc/source/kubernetes.rst @@ -76,6 +76,20 @@ Selecting the kubernetes driver adds the following options to the Namespaces are prefixed with the pool's name. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/doc/source/metastatic.rst index 88091e222..9ff9853c3 100644 --- a/doc/source/metastatic.rst +++ b/doc/source/metastatic.rst @@ -71,6 +71,20 @@ itself, which is "meta". A unique name within the provider for this pool of resources. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused.
This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: max-servers :type: int diff --git a/doc/source/openshift-pods.rst b/doc/source/openshift-pods.rst index 63e2f937b..199976f1f 100644 --- a/doc/source/openshift-pods.rst +++ b/doc/source/openshift-pods.rst @@ -61,6 +61,20 @@ Selecting the openshift pods driver adds the following options to the The project's (namespace) name that will be used to create the pods. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/doc/source/openshift.rst b/doc/source/openshift.rst index 0c777fe43..2479ff5b6 100644 --- a/doc/source/openshift.rst +++ b/doc/source/openshift.rst @@ -78,6 +78,20 @@ Selecting the openshift driver adds the following options to the Project's name are prefixed with the pool's name. + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/doc/source/openstack.rst b/doc/source/openstack.rst index 55768710e..229227a6b 100644 --- a/doc/source/openstack.rst +++ b/doc/source/openstack.rst @@ -422,6 +422,20 @@ Selecting the OpenStack driver adds the following options to the Pool name + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. + + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/doc/source/static.rst b/doc/source/static.rst index 544dab02a..70a6137d4 100644 --- a/doc/source/static.rst +++ b/doc/source/static.rst @@ -60,6 +60,20 @@ Selecting the static driver adds the following options to the Pool name + .. attr:: priority + :type: int + :default: 100 + + The priority of this provider pool (a lesser number is a higher + priority). Nodepool launchers will yield requests to other + provider pools with a higher priority as long as they are not + paused. This means that in general, higher priority pools will + reach quota first before lower priority pools begin to be used. 
+ + This setting may be specified at the provider level in order + to apply to all pools within that provider, or it can be + overridden here for a specific pool. + .. attr:: node-attributes :type: dict diff --git a/nodepool/driver/__init__.py b/nodepool/driver/__init__.py index 68b72ff45..08bef6dcc 100644 --- a/nodepool/driver/__init__.py +++ b/nodepool/driver/__init__.py @@ -860,6 +860,7 @@ class ConfigPool(ConfigValue, metaclass=abc.ABCMeta): self.labels = {} self.max_servers = math.inf self.node_attributes = None + self.priority = None @classmethod def getCommonSchemaDict(self): @@ -876,6 +877,7 @@ class ConfigPool(ConfigValue, metaclass=abc.ABCMeta): return { 'max-servers': int, 'node-attributes': dict, + 'priority': int, } @abc.abstractmethod @@ -894,6 +896,7 @@ class ConfigPool(ConfigValue, metaclass=abc.ABCMeta): ''' self.max_servers = pool_config.get('max-servers', math.inf) self.node_attributes = pool_config.get('node-attributes') + self.priority = pool_config.get('priority', None) class DriverConfig(ConfigValue): @@ -913,13 +916,15 @@ class ProviderConfig(ConfigValue, metaclass=abc.ABCMeta): self.driver = DriverConfig() self.driver.name = provider.get('driver', 'openstack') self.max_concurrency = provider.get('max-concurrency', -1) + self.priority = provider.get('priority', None) @classmethod def getCommonSchemaDict(self): return { v.Required('name'): str, 'driver': str, - 'max-concurrency': int + 'max-concurrency': int, + 'priority': int, } @property diff --git a/nodepool/driver/ibmvpc/config.py b/nodepool/driver/ibmvpc/config.py index c731ac92b..c624cccfb 100644 --- a/nodepool/driver/ibmvpc/config.py +++ b/nodepool/driver/ibmvpc/config.py @@ -297,12 +297,11 @@ class IBMVPCProviderConfig(ProviderConfig): 'ram': int, } - provider = { + provider = ProviderConfig.getCommonSchemaDict() + provider.update({ # Normally provided by superclass; we override to add the # underscore restriction v.Required('name'): v.Match(r'^[^_]+$'), - 'driver': str, - 'max-concurrency': int, # For this class v.Required('pools'): [pool], v.Required('vpc'): str, @@ -322,7 +321,7 @@ class IBMVPCProviderConfig(ProviderConfig): 'launch-retries': int, 'credentials-file': str, 'object-storage': object_storage, - } + }) return v.Schema(provider) def getSupportedLabels(self, pool_name=None): diff --git a/nodepool/launcher.py b/nodepool/launcher.py index 4e7710e25..5e32a6b4a 100644 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -81,6 +81,17 @@ class PoolWorker(threading.Thread, stats.StatsReporter): uuid.uuid4().hex) stats.StatsReporter.__init__(self) + def getPriority(self): + pool = self.getPoolConfig() + provider = self.getProviderConfig() + if pool.priority is not None: + priority = pool.priority + elif provider.priority is not None: + priority = provider.priority + else: + priority = 100 + return priority + # --------------------------------------------------------------- # Private methods # --------------------------------------------------------------- @@ -168,21 +179,37 @@ class PoolWorker(threading.Thread, stats.StatsReporter): log = get_annotated_logger(self.log, event_id=req.event_id, node_request_id=req.id) + # Get the candidate launchers for these nodes + candidate_launchers = set( + x for x in launchers + if (set(x.supported_labels).issuperset(set(req.node_types)) and + x.id not in req.declined_by) + ) # Skip this request if it is requesting another provider # which is online if req.provider and req.provider != self.provider_name: # The request is asking for a specific provider - 
candidate_launchers = set( - x.id for x in launchers + launcher_ids_for_provider = set( + x.id for x in candidate_launchers if x.provider_name == req.provider - and set(x.supported_labels).issuperset(req.node_types)) - if candidate_launchers: - # There is a launcher online which can satisfy the request - if not candidate_launchers.issubset(set(req.declined_by)): - # It has not yet declined the request, so yield to it. - log.debug("Yielding request to provider %s %s", - req.provider, candidate_launchers) - continue + ) + if launcher_ids_for_provider: + # There is a launcher online which can satisfy the + # request that has not yet declined the request, + # so yield to it. + log.debug("Yielding request to provider %s %s", + req.provider, launcher_ids_for_provider) + continue + + priority = self.getPriority() + launcher_ids_with_higher_priority = set( + x.id for x in candidate_launchers + if x.priority < priority and not x.paused + ) + if launcher_ids_with_higher_priority: + log.debug("Yielding request to higher priority providers %s", + launcher_ids_with_higher_priority) + continue if has_quota_support and not all(label_quota.get(l, math.inf) > 0 for l in req.node_types): @@ -363,6 +390,7 @@ class PoolWorker(threading.Thread, stats.StatsReporter): 'provider_name': self.provider_name, 'supported_labels': list(labels), 'state': self.component_info.RUNNING, + 'priority': self.getPriority(), }) self.component_info.register() @@ -380,8 +408,8 @@ class PoolWorker(threading.Thread, stats.StatsReporter): labels = set() for prov_cfg in self.nodepool.config.providers.values(): labels.update(prov_cfg.getSupportedLabels()) - if set(self.component_info.supported_labels) != labels: - self.component_info.supported_labels = list(labels) + self.component_info.supported_labels = list(labels) + self.component_info.priority = self.getPriority() self.updateProviderLimits( self.nodepool.config.providers.get(self.provider_name)) @@ -389,6 +417,7 @@ class PoolWorker(threading.Thread, stats.StatsReporter): self.nodepool.config.tenant_resource_limits) if not self.paused_handler: + self.component_info.paused = False while not self._assignHandlers(): # _assignHandlers can take quite some time on a busy # system so sprinkle _removeCompletedHandlers in @@ -396,6 +425,7 @@ class PoolWorker(threading.Thread, stats.StatsReporter): # requests that already have all nodes. self._removeCompletedHandlers() else: + self.component_info.paused = True # If we are paused, one request handler could not # satisfy its assigned request, so give it # another shot. Unpause ourselves if it completed. diff --git a/nodepool/tests/fixtures/priority.yaml b/nodepool/tests/fixtures/priority.yaml new file mode 100644 index 000000000..3500d7aad --- /dev/null +++ b/nodepool/tests/fixtures/priority.yaml @@ -0,0 +1,87 @@ +elements-dir: . 
+images-dir: '{images_dir}' +build-log-dir: '{build_log_dir}' +build-log-retention: 1 + +zookeeper-servers: + - host: {zookeeper_host} + port: {zookeeper_port} + chroot: {zookeeper_chroot} + +zookeeper-tls: + ca: {zookeeper_ca} + cert: {zookeeper_cert} + key: {zookeeper_key} + +zookeeper-timeout: 20.0 + +labels: + - name: fake-label + min-ready: 0 + +providers: + - name: low-provider + priority: 2 + cloud: fake + driver: fake + region-name: fake-region + rate: 0.0001 + diskimages: + - name: fake-image + meta: + key: value + key2: value + pools: + - name: main + max-servers: 2 + node-attributes: + key1: value1 + key2: value2 + availability-zones: + - az1 + networks: + - net-name + labels: + - name: fake-label + diskimage: fake-image + min-ram: 8192 + flavor-name: 'Fake' + - name: high-provider + priority: 1 + cloud: fake + driver: fake + region-name: fake-region + rate: 0.0001 + diskimages: + - name: fake-image + meta: + key: value + key2: value + pools: + - name: main + max-servers: 1 + node-attributes: + key1: value1 + key2: value2 + availability-zones: + - az1 + networks: + - net-name + labels: + - name: fake-label + diskimage: fake-image + min-ram: 8192 + flavor-name: 'Fake' + +diskimages: + - name: fake-image + elements: + - fedora + - vm + release: 21 + dib-cmd: nodepool/tests/fake-image-create + env-vars: + TMPDIR: /opt/dib_tmp + DIB_IMAGE_CACHE: /opt/dib_cache + DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/ + BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2 diff --git a/nodepool/tests/unit/test_launcher.py b/nodepool/tests/unit/test_launcher.py index 8c37d8536..bdc03b6bd 100644 --- a/nodepool/tests/unit/test_launcher.py +++ b/nodepool/tests/unit/test_launcher.py @@ -2455,3 +2455,47 @@ class TestLauncher(tests.DBTestCase): # Ready for the real delete now zk.ZooKeeper.deleteRawNode = real_method self.waitForNodeDeletion(node) + + def test_provider_priority(self): + """Test provider priorities""" + configfile = self.setup_config('priority.yaml') + self.useBuilder(configfile) + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + self.waitForImage('low-provider', 'fake-image') + self.waitForImage('high-provider', 'fake-image') + + # The first request should be handled by the highest priority + # provider (high-provider; priority 1) + req1 = zk.NodeRequest() + req1.state = zk.REQUESTED + req1.node_types.append('fake-label') + self.zk.storeNodeRequest(req1) + req1 = self.waitForNodeRequest(req1) + self.assertEqual(req1.state, zk.FULFILLED) + self.assertEqual(len(req1.nodes), 1) + node1 = self.zk.getNode(req1.nodes[0]) + self.assertEqual(node1.provider, 'high-provider') + + # The second request should also be handled by the highest + # priority provider, but since it has max-servers=1, this + # request should be paused, which will cause the provider to + # be paused. + req2 = zk.NodeRequest() + req2.state = zk.REQUESTED + req2.node_types.append('fake-label') + self.zk.storeNodeRequest(req2) + req2 = self.waitForNodeRequest(req2, (zk.PENDING,)) + self.assertEqual(req2.state, zk.PENDING) + + # The third request should be handled by the low priority + # provider now that the high provider is paused. 
+ req3 = zk.NodeRequest() + req3.state = zk.REQUESTED + req3.node_types.append('fake-label') + self.zk.storeNodeRequest(req3) + req3 = self.waitForNodeRequest(req3) + self.assertEqual(req3.state, zk.FULFILLED) + self.assertEqual(len(req3.nodes), 1) + node3 = self.zk.getNode(req3.nodes[0]) + self.assertEqual(node3.provider, 'low-provider') diff --git a/nodepool/zk/components.py b/nodepool/zk/components.py index 07c228e79..d2917ef59 100644 --- a/nodepool/zk/components.py +++ b/nodepool/zk/components.py @@ -104,6 +104,9 @@ class BaseComponent(ZooKeeperBase): if name not in self.content.keys(): return super().__setattr__(name, value) + if self.content[name] == value: + return + # Set the value in the local content dict self.content[name] = value @@ -214,6 +217,8 @@ class PoolComponent(BaseComponent): "id": None, "provider_name": None, "supported_labels": [], + "priority": 100, + "paused": False, } self.content.update(self.initial_state) diff --git a/releasenotes/notes/priority-cb2f0ad1829fbaaf.yaml b/releasenotes/notes/priority-cb2f0ad1829fbaaf.yaml new file mode 100644 index 000000000..0524f3995 --- /dev/null +++ b/releasenotes/notes/priority-cb2f0ad1829fbaaf.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Priority may now be set at the provider or provider-pool level. + Providers or pools with the highest priority will fulfill requests + first, until they are at quota, at which point lower-priority + providers will begin to be used. + + See :attr:`providers.priority` for details.
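As a rough usage sketch, a launcher configuration combining the provider-level and pool-level settings documented above might look like the following; the provider names are hypothetical and the cloud-specific details a real configuration needs (images, labels, credentials) are omitted, only the ``priority`` keys and their documented semantics come from this change::

    providers:
      - name: cheap-cloud          # hypothetical provider name
        driver: openstack
        priority: 50               # lower number = higher priority; the default is 100
        pools:
          - name: main
            max-servers: 10
      - name: expensive-cloud      # hypothetical provider name
        driver: openstack          # no provider-level priority, so the default of 100 applies
        pools:
          - name: main
            priority: 200          # pool-level value overrides the provider default
            max-servers: 5

In this sketch, launchers yield requests to the ``cheap-cloud`` pool while it is unpaused; once that pool reaches quota and pauses, the ``expensive-cloud`` pool (priority 200) begins handling requests. Because each pool registers its priority and paused state on its ZooKeeper component (see the PoolComponent changes above), this yielding also works between pools served by separate launcher processes.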