From c739eec8535ccff4dc133a40661dfaed393042ba Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Fri, 6 Oct 2017 08:08:04 -0400 Subject: [PATCH] Do not satisfy min-ready requests if at capacity If a provider gets a min-ready request, but it is already at capacity, it should decline the request so as to not wedge the provider (because a min-ready does not reuse READY nodes, provider would have to wait until a node was freed to satisfy the min-ready request). Change-Id: I37dc0802c1b30714833d6f46cc19b86deb58852b --- nodepool/driver/openstack/handler.py | 9 ++++ nodepool/launcher.py | 2 +- .../fixtures/node_min_ready_capacity.yaml | 47 +++++++++++++++++++ nodepool/tests/test_launcher.py | 29 ++++++++++++ nodepool/zk.py | 13 +++++ 5 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 nodepool/tests/fixtures/node_min_ready_capacity.yaml diff --git a/nodepool/driver/openstack/handler.py b/nodepool/driver/openstack/handler.py index 34499a23f..9ada75177 100644 --- a/nodepool/driver/openstack/handler.py +++ b/nodepool/driver/openstack/handler.py @@ -486,6 +486,15 @@ class OpenStackNodeRequestHandler(NodeRequestHandler): if len(self.request.node_types) > self.pool.max_servers: declined_reasons.append('it would exceed quota') + # For min-ready requests, which do not re-use READY nodes, let's + # decline if this provider is already at capacity. Otherwise, we + # could end up wedged until another request frees up a node. + if self.request.requestor == "NodePool:min-ready": + current_count = self.zk.countPoolNodes(self.provider.name, + self.pool.name) + if current_count == self.pool.max_servers: + declined_reasons.append("provider cannot satisify min-ready") + if declined_reasons: self.log.debug("Declining node request %s because %s", self.request.id, ', '.join(declined_reasons)) diff --git a/nodepool/launcher.py b/nodepool/launcher.py index 8010a974e..bd1fce8e0 100755 --- a/nodepool/launcher.py +++ b/nodepool/launcher.py @@ -786,7 +786,7 @@ class NodePool(threading.Thread): req.requestor = "NodePool:min-ready" req.node_types.append(label_name) req.reuse = False # force new node launches - self.zk.storeNodeRequest(req) + self.zk.storeNodeRequest(req, priority="100") if label_name not in self._submittedRequests: self._submittedRequests[label_name] = [] self._submittedRequests[label_name].append(req) diff --git a/nodepool/tests/fixtures/node_min_ready_capacity.yaml b/nodepool/tests/fixtures/node_min_ready_capacity.yaml new file mode 100644 index 000000000..0c31e5590 --- /dev/null +++ b/nodepool/tests/fixtures/node_min_ready_capacity.yaml @@ -0,0 +1,47 @@ +elements-dir: . +images-dir: '{images_dir}' + +zookeeper-servers: + - host: {zookeeper_host} + port: {zookeeper_port} + chroot: {zookeeper_chroot} + +labels: + - name: fake-label + min-ready: 0 + +providers: + - name: fake-provider + cloud: fake + driver: fake + region-name: fake-region + rate: 0.0001 + diskimages: + - name: fake-image + meta: + key: value + key2: value + pools: + - name: main + max-servers: 1 + availability-zones: + - az1 + networks: + - net-name + labels: + - name: fake-label + diskimage: fake-image + min-ram: 8192 + flavor-name: 'Fake' + +diskimages: + - name: fake-image + elements: + - fedora + - vm + release: 21 + env-vars: + TMPDIR: /opt/dib_tmp + DIB_IMAGE_CACHE: /opt/dib_cache + DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/ + BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2 diff --git a/nodepool/tests/test_launcher.py b/nodepool/tests/test_launcher.py index 4b5594ec9..2eb75cece 100644 --- a/nodepool/tests/test_launcher.py +++ b/nodepool/tests/test_launcher.py @@ -228,6 +228,35 @@ class TestLauncher(tests.DBTestCase): self.assertEqual(req.state, zk.FAILED) self.assertNotEqual(req.declined_by, []) + def test_fail_minready_request_at_capacity(self): + ''' + A min-ready request to a provider that is already at capacity should + be declined. + ''' + configfile = self.setup_config('node_min_ready_capacity.yaml') + self._useBuilder(configfile) + self.waitForImage('fake-provider', 'fake-image') + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + + # Get an initial node ready + req = zk.NodeRequest() + req.state = zk.REQUESTED + req.node_types.append("fake-label") + self.zk.storeNodeRequest(req) + req = self.waitForNodeRequest(req) + self.assertEqual(req.state, zk.FULFILLED) + + # Now simulate a min-ready request + min_ready_req = zk.NodeRequest() + min_ready_req.state = zk.REQUESTED + min_ready_req.node_types.append("fake-label") + min_ready_req.requestor = "NodePool:min-ready" + self.zk.storeNodeRequest(min_ready_req) + min_ready_req = self.waitForNodeRequest(min_ready_req) + self.assertEqual(min_ready_req.state, zk.FAILED) + self.assertNotEqual(min_ready_req.declined_by, []) + def test_invalid_image_fails(self): ''' Test that an invalid image declines and fails the request. diff --git a/nodepool/zk.py b/nodepool/zk.py index 24a621a81..78829582e 100755 --- a/nodepool/zk.py +++ b/nodepool/zk.py @@ -1629,3 +1629,16 @@ class ZooKeeper(object): req = self.getNodeRequest(req_id) if req: yield req + + def countPoolNodes(self, provider_name, pool_name): + ''' + Count the number of nodes that exist for the given provider pool. + + :param str provider_name: The provider name. + :param str pool_name: The pool name. + ''' + count = 0 + for node in self.nodeIterator(): + if node.provider == provider_name and node.pool == pool_name: + count = count + 1 + return count