From b48e8ad4ec258859d298e4391df06534e64ea67b Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Mon, 20 Mar 2017 11:07:20 -0700 Subject: [PATCH] Fix test_node_assignment_at_quota There is a bug in the request handler at quota where if: the request handler runs but must pause due to quota while still needing more than one node, and then a single node becomes available and the handler runs again and causes a node to be launched but then must wait for another node to become available, the handler will never unpause. This is because nodes that it launches are not added to the handler's nodeset until after the entire request is handled (they are added by the poll method). However, nodes that are allocated to the request from ready node stock are added to the nodeset. The current nodeset is used to determine whether more nodes are needed. Because the nodes from the recent launches are not part of the nodeset, they are still counted as being "needed", and so the request handler continues to wait for more slots to become available. The fix is to add the newly requested node to the node set immediately when it is requested rather than when it becomes READY in the poll() method. This should be safe since any node failure causes the entire request to be failed. Co-Authored-By: David Shrewsbury Change-Id: I88c682807b395fc549f7c698d0c42c888dab2bc2 --- nodepool/nodepool.py | 4 +--- nodepool/tests/test_nodepool.py | 31 +++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index b17ea68b3..b68866669 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -719,8 +719,7 @@ class NodeRequestHandler(object): node.state = zk.BUILDING self.zk.storeNode(node) - # NOTE: We append the node to nodeset if it successfully # launches. 
+ self.nodeset.append(node) self.launch_manager.launch(node) def _run(self): @@ -858,7 +857,6 @@ class NodeRequestHandler(object): else: self.request.state = zk.REQUESTED else: - self.nodeset.extend(self.launch_manager.ready_nodes) for node in self.nodeset: # Record node ID in the request self.request.nodes.append(node.id) diff --git a/nodepool/tests/test_nodepool.py b/nodepool/tests/test_nodepool.py index 5b270bea7..4c4aa329c 100644 --- a/nodepool/tests/test_nodepool.py +++ b/nodepool/tests/test_nodepool.py @@ -86,8 +86,8 @@ class TestNodepool(tests.DBTestCase): client = pool.getProviderManager('fake-provider')._getClient() - # One of the things we want to test is that if spawn many node - # launches at once, we do not deadlock while the request + # One of the things we want to test is that if we spawn many + # node launches at once, we do not deadlock while the request # handler pauses for quota. To ensure we test that case, # pause server creation until we have accepted all of the node # requests we submit. This will ensure that we hold locks on @@ -129,11 +129,30 @@ class TestNodepool(tests.DBTestCase): # Mark the first request's nodes as USED, which will get them deleted # and allow the second to proceed. + self.log.debug("Marking first node as used %s", req1.id) + node = self.zk.getNode(req1.nodes[0]) + node.state = zk.USED + self.zk.storeNode(node) + self.waitForNodeDeletion(node) + + # To force the sequential nature of what we're testing, wait for + # the 2nd request to get a node allocated to it now that we've + # freed up a node. 
+ self.log.debug("Waiting for node allocation for 2nd request") + done = False + while not done: + for n in self.zk.nodeIterator(): + if n.allocated_to == req2.id: + done = True + break + + self.log.debug("Marking second node as used %s", req1.id) + node = self.zk.getNode(req1.nodes[1]) + node.state = zk.USED + self.zk.storeNode(node) + self.waitForNodeDeletion(node) + self.log.debug("Deleting 1st request %s", req1.id) - for node_id in req1.nodes: - node = self.zk.getNode(node_id) - node.state = zk.USED - self.zk.storeNode(node) self.zk.deleteNodeRequest(req1) self.waitForNodeRequestLockDeletion(req1.id)