diff --git a/doc/source/operation.rst b/doc/source/operation.rst
index 2a680e99c..f85e4ec2a 100644
--- a/doc/source/operation.rst
+++ b/doc/source/operation.rst
@@ -186,6 +186,11 @@ delete
 .. program-output:: nodepool delete --help
    :nostderr:
 
+hold
+^^^^
+.. program-output:: nodepool hold --help
+   :nostderr:
+
 The following subcommands deal with ZooKeeper data management:
 
 info
diff --git a/nodepool/cmd/nodepoolcmd.py b/nodepool/cmd/nodepoolcmd.py
index 65e3f4257..0265c2c97 100644
--- a/nodepool/cmd/nodepoolcmd.py
+++ b/nodepool/cmd/nodepoolcmd.py
@@ -88,6 +88,13 @@ class NodePoolCmd(NodepoolApp):
                                 action='store_true',
                                 help='delete the node in the foreground')
 
+        cmd_hold = subparsers.add_parser(
+            'hold',
+            help='place a node in the HOLD state '
+                 'e.g. for running maintenance tasks')
+        cmd_hold.set_defaults(func=self.hold)
+        cmd_hold.add_argument('id', help='node id')
+
         cmd_image_delete = subparsers.add_parser(
             'image-delete',
             help='delete an image')
@@ -303,6 +310,23 @@ class NodePoolCmd(NodepoolApp):
 
         self.list(node_id=node.id)
 
+    def _change_node_state(self, new_state):
+        node = self.zk.getNode(self.args.id)
+        if not node:
+            print("Node id %s not found" % self.args.id)
+            return
+
+        self.zk.lockNode(node, blocking=True, timeout=5)
+
+        node.state = new_state
+        self.zk.storeNode(node)
+        self.zk.unlockNode(node)
+
+        self.list(node_id=node.id)
+
+    def hold(self):
+        self._change_node_state(zk.HOLD)
+
     def dib_image_delete(self):
         (image, build_num) = self.args.id.rsplit('-', 1)
         build = self.zk.getBuild(image, build_num)
@@ -434,7 +458,7 @@ class NodePoolCmd(NodepoolApp):
                                  'image-status', 'image-list',
                                  'dib-image-delete', 'image-delete',
                                  'alien-image-list',
-                                 'list', 'delete',
+                                 'list', 'delete', 'hold',
                                  'request-list', 'info', 'erase',
                                  'image-pause', 'image-unpause',
                                  'export-image-data', 'import-image-data'):
diff --git a/nodepool/tests/__init__.py b/nodepool/tests/__init__.py
index 925cb8b20..347d27617 100644
--- a/nodepool/tests/__init__.py
+++ b/nodepool/tests/__init__.py
@@ -618,13 +618,13 @@ class DBTestCase(BaseTestCase):
                 if node.state == state:
                     return node
 
-    def waitForNodeRequest(self, req, states=None):
+    def waitForNodeRequest(self, req, states=None, max_time=ONE_MINUTE):
         '''
         Wait for a node request to transition to a final state.
         '''
         if states is None:
             states = (zk.FULFILLED, zk.FAILED)
-        for _ in iterate_timeout(ONE_MINUTE, Exception,
+        for _ in iterate_timeout(max_time, Exception,
                                  "Node request state transition",
                                  interval=1):
             req = self.zk.getNodeRequest(req.id)
diff --git a/nodepool/tests/unit/test_commands.py b/nodepool/tests/unit/test_commands.py
index a993402c7..c08851206 100644
--- a/nodepool/tests/unit/test_commands.py
+++ b/nodepool/tests/unit/test_commands.py
@@ -22,6 +22,7 @@ import fixtures
 import mock
 import testtools
 
+from nodepool import exceptions as npe
 from nodepool.cmd import nodepoolcmd
 from nodepool import tests
 from nodepool.zk import zookeeper as zk
@@ -350,6 +351,161 @@ class TestNodepoolCMD(tests.DBTestCase):
         # Assert the node is gone
         self.assert_listed(configfile, ['list'], 0, nodes[0].id, 0)
 
+    def test_hold(self):
+        configfile = self.setup_config('node.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        self.useBuilder(configfile)
+        pool.start()
+        self.waitForImage('fake-provider', 'fake-image')
+        nodes = self.waitForNodes('fake-label')
+        self.assertEqual(len(nodes), 1)
+
+        # Assert one node exists and it is nodes[0].id in a ready state.
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.READY)
+
+        # Hold node
+        self.patch_argv('-c', configfile, 'hold', nodes[0].id)
+        nodepoolcmd.main()
+
+        # Assert the node is on hold
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.HOLD)
+
+        # Re-enable node by deleting
+        old_node_id = nodes[0].id
+        self.patch_argv('-c', configfile, 'delete', nodes[0].id)
+        nodepoolcmd.main()
+
+        # Assert that the node is ready
+        self.waitForNodeDeletion(nodes[0])
+        new_nodes = self.waitForNodes('fake-label')
+        self.assertEqual(len(new_nodes), 1)
+        self.assert_listed(configfile, ['list'], 0, new_nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.READY)
+        self.assertNotEqual(old_node_id, new_nodes[0].id)
+
+        # Request a node
+        req = zk.NodeRequest()
+        req.state = zk.REQUESTED
+        req.node_types.append('fake-label')
+        self.assertEqual(len(req.nodes), 0)
+        self.zk.storeNodeRequest(req)
+
+        self.log.debug("Waiting for request %s", req.id)
+        req = self.waitForNodeRequest(req, (zk.FULFILLED,))
+        self.assertEqual(len(req.nodes), 1)
+
+    def test_attempt_hold_busy_node(self):
+        configfile = self.setup_config('node.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        self.useBuilder(configfile)
+        pool.start()
+        self.waitForImage('fake-provider', 'fake-image')
+        nodes = self.waitForNodes('fake-label')
+        self.assertEqual(len(nodes), 1)
+
+        # Assert one node exists and it is nodes[0].id in a ready state.
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.READY)
+
+        # Request a node
+        req1 = zk.NodeRequest()
+        req1.state = zk.REQUESTED
+        req1.node_types.append('fake-label')
+        self.zk.storeNodeRequest(req1)
+
+        # Wait for node request
+        self.log.debug("Waiting for 1st request %s", req1.id)
+        req1 = self.waitForNodeRequest(req1, (zk.FULFILLED,))
+        self.assertEqual(len(req1.nodes), 1)
+
+        # Lock node and set it as in-use
+        node = self.zk.getNode(req1.nodes[0])
+        self.zk.lockNode(node, blocking=False)
+        node.state = zk.IN_USE
+        self.zk.storeNode(node)
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.IN_USE)
+
+        # Attempt to hold the node, this should fail
+        # since another process holds the lock
+        with testtools.ExpectedException(npe.TimeoutException):
+            self.patch_argv('-c', configfile, 'hold', nodes[0].id)
+            nodepoolcmd.main()
+
+    def test_attempt_request_held_static_node(self):
+        configfile = self.setup_config('static-basic.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        pool.start()
+        nodes = self.waitForNodes('fake-label')
+        self.assertEqual(len(nodes), 1)
+
+        # Assert one node exists and it is nodes[0].id in a ready state.
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.READY)
+
+        # Hold node
+        self.patch_argv('-c', configfile, 'hold', nodes[0].id)
+        nodepoolcmd.main()
+
+        # Assert the node is on HOLD
+        self.assertEqual(len(nodes), 1)
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.HOLD)
+
+        # Prepare node request
+        req = zk.NodeRequest()
+        req.state = zk.REQUESTED
+        req.node_types.append('fake-label')
+        self.zk.storeNodeRequest(req)
+
+        # Make a node request
+        # Expect to timeout since the node is not ready
+        self.log.debug("Waiting for request %s", req.id)
+        req = self.zk.getNodeRequest(req.id)
+
+        with testtools.ExpectedException(Exception):
+            req = self.waitForNodeRequest(req, (zk.FULFILLED,), max_time=30)
+
+        self.assertEqual(len(req.nodes), 0)
+
+    def test_attempt_request_held_node(self):
+        configfile = self.setup_config('node.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        self.useBuilder(configfile)
+        pool.start()
+        self.waitForImage('fake-provider', 'fake-image')
+        nodes = self.waitForNodes('fake-label')
+        self.assertEqual(len(nodes), 1)
+
+        # Assert one node exists and it is nodes[0].id in a ready state.
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.READY)
+
+        # Hold node
+        self.patch_argv('-c', configfile, 'hold', nodes[0].id)
+        nodepoolcmd.main()
+
+        # Assert the node is on HOLD
+        self.assertEqual(len(nodes), 1)
+        self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
+        self.assert_nodes_listed(configfile, 1, zk.HOLD)
+
+        # Prepare node request
+        req = zk.NodeRequest()
+        req.state = zk.REQUESTED
+        req.node_types.append('fake-label')
+        self.zk.storeNodeRequest(req)
+
+        # Make a node request
+        self.log.debug("Waiting for request %s", req.id)
+        req = self.waitForNodeRequest(req, (zk.FULFILLED,))
+
+        # Make sure we did not assign the held node
+        # but another node as long as the quota is not reached
+        self.assertNotEqual(nodes[0].id, req.nodes[0])
+
     def test_image_build(self):
         configfile = self.setup_config('node.yaml')
         self.useBuilder(configfile)
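
Not part of the diff above: for anyone who wants to try the new subcommand outside the test suite, a minimal sketch that drives it the same way test_hold does (patch sys.argv, then call nodepoolcmd.main()). The config path and node id below are placeholders, not values taken from this change.

    # Sketch only: exercise 'nodepool hold' the way the unit tests do.
    # '/etc/nodepool/nodepool.yaml' and '0000000001' are placeholders.
    import sys

    from nodepool.cmd import nodepoolcmd

    sys.argv = ['nodepool', '-c', '/etc/nodepool/nodepool.yaml',
                'hold', '0000000001']
    nodepoolcmd.main()

    # Afterwards 'nodepool list' should show the node in the HOLD state;
    # deleting the node (as test_hold does) releases it, and the launcher
    # brings up a fresh READY replacement.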