Merge "Add hold command to disable nodes"

This commit is contained in:
Zuul 2022-11-30 20:05:41 +00:00 committed by Gerrit Code Review
commit 9dd883107a
4 changed files with 188 additions and 3 deletions

View File

@ -186,6 +186,11 @@ delete
.. program-output:: nodepool delete --help
:nostderr:
hold
^^^^
.. program-output:: nodepool hold --help
:nostderr:
The following subcommands deal with ZooKeeper data management:
info

View File

@ -88,6 +88,13 @@ class NodePoolCmd(NodepoolApp):
action='store_true',
help='delete the node in the foreground')
cmd_hold = subparsers.add_parser(
'hold',
help='place a node in the HOLD state '
'e.g. for running maintenance tasks')
cmd_hold.set_defaults(func=self.hold)
cmd_hold.add_argument('id', help='node id')
cmd_image_delete = subparsers.add_parser(
'image-delete',
help='delete an image')
@ -303,6 +310,23 @@ class NodePoolCmd(NodepoolApp):
self.list(node_id=node.id)
def _change_node_state(self, new_state):
node = self.zk.getNode(self.args.id)
if not node:
print("Node id %s not found" % self.args.id)
return
self.zk.lockNode(node, blocking=True, timeout=5)
node.state = new_state
self.zk.storeNode(node)
self.zk.unlockNode(node)
self.list(node_id=node.id)
def hold(self):
self._change_node_state(zk.HOLD)
def dib_image_delete(self):
(image, build_num) = self.args.id.rsplit('-', 1)
build = self.zk.getBuild(image, build_num)
@ -434,7 +458,7 @@ class NodePoolCmd(NodepoolApp):
'image-status',
'image-list', 'dib-image-delete',
'image-delete', 'alien-image-list',
'list', 'delete',
'list', 'delete', 'hold',
'request-list', 'info', 'erase',
'image-pause', 'image-unpause',
'export-image-data', 'import-image-data'):

View File

@ -621,13 +621,13 @@ class DBTestCase(BaseTestCase):
if node.state == state:
return node
def waitForNodeRequest(self, req, states=None):
def waitForNodeRequest(self, req, states=None, max_time=ONE_MINUTE):
'''
Wait for a node request to transition to a final state.
'''
if states is None:
states = (zk.FULFILLED, zk.FAILED)
for _ in iterate_timeout(ONE_MINUTE, Exception,
for _ in iterate_timeout(max_time, Exception,
"Node request state transition",
interval=1):
req = self.zk.getNodeRequest(req.id)

View File

@ -23,6 +23,7 @@ import fixtures
import mock
import testtools
from nodepool import exceptions as npe
from nodepool.cmd import nodepoolcmd
from nodepool import tests
from nodepool.zk import zookeeper as zk
@ -383,6 +384,161 @@ class TestNodepoolCMD(tests.DBTestCase):
# Assert the node is gone
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 0)
def test_hold(self):
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
# Assert one node exists and it is nodes[0].id in a ready state.
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
# Hold node
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
nodepoolcmd.main()
# Assert the node is on hold
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.HOLD)
# Re-enable node by deleting
old_node_id = nodes[0].id
self.patch_argv('-c', configfile, 'delete', nodes[0].id)
nodepoolcmd.main()
# Assert that the node is ready
self.waitForNodeDeletion(nodes[0])
new_nodes = self.waitForNodes('fake-label')
self.assertEqual(len(new_nodes), 1)
self.assert_listed(configfile, ['list'], 0, new_nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
self.assertNotEqual(old_node_id, new_nodes[0].id)
# Request a node
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.node_types.append('fake-label')
self.assertEqual(len(req.nodes), 0)
self.zk.storeNodeRequest(req)
self.log.debug("Waiting for request %s", req.id)
req = self.waitForNodeRequest(req, (zk.FULFILLED,))
self.assertEqual(len(req.nodes), 1)
def test_attempt_hold_busy_node(self):
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
# Assert one node exists and it is nodes[0].id in a ready state.
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
# Request a node
req1 = zk.NodeRequest()
req1.state = zk.REQUESTED
req1.node_types.append('fake-label')
self.zk.storeNodeRequest(req1)
# Wait for node request
self.log.debug("Waiting for 1st request %s", req1.id)
req1 = self.waitForNodeRequest(req1, (zk.FULFILLED,))
self.assertEqual(len(req1.nodes), 1)
# Lock node and set it as in-use
node = self.zk.getNode(req1.nodes[0])
self.zk.lockNode(node, blocking=False)
node.state = zk.IN_USE
self.zk.storeNode(node)
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.IN_USE)
# Attempt to hold the node, this should fail
# since another process holds the lock
with testtools.ExpectedException(npe.TimeoutException):
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
nodepoolcmd.main()
def test_attempt_request_held_static_node(self):
configfile = self.setup_config('static-basic.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
pool.start()
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
# Assert one node exists and it is nodes[0].id in a ready state.
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
# Hold node
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
nodepoolcmd.main()
# Assert the node is on HOLD
self.assertEqual(len(nodes), 1)
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.HOLD)
# Prepare node request
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.node_types.append('fake-label')
self.zk.storeNodeRequest(req)
# Make a node request
# Expect to timeout since the node is not ready
self.log.debug("Waiting for request %s", req.id)
req = self.zk.getNodeRequest(req.id)
with testtools.ExpectedException(Exception):
req = self.waitForNodeRequest(req, (zk.FULFILLED,), max_time=30)
self.assertEqual(len(req.nodes), 0)
def test_attempt_request_held_node(self):
configfile = self.setup_config('node.yaml')
pool = self.useNodepool(configfile, watermark_sleep=1)
self.useBuilder(configfile)
pool.start()
self.waitForImage('fake-provider', 'fake-image')
nodes = self.waitForNodes('fake-label')
self.assertEqual(len(nodes), 1)
# Assert one node exists and it is nodes[0].id in a ready state.
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.READY)
# Hold node
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
nodepoolcmd.main()
# Assert the node is on HOLD
self.assertEqual(len(nodes), 1)
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
self.assert_nodes_listed(configfile, 1, zk.HOLD)
# Prepare node request
req = zk.NodeRequest()
req.state = zk.REQUESTED
req.node_types.append('fake-label')
self.zk.storeNodeRequest(req)
# Make a node request
self.log.debug("Waiting for request %s", req.id)
req = self.waitForNodeRequest(req, (zk.FULFILLED,))
# Make sure we did not assign the held node
# but another node as long as the quota is not reached
self.assertNotEqual(nodes[0].id, req.nodes[0])
def test_image_build(self):
configfile = self.setup_config('node.yaml')
self.useBuilder(configfile)