Merge "Add hold command to disable nodes"

This commit is contained in:
Zuul 2022-11-30 20:05:41 +00:00 committed by Gerrit Code Review
commit 9dd883107a
4 changed files with 188 additions and 3 deletions

View File

@ -186,6 +186,11 @@ delete
.. program-output:: nodepool delete --help .. program-output:: nodepool delete --help
:nostderr: :nostderr:
hold
^^^^
.. program-output:: nodepool hold --help
:nostderr:
The following subcommands deal with ZooKeeper data management: The following subcommands deal with ZooKeeper data management:
info info

View File

@ -88,6 +88,13 @@ class NodePoolCmd(NodepoolApp):
action='store_true', action='store_true',
help='delete the node in the foreground') help='delete the node in the foreground')
# Register the "hold" subcommand: it takes a single positional node id
# and dispatches to self.hold.
cmd_hold = subparsers.add_parser(
'hold',
help='place a node in the HOLD state '
'e.g. for running maintenance tasks')
cmd_hold.set_defaults(func=self.hold)
cmd_hold.add_argument('id', help='node id')
cmd_image_delete = subparsers.add_parser( cmd_image_delete = subparsers.add_parser(
'image-delete', 'image-delete',
help='delete an image') help='delete an image')
@ -303,6 +310,23 @@ class NodePoolCmd(NodepoolApp):
self.list(node_id=node.id) self.list(node_id=node.id)
def _change_node_state(self, new_state):
    """Set the state of the node named by ``self.args.id``.

    The node is fetched from ZooKeeper; if it does not exist a message
    is printed and nothing is changed.  Otherwise the node lock is
    acquired (``blocking=True, timeout=5``), the new state is stored,
    the lock is released and the node is printed via ``self.list``.

    Any exception raised while acquiring the lock or storing the node
    propagates to the caller.

    :param new_state: Value to assign to ``node.state``.
    """
    node = self.zk.getNode(self.args.id)
    if not node:
        print("Node id %s not found" % self.args.id)
        return

    # Acquire outside the try block: if locking itself fails there is
    # nothing to release.
    self.zk.lockNode(node, blocking=True, timeout=5)
    try:
        node.state = new_state
        self.zk.storeNode(node)
    finally:
        # Always release the lock, even when storing fails, so a failed
        # command does not leave the node locked.
        self.zk.unlockNode(node)

    self.list(node_id=node.id)
def hold(self):
    """Handle the ``hold`` subcommand: set the selected node to ``zk.HOLD``."""
    held_state = zk.HOLD
    self._change_node_state(held_state)
def dib_image_delete(self): def dib_image_delete(self):
(image, build_num) = self.args.id.rsplit('-', 1) (image, build_num) = self.args.id.rsplit('-', 1)
build = self.zk.getBuild(image, build_num) build = self.zk.getBuild(image, build_num)
@ -434,7 +458,7 @@ class NodePoolCmd(NodepoolApp):
'image-status', 'image-status',
'image-list', 'dib-image-delete', 'image-list', 'dib-image-delete',
'image-delete', 'alien-image-list', 'image-delete', 'alien-image-list',
'list', 'delete', 'list', 'delete', 'hold',
'request-list', 'info', 'erase', 'request-list', 'info', 'erase',
'image-pause', 'image-unpause', 'image-pause', 'image-unpause',
'export-image-data', 'import-image-data'): 'export-image-data', 'import-image-data'):

View File

@ -621,13 +621,13 @@ class DBTestCase(BaseTestCase):
if node.state == state: if node.state == state:
return node return node
def waitForNodeRequest(self, req, states=None): def waitForNodeRequest(self, req, states=None, max_time=ONE_MINUTE):
''' '''
Wait for a node request to transition to a final state. Wait for a node request to transition to a final state.
''' '''
if states is None: if states is None:
states = (zk.FULFILLED, zk.FAILED) states = (zk.FULFILLED, zk.FAILED)
for _ in iterate_timeout(ONE_MINUTE, Exception, for _ in iterate_timeout(max_time, Exception,
"Node request state transition", "Node request state transition",
interval=1): interval=1):
req = self.zk.getNodeRequest(req.id) req = self.zk.getNodeRequest(req.id)

View File

@ -23,6 +23,7 @@ import fixtures
import mock import mock
import testtools import testtools
from nodepool import exceptions as npe
from nodepool.cmd import nodepoolcmd from nodepool.cmd import nodepoolcmd
from nodepool import tests from nodepool import tests
from nodepool.zk import zookeeper as zk from nodepool.zk import zookeeper as zk
@ -383,6 +384,161 @@ class TestNodepoolCMD(tests.DBTestCase):
# Assert the node is gone # Assert the node is gone
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 0) self.assert_listed(configfile, ['list'], 0, nodes[0].id, 0)
def test_hold(self):
    # Scenario: hold a ready node through the CLI, delete it, then check
    # that a different node becomes ready and a node request is fulfilled.
    cfg = self.setup_config('node.yaml')
    launcher = self.useNodepool(cfg, watermark_sleep=1)
    self.useBuilder(cfg)
    launcher.start()
    self.waitForImage('fake-provider', 'fake-image')

    initial_nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(initial_nodes), 1)
    target = initial_nodes[0]

    # Exactly one node is listed, it is the target, and it is READY.
    self.assert_listed(cfg, ['list'], 0, target.id, 1)
    self.assert_nodes_listed(cfg, 1, zk.READY)

    # Run the "hold" subcommand against the node.
    self.patch_argv('-c', cfg, 'hold', target.id)
    nodepoolcmd.main()

    # The node is still listed, now in HOLD.
    self.assert_listed(cfg, ['list'], 0, target.id, 1)
    self.assert_nodes_listed(cfg, 1, zk.HOLD)

    # Delete the held node so that it gets replaced.
    previous_id = target.id
    self.patch_argv('-c', cfg, 'delete', target.id)
    nodepoolcmd.main()

    # Wait for the old node to disappear and for a node to be ready again.
    self.waitForNodeDeletion(target)
    replacements = self.waitForNodes('fake-label')
    self.assertEqual(len(replacements), 1)
    replacement = replacements[0]
    self.assert_listed(cfg, ['list'], 0, replacement.id, 1)
    self.assert_nodes_listed(cfg, 1, zk.READY)
    self.assertNotEqual(previous_id, replacement.id)

    # Submit a node request and expect it to be fulfilled with one node.
    request = zk.NodeRequest()
    request.state = zk.REQUESTED
    request.node_types.append('fake-label')
    self.assertEqual(len(request.nodes), 0)
    self.zk.storeNodeRequest(request)

    self.log.debug("Waiting for request %s", request.id)
    request = self.waitForNodeRequest(request, (zk.FULFILLED,))
    self.assertEqual(len(request.nodes), 1)
def test_attempt_hold_busy_node(self):
    # Scenario: a node that is locked and IN_USE cannot be held; the
    # "hold" subcommand is expected to raise a TimeoutException.
    cfg = self.setup_config('node.yaml')
    launcher = self.useNodepool(cfg, watermark_sleep=1)
    self.useBuilder(cfg)
    launcher.start()
    self.waitForImage('fake-provider', 'fake-image')

    ready_nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(ready_nodes), 1)
    target_id = ready_nodes[0].id

    # Exactly one node is listed, it is the target, and it is READY.
    self.assert_listed(cfg, ['list'], 0, target_id, 1)
    self.assert_nodes_listed(cfg, 1, zk.READY)

    # Submit a node request for the label.
    first_request = zk.NodeRequest()
    first_request.state = zk.REQUESTED
    first_request.node_types.append('fake-label')
    self.zk.storeNodeRequest(first_request)

    # Wait until the request has been fulfilled with one node.
    self.log.debug("Waiting for 1st request %s", first_request.id)
    first_request = self.waitForNodeRequest(first_request, (zk.FULFILLED,))
    self.assertEqual(len(first_request.nodes), 1)

    # Take the node lock from the test and mark the node IN_USE.
    busy_node = self.zk.getNode(first_request.nodes[0])
    self.zk.lockNode(busy_node, blocking=False)
    busy_node.state = zk.IN_USE
    self.zk.storeNode(busy_node)
    self.assert_listed(cfg, ['list'], 0, target_id, 1)
    self.assert_nodes_listed(cfg, 1, zk.IN_USE)

    # Holding must fail because the lock is already taken by the test.
    with testtools.ExpectedException(npe.TimeoutException):
        self.patch_argv('-c', cfg, 'hold', target_id)
        nodepoolcmd.main()
def test_attempt_request_held_static_node(self):
    # Scenario: with the static-basic.yaml configuration, a request for a
    # label whose only node is on HOLD is expected to time out.
    cfg = self.setup_config('static-basic.yaml')
    launcher = self.useNodepool(cfg, watermark_sleep=1)
    launcher.start()

    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)
    static_id = nodes[0].id

    # Exactly one node is listed, it is the target, and it is READY.
    self.assert_listed(cfg, ['list'], 0, static_id, 1)
    self.assert_nodes_listed(cfg, 1, zk.READY)

    # Run the "hold" subcommand against the node.
    self.patch_argv('-c', cfg, 'hold', static_id)
    nodepoolcmd.main()

    # The node is still listed, now in HOLD.
    self.assertEqual(len(nodes), 1)
    self.assert_listed(cfg, ['list'], 0, static_id, 1)
    self.assert_nodes_listed(cfg, 1, zk.HOLD)

    # Build and store a node request for the label.
    request = zk.NodeRequest()
    request.state = zk.REQUESTED
    request.node_types.append('fake-label')
    self.zk.storeNodeRequest(request)

    # Waiting for fulfilment should raise once max_time has elapsed,
    # because the only node for the label is not ready.
    self.log.debug("Waiting for request %s", request.id)
    request = self.zk.getNodeRequest(request.id)
    with testtools.ExpectedException(Exception):
        request = self.waitForNodeRequest(
            request, (zk.FULFILLED,), max_time=30)
    # The request fetched before the wait has no nodes assigned.
    self.assertEqual(len(request.nodes), 0)
def test_attempt_request_held_node(self):
    # Scenario: while a node is on HOLD, a new request for the same label
    # must be fulfilled by a different node.
    cfg = self.setup_config('node.yaml')
    launcher = self.useNodepool(cfg, watermark_sleep=1)
    self.useBuilder(cfg)
    launcher.start()
    self.waitForImage('fake-provider', 'fake-image')

    nodes = self.waitForNodes('fake-label')
    self.assertEqual(len(nodes), 1)
    held_id = nodes[0].id

    # Exactly one node is listed, it is the target, and it is READY.
    self.assert_listed(cfg, ['list'], 0, held_id, 1)
    self.assert_nodes_listed(cfg, 1, zk.READY)

    # Run the "hold" subcommand against the node.
    self.patch_argv('-c', cfg, 'hold', held_id)
    nodepoolcmd.main()

    # The node is still listed, now in HOLD.
    self.assertEqual(len(nodes), 1)
    self.assert_listed(cfg, ['list'], 0, held_id, 1)
    self.assert_nodes_listed(cfg, 1, zk.HOLD)

    # Build and store a node request for the label.
    request = zk.NodeRequest()
    request.state = zk.REQUESTED
    request.node_types.append('fake-label')
    self.zk.storeNodeRequest(request)

    # Wait for the request to be fulfilled.
    self.log.debug("Waiting for request %s", request.id)
    request = self.waitForNodeRequest(request, (zk.FULFILLED,))

    # The held node must not be the one assigned; another node is
    # expected instead, as long as the quota is not reached.
    self.assertNotEqual(held_id, request.nodes[0])
def test_image_build(self): def test_image_build(self):
configfile = self.setup_config('node.yaml') configfile = self.setup_config('node.yaml')
self.useBuilder(configfile) self.useBuilder(configfile)