Add hold command to disable nodes

This allows nodes to be set in an idle state so that they will not have
jobs scheduled on them while e.g. maintenance tasks are performed. This
is probably most useful for static nodes.

Change-Id: Iebc6b909f370fca11fab2be0b8805d4daef33afe
This commit is contained in:
parent: 95b3d4c302
commit: 1658aa9851
@ -186,6 +186,11 @@ delete
|
||||
.. program-output:: nodepool delete --help
|
||||
:nostderr:
|
||||
|
||||
hold
|
||||
^^^^
|
||||
.. program-output:: nodepool hold --help
|
||||
:nostderr:
|
||||
|
||||
The following subcommands deal with ZooKeeper data management:
|
||||
|
||||
info
|
||||
|
@ -88,6 +88,13 @@ class NodePoolCmd(NodepoolApp):
|
||||
action='store_true',
|
||||
help='delete the node in the foreground')
|
||||
|
||||
cmd_hold = subparsers.add_parser(
|
||||
'hold',
|
||||
help='place a node in the HOLD state '
|
||||
'e.g. for running maintenance tasks')
|
||||
cmd_hold.set_defaults(func=self.hold)
|
||||
cmd_hold.add_argument('id', help='node id')
|
||||
|
||||
cmd_image_delete = subparsers.add_parser(
|
||||
'image-delete',
|
||||
help='delete an image')
|
||||
@ -303,6 +310,23 @@ class NodePoolCmd(NodepoolApp):
|
||||
|
||||
self.list(node_id=node.id)
|
||||
|
||||
def _change_node_state(self, new_state):
|
||||
node = self.zk.getNode(self.args.id)
|
||||
if not node:
|
||||
print("Node id %s not found" % self.args.id)
|
||||
return
|
||||
|
||||
self.zk.lockNode(node, blocking=True, timeout=5)
|
||||
|
||||
node.state = new_state
|
||||
self.zk.storeNode(node)
|
||||
self.zk.unlockNode(node)
|
||||
|
||||
self.list(node_id=node.id)
|
||||
|
||||
def hold(self):
|
||||
self._change_node_state(zk.HOLD)
|
||||
|
||||
def dib_image_delete(self):
|
||||
(image, build_num) = self.args.id.rsplit('-', 1)
|
||||
build = self.zk.getBuild(image, build_num)
|
||||
@ -434,7 +458,7 @@ class NodePoolCmd(NodepoolApp):
|
||||
'image-status',
|
||||
'image-list', 'dib-image-delete',
|
||||
'image-delete', 'alien-image-list',
|
||||
-                           'list', 'delete',
+                           'list', 'delete', 'hold',
|
||||
'request-list', 'info', 'erase',
|
||||
'image-pause', 'image-unpause',
|
||||
'export-image-data', 'import-image-data'):
|
||||
|
@ -618,13 +618,13 @@ class DBTestCase(BaseTestCase):
|
||||
if node.state == state:
|
||||
return node
|
||||
|
||||
-    def waitForNodeRequest(self, req, states=None):
+    def waitForNodeRequest(self, req, states=None, max_time=ONE_MINUTE):
|
||||
'''
|
||||
Wait for a node request to transition to a final state.
|
||||
'''
|
||||
if states is None:
|
||||
states = (zk.FULFILLED, zk.FAILED)
|
||||
-        for _ in iterate_timeout(ONE_MINUTE, Exception,
+        for _ in iterate_timeout(max_time, Exception,
|
||||
"Node request state transition",
|
||||
interval=1):
|
||||
req = self.zk.getNodeRequest(req.id)
|
||||
|
@ -22,6 +22,7 @@ import fixtures
|
||||
import mock
|
||||
import testtools
|
||||
|
||||
from nodepool import exceptions as npe
|
||||
from nodepool.cmd import nodepoolcmd
|
||||
from nodepool import tests
|
||||
from nodepool.zk import zookeeper as zk
|
||||
@ -350,6 +351,161 @@ class TestNodepoolCMD(tests.DBTestCase):
|
||||
# Assert the node is gone
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 0)
|
||||
|
||||
def test_hold(self):
|
||||
configfile = self.setup_config('node.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
self.useBuilder(configfile)
|
||||
pool.start()
|
||||
self.waitForImage('fake-provider', 'fake-image')
|
||||
nodes = self.waitForNodes('fake-label')
|
||||
self.assertEqual(len(nodes), 1)
|
||||
|
||||
# Assert one node exists and it is nodes[0].id in a ready state.
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.READY)
|
||||
|
||||
# Hold node
|
||||
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
|
||||
nodepoolcmd.main()
|
||||
|
||||
# Assert the node is on hold
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.HOLD)
|
||||
|
||||
# Re-enable node by deleting
|
||||
old_node_id = nodes[0].id
|
||||
self.patch_argv('-c', configfile, 'delete', nodes[0].id)
|
||||
nodepoolcmd.main()
|
||||
|
||||
# Assert that the node is ready
|
||||
self.waitForNodeDeletion(nodes[0])
|
||||
new_nodes = self.waitForNodes('fake-label')
|
||||
self.assertEqual(len(new_nodes), 1)
|
||||
self.assert_listed(configfile, ['list'], 0, new_nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.READY)
|
||||
self.assertNotEqual(old_node_id, new_nodes[0].id)
|
||||
|
||||
# Request a node
|
||||
req = zk.NodeRequest()
|
||||
req.state = zk.REQUESTED
|
||||
req.node_types.append('fake-label')
|
||||
self.assertEqual(len(req.nodes), 0)
|
||||
self.zk.storeNodeRequest(req)
|
||||
|
||||
self.log.debug("Waiting for request %s", req.id)
|
||||
req = self.waitForNodeRequest(req, (zk.FULFILLED,))
|
||||
self.assertEqual(len(req.nodes), 1)
|
||||
|
||||
def test_attempt_hold_busy_node(self):
|
||||
configfile = self.setup_config('node.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
self.useBuilder(configfile)
|
||||
pool.start()
|
||||
self.waitForImage('fake-provider', 'fake-image')
|
||||
nodes = self.waitForNodes('fake-label')
|
||||
self.assertEqual(len(nodes), 1)
|
||||
|
||||
# Assert one node exists and it is nodes[0].id in a ready state.
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.READY)
|
||||
|
||||
# Request a node
|
||||
req1 = zk.NodeRequest()
|
||||
req1.state = zk.REQUESTED
|
||||
req1.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req1)
|
||||
|
||||
# Wait for node request
|
||||
self.log.debug("Waiting for 1st request %s", req1.id)
|
||||
req1 = self.waitForNodeRequest(req1, (zk.FULFILLED,))
|
||||
self.assertEqual(len(req1.nodes), 1)
|
||||
|
||||
# Lock node and set it as in-use
|
||||
node = self.zk.getNode(req1.nodes[0])
|
||||
self.zk.lockNode(node, blocking=False)
|
||||
node.state = zk.IN_USE
|
||||
self.zk.storeNode(node)
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.IN_USE)
|
||||
|
||||
# Attempt to hold the node, this should fail
|
||||
# since another process holds the lock
|
||||
with testtools.ExpectedException(npe.TimeoutException):
|
||||
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
|
||||
nodepoolcmd.main()
|
||||
|
||||
def test_attempt_request_held_static_node(self):
|
||||
configfile = self.setup_config('static-basic.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
pool.start()
|
||||
nodes = self.waitForNodes('fake-label')
|
||||
self.assertEqual(len(nodes), 1)
|
||||
|
||||
# Assert one node exists and it is nodes[0].id in a ready state.
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.READY)
|
||||
|
||||
# Hold node
|
||||
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
|
||||
nodepoolcmd.main()
|
||||
|
||||
# Assert the node is on HOLD
|
||||
self.assertEqual(len(nodes), 1)
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.HOLD)
|
||||
|
||||
# Prepare node request
|
||||
req = zk.NodeRequest()
|
||||
req.state = zk.REQUESTED
|
||||
req.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req)
|
||||
|
||||
# Make a node request
|
||||
# Expect to timeout since the node is not ready
|
||||
self.log.debug("Waiting for request %s", req.id)
|
||||
req = self.zk.getNodeRequest(req.id)
|
||||
|
||||
with testtools.ExpectedException(Exception):
|
||||
req = self.waitForNodeRequest(req, (zk.FULFILLED,), max_time=30)
|
||||
|
||||
self.assertEqual(len(req.nodes), 0)
|
||||
|
||||
def test_attempt_request_held_node(self):
|
||||
configfile = self.setup_config('node.yaml')
|
||||
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||
self.useBuilder(configfile)
|
||||
pool.start()
|
||||
self.waitForImage('fake-provider', 'fake-image')
|
||||
nodes = self.waitForNodes('fake-label')
|
||||
self.assertEqual(len(nodes), 1)
|
||||
|
||||
# Assert one node exists and it is nodes[0].id in a ready state.
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.READY)
|
||||
|
||||
# Hold node
|
||||
self.patch_argv('-c', configfile, 'hold', nodes[0].id)
|
||||
nodepoolcmd.main()
|
||||
|
||||
# Assert the node is on HOLD
|
||||
self.assertEqual(len(nodes), 1)
|
||||
self.assert_listed(configfile, ['list'], 0, nodes[0].id, 1)
|
||||
self.assert_nodes_listed(configfile, 1, zk.HOLD)
|
||||
|
||||
# Prepare node request
|
||||
req = zk.NodeRequest()
|
||||
req.state = zk.REQUESTED
|
||||
req.node_types.append('fake-label')
|
||||
self.zk.storeNodeRequest(req)
|
||||
|
||||
# Make a node request
|
||||
self.log.debug("Waiting for request %s", req.id)
|
||||
req = self.waitForNodeRequest(req, (zk.FULFILLED,))
|
||||
|
||||
# Make sure we did not assign the held node
|
||||
# but another node as long as the quota is not reached
|
||||
self.assertNotEqual(nodes[0].id, req.nodes[0])
|
||||
|
||||
def test_image_build(self):
|
||||
configfile = self.setup_config('node.yaml')
|
||||
self.useBuilder(configfile)
|
||||
|
Loading…
Reference in New Issue
Block a user