Add max-age to metastatic driver
This allows the metastatic driver to gracefully remove a backing node from service after a certain amount of time. This forced retirement can be used to periodically ensure that fresh backing nodes are used even in busy systems (which can help ensure that, over time, job behavior does not change based on the contents of the backing node server).

Change-Id: I62a95411a5d0b75185739a3c2553c75124c78c25
Parent: c253ebfef3    Commit: a57231cb60
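At its core, the change is a single age comparison against the backing node's launch timestamp. Here is a minimal standalone sketch of that check; the function name and arguments are illustrative, not the driver's actual API:

    import time

    def should_retire(launched, max_age, now=None):
        """Return True once a backing node has outlived max_age seconds."""
        if now is None:
            now = time.time()
        return (now - launched) > max_age

    # Example: a node launched 400 seconds ago with max-age 300 is retired.
    assert should_retire(time.time() - 400, 300)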
@@ -203,6 +203,18 @@ itself, which is "meta".
       used to ensure that the backing node is retained for at
       least the minimum billing interval.
 
+   .. attr:: max-age
+      :type: int
+
+      If this value is set, the backing node will be removed
+      from service after this amount of time (in seconds) has
+      passed since the backing node was launched.  After a
+      backing node reaches this point, any existing jobs will
+      be permitted to run to completion, but no new metastatic
+      nodes will be created with that backing node, and once all
+      metastatic nodes using it have been deleted, the backing
+      node will be deleted.
+
    .. attr:: host-key-checking
       :type: bool
       :default: False
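The lifecycle described above (stop allocating, drain, then delete) can be modeled in a few lines. This is a simplified toy sketch, not the driver's real BackingNodeRecord handling:

    class RetiringBackingNode:
        """Toy model of the drain-then-delete lifecycle (illustrative only)."""

        def __init__(self):
            self.failed = False   # set once the node exceeds max-age
            self.users = 0        # metastatic nodes currently on this node

        def can_allocate(self):
            # An aged-out node accepts no new metastatic nodes.
            return not self.failed

        def release_one(self):
            # Existing jobs run to completion; releasing the last
            # metastatic node triggers deletion of the backing node.
            self.users -= 1
            if self.failed and self.users == 0:
                print("backing node deleted")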
@@ -298,6 +298,15 @@ class MetastaticAdapter(statemachine.Adapter):
                 if label_config:
                     grace_time = label_config.grace_time
                     min_time = label_config.min_retention_time
+                    if label_config.max_age:
+                        if now - bnr.launched > label_config.max_age:
+                            # Mark it as failed; even though it
+                            # hasn't really failed, the lifecycle
+                            # is the same: do not allocate any
+                            # more jobs to this node but let any
+                            # remaining ones finish, then delete
+                            # ASAP.
+                            bnr.failed = True
                 else:
                     # The label doesn't exist in our config any more,
                     # it must have been removed.
@@ -46,8 +46,12 @@ class MetastaticLabel(ConfigValue):
         self.max_parallel_jobs = label.get('max-parallel-jobs', 1)
         self.grace_time = label.get('grace-time', 60)
         self.min_retention_time = label.get('min-retention-time', 0)
+        self.max_age = label.get('max-age', None)
         self.host_key_checking = label.get('host-key-checking',
                                            self.pool.host_key_checking)
+        if self.max_age and self.max_age < self.min_retention_time:
+            raise Exception("The max_age must be greater than or "
+                            "equal to the min_retention_time")
 
     @staticmethod
     def getSchema():
@@ -57,6 +61,7 @@ class MetastaticLabel(ConfigValue):
             'max-parallel-jobs': int,
             'grace-time': int,
             'min-retention-time': int,
+            'max-age': int,
             'host-key-checking': bool,
         }
 
@@ -66,7 +71,8 @@ class MetastaticLabel(ConfigValue):
             self.backing_label == other.backing_label and
             self.max_parallel_jobs == other.max_parallel_jobs and
             self.grace_time == other.grace_time and
-            self.min_retention_time == other.min_retention_time
+            self.min_retention_time == other.min_retention_time and
+            self.max_age == other.max_age
         )
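A quick worked example of the new validation: a max-age shorter than the minimum retention time is contradictory (the node would have to be retired before it may be released), so the config is rejected at load time. A sketch with illustrative values:

    min_retention_time = 600
    max_age = 300
    try:
        if max_age and max_age < min_retention_time:
            raise Exception("The max_age must be greater than or "
                            "equal to the min_retention_time")
    except Exception as e:
        print(e)  # rejected at config load time, as in the hunk above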
nodepool/tests/fixtures/metastatic.yaml
@@ -66,6 +66,7 @@ providers:
       backing-label: backing-label
       max-parallel-jobs: 2
       grace-time: 2
+      max-age: 300
       host-key-checking: true
   - name: user-label-min-retention
     backing-label: backing-label-min-retention
@@ -372,3 +372,47 @@ class TestDriverMetastatic(tests.DBTestCase):
         meta_manager.adapter.listResources()
         nodes = self._getNodes()
         self.waitForNodeDeletion(bn1)
+
+    def test_metastatic_max_age(self):
+        # Test the max-age option
+        configfile = self.setup_config('metastatic.yaml')
+        pool = self.useNodepool(configfile, watermark_sleep=1)
+        self.startPool(pool)
+        manager = pool.getProviderManager('fake-provider')
+        manager.adapter._client.create_image(name="fake-image")
+
+        # Launch one metastatic node on a backing node
+        node1 = self._requestNode()
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 2)
+        bn1 = nodes[1]
+        self.assertEqual(bn1.provider, 'fake-provider')
+        self.assertEqual(bn1.id, node1.driver_data['backing_node'])
+
+        # Create a second node and verify it uses the same backing node.
+        node2 = self._requestNode()
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 3)
+        self.assertEqual(bn1.id, node2.driver_data['backing_node'])
+
+        # Delete the second node.
+        node2.state = zk.DELETING
+        self.zk.storeNode(node2)
+        self.waitForNodeDeletion(node2)
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 2)
+
+        # Falsify the launch time so that the node is older than
+        # max_age (300).
+        meta_manager = pool.getProviderManager('meta-provider')
+        bnr = meta_manager.adapter.backing_node_records['user-label'][0]
+        bnr.launched = 0
+
+        # This has the side effect of marking the backing node as failed.
+        meta_manager.adapter.listResources()
+
+        # Create another node and verify it gets a new backing node.
+        node3 = self._requestNode()
+        nodes = self._getNodes()
+        self.assertEqual(len(nodes), 4)
+        self.assertNotEqual(bn1.id, node3.driver_data['backing_node'])
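Why setting bnr.launched = 0 works in the test above: the launch time appears to be an epoch timestamp (an assumption consistent with the adapter hunk's "now - bnr.launched" comparison), so zeroing it makes the computed age equal to the current epoch time, which vastly exceeds a max-age of 300. A worked check under that assumption:

    import time

    launched = 0                  # falsified epoch launch time
    max_age = 300
    age = time.time() - launched  # roughly 1.7e9 seconds as of 2024
    assert age > max_age          # so listResources() marks the node failed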