Allow configuring nodepool launch retries
Nodepool currently hardcodes that 3 attempts are made to launch a node in the cloud. Allow modifying this with the ``launch-retries`` option in your provider configuration.
Change-Id: I61f44e163d419771824daa2039f7cdecc74742aa
This commit is contained in:
parent
646c48800b
commit
71035081d5
@ -296,6 +296,7 @@ provider, the Nodepool image types are also defined (see
|
|||||||
- az1
|
- az1
|
||||||
boot-timeout: 120
|
boot-timeout: 120
|
||||||
launch-timeout: 900
|
launch-timeout: 900
|
||||||
|
launch-retries: 3
|
||||||
image-name-format: 'template-{image_name}-{timestamp}'
|
image-name-format: 'template-{image_name}-{timestamp}'
|
||||||
hostname-format: '{label.name}-{provider.name}-{node.id}'
|
hostname-format: '{label.name}-{provider.name}-{node.id}'
|
||||||
ipv6-preferred: False
|
ipv6-preferred: False
|
||||||
@ -413,6 +414,13 @@ provider, the Nodepool image types are also defined (see
|
|||||||
|
|
||||||
In seconds. Default 3600.
|
In seconds. Default 3600.
|
||||||
|
|
||||||
|
``launch-retries``
|
||||||
|
|
||||||
|
The number of attempts made to launch a server before considering the job
|
||||||
|
failed.
|
||||||
|
|
||||||
|
Default 3.
|
||||||
|
|
||||||
``keypair``
|
``keypair``
|
||||||
Default None
|
Default None
|
||||||
|
|
||||||
|
@ -74,6 +74,7 @@ class ConfigValidator:
|
|||||||
'boot-timeout': int,
|
'boot-timeout': int,
|
||||||
'api-timeout': int,
|
'api-timeout': int,
|
||||||
'launch-timeout': int,
|
'launch-timeout': int,
|
||||||
|
'launch-retries': int,
|
||||||
'rate': float,
|
'rate': float,
|
||||||
'images': [images],
|
'images': [images],
|
||||||
'hostname-format': str,
|
'hostname-format': str,
|
||||||
|
@ -182,6 +182,7 @@ def loadConfig(config_path):
|
|||||||
p.api_timeout = provider.get('api-timeout')
|
p.api_timeout = provider.get('api-timeout')
|
||||||
p.boot_timeout = provider.get('boot-timeout', 60)
|
p.boot_timeout = provider.get('boot-timeout', 60)
|
||||||
p.launch_timeout = provider.get('launch-timeout', 3600)
|
p.launch_timeout = provider.get('launch-timeout', 3600)
|
||||||
|
p.launch_retries = provider.get('launch-retries', 3)
|
||||||
p.networks = []
|
p.networks = []
|
||||||
for network in provider.get('networks', []):
|
for network in provider.get('networks', []):
|
||||||
n = Network()
|
n = Network()
|
||||||
|
@ -900,7 +900,8 @@ class NodeRequestHandler(object):
|
|||||||
self.zk.storeNodeRequest(self.request)
|
self.zk.storeNodeRequest(self.request)
|
||||||
|
|
||||||
self.launch_manager = NodeLaunchManager(
|
self.launch_manager = NodeLaunchManager(
|
||||||
self.zk, self.provider, self.labels, self.manager, retries=3)
|
self.zk, self.provider, self.labels, self.manager,
|
||||||
|
retries=self.provider.launch_retries)
|
||||||
ready_nodes = self.zk.getReadyNodesOfTypes(self.request.node_types)
|
ready_nodes = self.zk.getReadyNodesOfTypes(self.request.node_types)
|
||||||
|
|
||||||
for ntype in self.request.node_types:
|
for ntype in self.request.node_types:
|
||||||
|
@ -38,6 +38,7 @@ providers:
|
|||||||
boot-timeout: 120
|
boot-timeout: 120
|
||||||
max-servers: 184
|
max-servers: 184
|
||||||
max-concurrency: 10
|
max-concurrency: 10
|
||||||
|
launch-retries: 3
|
||||||
rate: 0.001
|
rate: 0.001
|
||||||
images:
|
images:
|
||||||
- name: trusty
|
- name: trusty
|
||||||
|
55
nodepool/tests/fixtures/node_launch_retry.yaml
vendored
Normal file
55
nodepool/tests/fixtures/node_launch_retry.yaml
vendored
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
elements-dir: .
|
||||||
|
images-dir: '{images_dir}'
|
||||||
|
|
||||||
|
cron:
|
||||||
|
check: '*/15 * * * *'
|
||||||
|
cleanup: '*/1 * * * *'
|
||||||
|
|
||||||
|
zookeeper-servers:
|
||||||
|
- host: {zookeeper_host}
|
||||||
|
port: {zookeeper_port}
|
||||||
|
chroot: {zookeeper_chroot}
|
||||||
|
|
||||||
|
labels:
|
||||||
|
- name: fake-label
|
||||||
|
image: fake-image
|
||||||
|
min-ready: 0
|
||||||
|
providers:
|
||||||
|
- name: fake-provider
|
||||||
|
|
||||||
|
providers:
|
||||||
|
- name: fake-provider
|
||||||
|
region-name: fake-region
|
||||||
|
keypair: 'if-present-use-this-keypair'
|
||||||
|
username: 'fake'
|
||||||
|
password: 'fake'
|
||||||
|
auth-url: 'fake'
|
||||||
|
project-id: 'fake'
|
||||||
|
max-servers: 96
|
||||||
|
pool: 'fake'
|
||||||
|
launch-retries: 2
|
||||||
|
networks:
|
||||||
|
- net-id: 'some-uuid'
|
||||||
|
rate: 0.0001
|
||||||
|
images:
|
||||||
|
- name: fake-image
|
||||||
|
min-ram: 8192
|
||||||
|
name-filter: 'Fake'
|
||||||
|
meta:
|
||||||
|
key: value
|
||||||
|
key2: value
|
||||||
|
|
||||||
|
targets:
|
||||||
|
- name: fake-target
|
||||||
|
|
||||||
|
diskimages:
|
||||||
|
- name: fake-image
|
||||||
|
elements:
|
||||||
|
- fedora
|
||||||
|
- vm
|
||||||
|
release: 21
|
||||||
|
env-vars:
|
||||||
|
TMPDIR: /opt/dib_tmp
|
||||||
|
DIB_IMAGE_CACHE: /opt/dib_cache
|
||||||
|
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
|
||||||
|
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
|
@ -271,6 +271,27 @@ class TestNodepool(tests.DBTestCase):
|
|||||||
self.assertEqual('fake-provider', new_nodes[0].provider)
|
self.assertEqual('fake-provider', new_nodes[0].provider)
|
||||||
self.assertNotEqual(nodes[0], new_nodes[0])
|
self.assertNotEqual(nodes[0], new_nodes[0])
|
||||||
|
|
||||||
|
@mock.patch('nodepool.provider_manager.FakeProviderManager.createServer')
|
||||||
|
def test_node_launch_retries(self, mock_create_server):
|
||||||
|
mock_create_server.side_effect = Exception('Boom!')
|
||||||
|
|
||||||
|
configfile = self.setup_config('node_launch_retry.yaml')
|
||||||
|
pool = self.useNodepool(configfile, watermark_sleep=1)
|
||||||
|
self._useBuilder(configfile)
|
||||||
|
pool.start()
|
||||||
|
self.waitForImage('fake-provider', 'fake-image')
|
||||||
|
|
||||||
|
req = zk.NodeRequest()
|
||||||
|
req.state = zk.REQUESTED
|
||||||
|
req.node_types.append('fake-label')
|
||||||
|
self.zk.storeNodeRequest(req)
|
||||||
|
|
||||||
|
req = self.waitForNodeRequest(req)
|
||||||
|
self.assertEqual(req.state, zk.FAILED)
|
||||||
|
|
||||||
|
# retries in config is set to 2, so 2 attempts to create a server
|
||||||
|
self.assertEqual(2, mock_create_server.call_count)
|
||||||
|
|
||||||
@skip("Disabled for early v3 development")
|
@skip("Disabled for early v3 development")
|
||||||
def test_node_delete_failure(self):
|
def test_node_delete_failure(self):
|
||||||
def fail_delete(self, name):
|
def fail_delete(self, name):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user