From f89b41f6ad7d88645668b1313b58324f712d97c5 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Thu, 8 Feb 2024 09:36:35 -0800 Subject: [PATCH] Reconcile docs/validation for some options Some drivers were missing docs and/or validation for options that they actually support. This change: adds launch-timeout to: metastatic docs and validation aws validation gce docs and validation adds post-upload-hook to: aws validation adds boot-timeout to: metastatic docs and validation adds launch-retries to: metastatic docs and validation Change-Id: Id3f4bb687c1b2c39a1feb926a50c46b23ae9df9a --- doc/source/gce.rst | 8 ++++ doc/source/metastatic.rst | 22 ++++++++++ nodepool/driver/aws/config.py | 2 + nodepool/driver/gce/config.py | 1 + nodepool/driver/metastatic/config.py | 3 ++ .../tests/fixtures/config_validate/good.yaml | 43 +++++++++++++++++++ 6 files changed, 79 insertions(+) diff --git a/doc/source/gce.rst b/doc/source/gce.rst index d64643252..b9d660702 100644 --- a/doc/source/gce.rst +++ b/doc/source/gce.rst @@ -80,6 +80,14 @@ section of the configuration. image via SSH. If the timeout is exceeded, the node launch is aborted and the instance deleted. + .. attr:: launch-timeout + :type: int seconds + :default: 3600 + + The time to wait from issuing the command to create a new instance + until that instance is reported as "active". If the timeout is + exceeded, the node launch is aborted and the instance deleted. + .. attr:: launch-retries :default: 3 diff --git a/doc/source/metastatic.rst b/doc/source/metastatic.rst index a518aa22c..b37f47fce 100644 --- a/doc/source/metastatic.rst +++ b/doc/source/metastatic.rst @@ -59,6 +59,28 @@ itself, which is "meta". A unique name for this provider configuration. + .. attr:: boot-timeout + :type: int seconds + :default: 60 + + Once an instance is active, how long to try connecting to the + image via SSH. If the timeout is exceeded, the node launch is + aborted and the instance deleted. + + .. attr:: launch-timeout + :type: int seconds + :default: 3600 + + The time to wait from issuing the command to create a new instance + until that instance is reported as "active". If the timeout is + exceeded, the node launch is aborted and the instance deleted. + + .. attr:: launch-retries + :default: 3 + + The number of times to retry launching a node before considering + the job failed. + .. attr:: pools :type: list diff --git a/nodepool/driver/aws/config.py b/nodepool/driver/aws/config.py index 37348755d..617af89ff 100644 --- a/nodepool/driver/aws/config.py +++ b/nodepool/driver/aws/config.py @@ -346,6 +346,7 @@ class AwsProviderConfig(ProviderConfig): 'diskimages': [provider_diskimages], 'hostname-format': str, 'boot-timeout': int, + 'launch-timeout': int, 'launch-retries': int, 'object-storage': object_storage, 'image-format': v.Any('ova', 'vhd', 'vhdx', 'vmdk', 'raw'), @@ -354,6 +355,7 @@ class AwsProviderConfig(ProviderConfig): 'max-cores': int, 'max-ram': int, 'max-resources': {str: int}, + 'post-upload-hook': str, }) return v.Schema(provider) diff --git a/nodepool/driver/gce/config.py b/nodepool/driver/gce/config.py index c5817b55e..5c5ec3bd4 100644 --- a/nodepool/driver/gce/config.py +++ b/nodepool/driver/gce/config.py @@ -187,6 +187,7 @@ class GceProviderConfig(ProviderConfig): v.Required('zone'): str, 'cloud-images': [provider_cloud_images], 'boot-timeout': int, + 'launch-timeout': int, 'launch-retries': int, 'rate-limit': int, }) diff --git a/nodepool/driver/metastatic/config.py b/nodepool/driver/metastatic/config.py index b38b0ad91..83f31ee71 100644 --- a/nodepool/driver/metastatic/config.py +++ b/nodepool/driver/metastatic/config.py @@ -145,6 +145,9 @@ class MetastaticProviderConfig(ProviderConfig): provider = ProviderConfig.getCommonSchemaDict() provider.update({ + 'boot-timeout': int, + 'launch-timeout': int, + 'launch-retries': int, v.Required('pools'): [pool], }) return v.Schema(provider) diff --git a/nodepool/tests/fixtures/config_validate/good.yaml b/nodepool/tests/fixtures/config_validate/good.yaml index a779ebc75..f3b4c3cc0 100644 --- a/nodepool/tests/fixtures/config_validate/good.yaml +++ b/nodepool/tests/fixtures/config_validate/good.yaml @@ -82,6 +82,7 @@ providers: rate: 0.001 port-cleanup-interval: 0 post-upload-hook: /usr/bin/upload-hook + launch-timeout: 1500 diskimages: - name: trusty pause: False @@ -211,6 +212,9 @@ providers: driver: aws region-name: us-east-2 profile-name: default + launch-timeout: 1500 + launch-retries: 5 + boot-timeout: 120 cloud-images: - name: centos-ami image-id: ami-cfdafaaa @@ -228,6 +232,45 @@ providers: volume-type: gp2 volume-size: 80 + + - name: gce-uscentral1 + driver: gce + project: nodepool-123456 + region: us-central1 + zone: us-central1-a + launch-timeout: 1500 + launch-retries: 5 + boot-timeout: 120 + cloud-images: + - name: debian-stretch + image-project: debian-cloud + image-family: debian-9 + username: zuul + key: ssh-rsa ... + pools: + - name: main + max-servers: 8 + labels: + - name: debian-stretch + instance-type: f1-micro + cloud-image: debian-stretch + volume-type: standard + volume-size: 10 + + - name: meta-provider + driver: metastatic + launch-timeout: 1500 + launch-retries: 5 + boot-timeout: 120 + pools: + - name: main + max-servers: 10 + labels: + - name: small-node + backing-label: large-node + max-parallel-jobs: 2 + grace-time: 600 + - name: openshift-single-project driver: openshiftpods context: "/hostname:8443/developer"