From 58482658874584ddd6e092ee7c3fa95ff3bfe7ce Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Wed, 30 Mar 2016 17:34:24 -0700 Subject: [PATCH 01/21] Start using dogpile caching in devstack tests As a run up to enabling the dbm dogpile caching in nodepool, enable it in the devstack tests. Change-Id: Id3bd1aca930053674af7848f848bb95b7f6ad85a --- devstack/plugin.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 62447471a..7d356299a 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -234,6 +234,10 @@ EOF cp /etc/openstack/clouds.yaml /tmp cat >>/tmp/clouds.yaml < Date: Mon, 1 Aug 2016 16:54:49 -0400 Subject: [PATCH 02/21] Default config-drive to true As we depend more and more on glean to help bootstrap a node, it is possible for new clouds added to nodepool.yaml to be missing the setting. Which results is broken nodes and multiple configuration updates. As a result, we now default config-drive to true to make it easier to bring nodes online. Change-Id: I4e214ba7bc43a59ddffb4bfb50576ab3b96acf69 Signed-off-by: Paul Belanger --- doc/source/configuration.rst | 2 +- nodepool/config.py | 2 +- nodepool/provider_manager.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index 6b09d60e3..e73a22753 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -510,7 +510,7 @@ Example:: Default ``/var/lib/jenkins/.ssh/id_rsa`` ``config-drive`` (boolean) - Whether config drive should be used for the image. + Whether config drive should be used for the image. Default ``True`` ``meta`` (dict) Arbitrary key/value metadata to store for this server using the Nova diff --git a/nodepool/config.py b/nodepool/config.py index 590330328..d60eefc4a 100644 --- a/nodepool/config.py +++ b/nodepool/config.py @@ -207,7 +207,7 @@ def loadConfig(config_path): i.user_home = image.get('user-home', '/home/jenkins') i.private_key = image.get('private-key', '/var/lib/jenkins/.ssh/id_rsa') - i.config_drive = image.get('config-drive', None) + i.config_drive = image.get('config-drive', True) # note this does "double-duty" -- for # SnapshotImageUpdater the meta-data dict is passed to diff --git a/nodepool/provider_manager.py b/nodepool/provider_manager.py index 4a25dd031..24f2ac219 100644 --- a/nodepool/provider_manager.py +++ b/nodepool/provider_manager.py @@ -179,7 +179,7 @@ class ProviderManager(TaskManager): def createServer(self, name, min_ram, image_id=None, image_name=None, az=None, key_name=None, name_filter=None, - config_drive=None, nodepool_node_id=None, + config_drive=True, nodepool_node_id=None, nodepool_image_name=None, nodepool_snapshot_image_id=None): if image_name: From d55f99ca60db5ad7b95f8250e8b0b2eefb588221 Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Sat, 17 Dec 2016 14:41:11 -0500 Subject: [PATCH 03/21] Set diskimage formats for building state A little optimization to actually display the formats are are building, we only see this information in dib-image-list after the image was built. Also update our existing code to use list() over split(). Change-Id: I7cce9bbb3f583efc22ed520af72314d2b14de993 Signed-off-by: Paul Belanger --- nodepool/builder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nodepool/builder.py b/nodepool/builder.py index 2e709aae9..18a0e46d0 100644 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -589,6 +589,7 @@ class BuildWorker(BaseWorker): data = zk.ImageBuild() data.state = zk.BUILDING data.builder = self._hostname + data.formats = list(diskimage.image_types) bnum = self._zk.storeBuild(diskimage.name, data) data = self._buildImage(bnum, diskimage) @@ -638,6 +639,7 @@ class BuildWorker(BaseWorker): data = zk.ImageBuild() data.state = zk.BUILDING data.builder = self._hostname + data.formats = list(diskimage.image_types) bnum = self._zk.storeBuild(diskimage.name, data) data = self._buildImage(bnum, diskimage) @@ -740,7 +742,7 @@ class BuildWorker(BaseWorker): else: self.log.info("DIB image %s is built" % diskimage.name) build_data.state = zk.READY - build_data.formats = img_types.split(",") + build_data.formats = list(diskimage.image_types) if self._statsd: # record stats on the size of each image we create From 85b1b90a75d8e5aeed48b9f1675200079458f159 Mon Sep 17 00:00:00 2001 From: Jamie Lennox Date: Wed, 4 Jan 2017 14:58:32 +1100 Subject: [PATCH 04/21] Ensure env-vars are strings in config validate If a non-string value is passed to env-vars then when diskimage-builder is run it will be injected as is into the ENV dictionary. This results in the error "TypeError: execve() arg 3 contains a non-string value" which is not exactly explicit about the problem. We have two options here: Either convert the dictionary items to strings automatically or make the config file fail validation. For now I'm going with config validation because if the value is a dict or something more difficult than simply an int this could be confusing. Change-Id: I94013b50761ff0fc5569eb1af6420454b4191a50 --- nodepool/cmd/config_validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodepool/cmd/config_validator.py b/nodepool/cmd/config_validator.py index 52a591426..061fdac84 100644 --- a/nodepool/cmd/config_validator.py +++ b/nodepool/cmd/config_validator.py @@ -110,7 +110,7 @@ class ConfigValidator: 'elements': [str], 'release': v.Any(str, int), 'rebuild-age': int, - 'env-vars': dict, + 'env-vars': {str: str}, } top_level = { From ca28b269597c3acf89c51d17d1178611e44b26ef Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Thu, 22 Dec 2016 13:54:11 -0500 Subject: [PATCH 05/21] Support AFS mirrors for nodepool diskimages We do a good job mirroring ubuntu and centos, lets use them for our dsvm job. Change-Id: I5a0801142a84c0d773ff2fad355ff714746315f6 Depends-On: Idd11c2b7df1d247c6d32a5f936b8601b4741b519 Signed-off-by: Paul Belanger --- devstack/plugin.sh | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 46532f1f3..9ecc068d4 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -82,16 +82,23 @@ function nodepool_create_keypairs { function nodepool_write_elements { sudo mkdir -p $(dirname $NODEPOOL_CONFIG)/elements/nodepool-setup/install.d + sudo mkdir -p $(dirname $NODEPOOL_CONFIG)/elements/nodepool-setup/root.d cat > /tmp/01-nodepool-setup < /tmp/50-apt-allow-unauthenticated < /tmp/nodepool.yaml < Date: Mon, 16 Jan 2017 14:05:02 +0700 Subject: [PATCH 06/21] Removed redundant 'the' Change-Id: I3f2ec1900533443e3c64e851a1bef818562e6832 --- doc/source/operation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/operation.rst b/doc/source/operation.rst index 800db95a7..f245fa0e7 100644 --- a/doc/source/operation.rst +++ b/doc/source/operation.rst @@ -163,7 +163,7 @@ alien-image-list In the case that a job is randomly failing for an unknown cause, it may be necessary to instruct nodepool to automatically hold a node on -which that job has failed. To do so, use the the ``job-create`` +which that job has failed. To do so, use the ``job-create`` command to specify the job name and how many failed nodes should be held. When debugging is complete, use ''job-delete'' to disable the feature. From 44ee08c2d71937816d0029bb60c05a9d8b802655 Mon Sep 17 00:00:00 2001 From: Lenny Verkhovsky Date: Mon, 23 Jan 2017 09:00:12 +0000 Subject: [PATCH 07/21] Create mandatory /etc/nodepool for later use Since nodepool-base element is optional /etc/nodepool folder still must exist for node information. Change-Id: I838f4eb1ea260b1e2eed3c2768e955f34067dcc5 --- nodepool/nodepool.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index 04ed39a73..612c64ca7 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -625,7 +625,9 @@ class NodeLauncher(threading.Thread): if not host: raise Exception("Unable to log in via SSH") - host.ssh("test for config dir", "ls /etc/nodepool") + host.ssh("Create config dir", "sudo mkdir -p /etc/nodepool") + host.ssh("Change config dir owner", + "sudo chown %s -R /etc/nodepool" % self.image.username) ftp = host.client.open_sftp() From fd2b3c921c9e0d9cd63ce8451becaa01be817311 Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Sun, 29 Jan 2017 10:51:23 -0500 Subject: [PATCH 08/21] Fix fedora 25 pause bug with devstack We mistakenly skipped this setting, as a results fedora-25 images are built by default, causing un needed churn for our nodepool project jobs. Change-Id: Id91991a490709f9bbac5a4f6e9847e047b83ca51 Signed-off-by: Paul Belanger --- devstack/settings | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devstack/settings b/devstack/settings index d42263511..dc2107a84 100644 --- a/devstack/settings +++ b/devstack/settings @@ -7,7 +7,7 @@ NODEPOOL_DIB_BASE_PATH=/opt/dib # NOTE(pabelanger): Be sure to also update tools/check_devstack_plugin.sh if you # change the defaults. NODEPOOL_PAUSE_CENTOS_7_DIB=${NODEPOOL_PAUSE_CENTOS_7_DIB:-true} -NODEPOOL_PAUSE_FEDORA_24_DIB=${NODEPOOL_PAUSE_FEDORA_24_DIB:-true} +NODEPOOL_PAUSE_FEDORA_25_DIB=${NODEPOOL_PAUSE_FEDORA_25_DIB:-true} NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB=${NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB:-true} NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB=${NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB:-false} NODEPOOL_PAUSE_UBUNTU_XENIAL_DIB=${NODEPOOL_PAUSE_UBUNTU_XENIAL_DIB:-true} From 7c217682a460977df954fd5797a99c551d024658 Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Tue, 31 Jan 2017 14:20:47 -0600 Subject: [PATCH 09/21] Start nodepool in the test-config phase test-config is a devstack plugin phase that runs at the very end. That's much more what we want, and allows us to continue running the services using the devstack functions. Change-Id: I1bdfb23d0ba2c988e367cd13171a59398a3b9c71 --- devstack/plugin.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devstack/plugin.sh b/devstack/plugin.sh index c5a17c379..f0457735f 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -462,7 +462,7 @@ if is_service_enabled nodepool; then echo_summary "Configuring nodepool" configure_nodepool - elif [[ "$1" == "stack" && "$2" == "extra" ]]; then + elif [[ "$1" == "stack" && "$2" == "test-config" ]]; then # Initialize and start the nodepool service echo_summary "Initializing nodepool" start_nodepool From 491fae7b28667f8ef116c936f79950d3d2913ba3 Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Tue, 31 Jan 2017 16:18:08 -0600 Subject: [PATCH 10/21] Revert "Create mandatory /etc/nodepool for later use" At first glance, this may be causing a hang on the remote side that is causing the nodepool test to hang. We didn't notice it because the test was otherwise broken. This reverts commit 44ee08c2d71937816d0029bb60c05a9d8b802655. Change-Id: Iaef2a140e33fc48f8bfa8ff4769eded37ce152c6 --- nodepool/nodepool.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index 612c64ca7..04ed39a73 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -625,9 +625,7 @@ class NodeLauncher(threading.Thread): if not host: raise Exception("Unable to log in via SSH") - host.ssh("Create config dir", "sudo mkdir -p /etc/nodepool") - host.ssh("Change config dir owner", - "sudo chown %s -R /etc/nodepool" % self.image.username) + host.ssh("test for config dir", "ls /etc/nodepool") ftp = host.client.open_sftp() From a21a2cdae74b493f73a974f25256e2e8c3f4d283 Mon Sep 17 00:00:00 2001 From: Monty Taylor Date: Tue, 31 Jan 2017 16:43:06 -0600 Subject: [PATCH 11/21] Add comment explaining test-not-create behavior Change-Id: I37ed71d0ffc405ea430815aa2b208bda84e0d798 --- nodepool/nodepool.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index 04ed39a73..a6d538372 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -625,6 +625,9 @@ class NodeLauncher(threading.Thread): if not host: raise Exception("Unable to log in via SSH") + # This is a test for the existence and not a creation on purpose. + # Current requirements for nodepool nodes are that nodepool can log + # in and that it can write to /etc/nodepool. host.ssh("test for config dir", "ls /etc/nodepool") ftp = host.client.open_sftp() From 3e0035e9083ae73800a40fc659d59048a24b67b6 Mon Sep 17 00:00:00 2001 From: Ian Wienand Date: Fri, 3 Feb 2017 11:07:53 +1100 Subject: [PATCH 12/21] Make web status text/plain When I added the JSON targets in I3410a4e4efd649732747b0502d66d212f50fa1bb I also changed the text output mime-type to be application/text, which seemed to pair with application/json. But this means browsers prompt to download the file rather than display it, which is not the intention. Restore to text/plain. Change-Id: I6fa6597b2fcd05a4c33ba4932c966101daf98ecf --- nodepool/tests/test_webapp.py | 2 +- nodepool/webapp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nodepool/tests/test_webapp.py b/nodepool/tests/test_webapp.py index 3038547b6..75be8e6aa 100644 --- a/nodepool/tests/test_webapp.py +++ b/nodepool/tests/test_webapp.py @@ -39,7 +39,7 @@ class TestWebApp(tests.DBTestCase): "http://localhost:%s/image-list" % port) f = urllib2.urlopen(req) self.assertEqual(f.info().getheader('Content-Type'), - 'application/text') + 'text/plain; charset=UTF-8') data = f.read() self.assertTrue('fake-image' in data) diff --git a/nodepool/webapp.py b/nodepool/webapp.py index 9da0cd5dc..288235563 100644 --- a/nodepool/webapp.py +++ b/nodepool/webapp.py @@ -93,7 +93,7 @@ class WebApp(threading.Thread): if request.path.endswith('.json'): content_type = 'application/json' else: - content_type = 'application/text' + content_type = 'text/plain' response = webob.Response(body=output, content_type=content_type) From ac1e0bac5333abe9d3a8961ba90a9d8737934d4a Mon Sep 17 00:00:00 2001 From: David Shrewsbury Date: Thu, 16 Feb 2017 09:20:39 -0500 Subject: [PATCH 13/21] Fix potential race in image upload cleanup It's possible for an active image upload to complete during the builder cleanup phase, which would release its lock and change the state to READY. If the cleanup thread has cached the UPLOADING state and checks to see if the upload is in progress (just after the lock has been release), then we could accidentally delete the upload record because we don't have the correct state. This adds a second check on the state to make sure it didn't change on us. Change-Id: I60be43e999cf86d4c3c46e6ea69ecd1bcb69f533 --- nodepool/builder.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nodepool/builder.py b/nodepool/builder.py index 19e5ecba5..25ea88cb8 100644 --- a/nodepool/builder.py +++ b/nodepool/builder.py @@ -373,6 +373,14 @@ class CleanupWorker(BaseWorker): if (upload.state == zk.UPLOADING and not self._inProgressUpload(upload) ): + # Since we cache the uploads above, we need to verify the + # state hasn't changed on us (e.g., it could have gone from + # an in progress upload to a successfully completed upload + # between the getUploads() and the _inProgressUpload() check. + u = self._zk.getImageUpload(image, build_id, provider, + upload.id) + if upload.state != u.state: + continue self.log.info("Removing failed upload record: %s" % upload) self._zk.deleteUpload(image, build_id, provider, upload.id) elif upload.state == zk.DELETING: From ed4eb0ce677ddaa610aeb0678b6c5cb8012218cb Mon Sep 17 00:00:00 2001 From: David Moreau-Simard Date: Wed, 22 Feb 2017 10:44:51 -0500 Subject: [PATCH 14/21] Clarification about the rate parameter Change-Id: I1d1900c143ceff43748752846395ee8ca6ffd899 --- doc/source/configuration.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index 0511bcf8b..6c512bc52 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -476,7 +476,8 @@ provider, the Nodepool image types are also defined (see Default ``template-{image.name}-{timestamp}`` ``rate`` - In seconds. Default 1.0. + In seconds, amount to wait between operations on the provider. + Defaults to ``1.0``. ``clean-floating-ips`` If it is set to True, nodepool will assume it is the only user of the From a6f4f6be9bf0069c00c45562f9728189345e1589 Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Mon, 27 Feb 2017 14:16:12 -0500 Subject: [PATCH 15/21] Add nodepool-id to provider section Currently, while testing zuulv3, we are wanting to share the infracloud-chocolate provider between 2 nodepool servers. The current issue is, if we launch nodes from zuulv3-dev.o.o, nodepool.o.o will detect the nodes as leaked and delete them. A way to solve this, is to create a per provider 'nodepool-id' where an admin can configure 2 separate nodepool servers to share the same tenant. The big reason for doing this, is so we don't have to stand up a duplicate nodepool-builder and upload duplicate images. Change-Id: I03a95ce7b8bf06199de7f46fd3d0f82407bec8f5 Signed-off-by: Paul Belanger --- doc/source/configuration.rst | 8 +++ nodepool/cmd/config_validator.py | 1 + nodepool/config.py | 2 + nodepool/nodepool.py | 7 ++ nodepool/provider_manager.py | 2 + .../fixtures/leaked_node_nodepool_id.yaml | 62 ++++++++++++++++ nodepool/tests/test_nodepool.py | 71 ++++++++++++++++++- 7 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 nodepool/tests/fixtures/leaked_node_nodepool_id.yaml diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index 0511bcf8b..63e3ab290 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -446,6 +446,14 @@ provider, the Nodepool image types are also defined (see In seconds. Default 3600. + ``nodepool-id`` (deprecated) + + A unique string to identify which nodepool instances is using a provider. + This is useful if you want to configure production and development instances + of nodepool but share the same provider. + + Default None + ``keypair`` Default None diff --git a/nodepool/cmd/config_validator.py b/nodepool/cmd/config_validator.py index f7ee7b0bb..7e7d1f126 100644 --- a/nodepool/cmd/config_validator.py +++ b/nodepool/cmd/config_validator.py @@ -73,6 +73,7 @@ class ConfigValidator: 'boot-timeout': int, 'api-timeout': int, 'launch-timeout': int, + 'nodepool-id': str, 'rate': float, 'images': [images], 'template-hostname': str, diff --git a/nodepool/config.py b/nodepool/config.py index 65682c93f..da7753926 100644 --- a/nodepool/config.py +++ b/nodepool/config.py @@ -40,6 +40,7 @@ class Config(ConfigValue): class Provider(ConfigValue): def __eq__(self, other): if (other.cloud_config != self.cloud_config or + other.nodepool_id != self.nodepool_id or other.max_servers != self.max_servers or other.pool != self.pool or other.image_type != self.image_type or @@ -197,6 +198,7 @@ def loadConfig(config_path): cloud_kwargs = _cloudKwargsFromProvider(provider) p.cloud_config = _get_one_cloud(cloud_config, cloud_kwargs) + p.nodepool_id = provider.get('nodepool-id', None) p.region_name = provider.get('region-name') p.max_servers = provider['max-servers'] p.keypair = provider.get('keypair', None) diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index a6d538372..2ffc6a8f5 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -1542,6 +1542,13 @@ class NodePool(threading.Thread): provider.name, meta['provider_name'])) continue + nodepool_id = meta.get('nodepool_id', None) + if nodepool_id and nodepool_id != provider.nodepool_id: + self.log.debug("Instance %s (%s) in %s " + "was not launched by us" % ( + server['name'], server['id'], + provider.name)) + continue node_id = meta.get('node_id') if node_id: if session.getNode(node_id): diff --git a/nodepool/provider_manager.py b/nodepool/provider_manager.py index 6dc887fde..5eddfbd99 100644 --- a/nodepool/provider_manager.py +++ b/nodepool/provider_manager.py @@ -220,6 +220,8 @@ class ProviderManager(object): # groups[0] is the image name or anything silly like that. nodepool_meta = dict(provider_name=self.provider.name) groups_meta = [self.provider.name] + if self.provider.nodepool_id: + nodepool_meta['nodepool_id'] = self.provider.nodepool_id if nodepool_node_id: nodepool_meta['node_id'] = nodepool_node_id if nodepool_snapshot_image_id: diff --git a/nodepool/tests/fixtures/leaked_node_nodepool_id.yaml b/nodepool/tests/fixtures/leaked_node_nodepool_id.yaml new file mode 100644 index 000000000..667caac97 --- /dev/null +++ b/nodepool/tests/fixtures/leaked_node_nodepool_id.yaml @@ -0,0 +1,62 @@ +elements-dir: . +images-dir: '{images_dir}' + +cron: + check: '*/15 * * * *' + cleanup: '* * * * * *' + +zmq-publishers: + - tcp://localhost:8881 + +gearman-servers: + - host: localhost + port: {gearman_port} + +zookeeper-servers: + - host: {zookeeper_host} + port: {zookeeper_port} + chroot: {zookeeper_chroot} + +labels: + - name: fake-label + image: fake-image + min-ready: 1 + providers: + - name: fake-provider + +providers: + - name: fake-provider + region-name: fake-region + keypair: 'if-present-use-this-keypair' + username: 'fake' + password: 'fake' + auth-url: 'fake' + project-id: 'fake' + max-servers: 96 + pool: 'fake' + networks: + - net-id: 'some-uuid' + rate: 0.0001 + nodepool-id: foo + images: + - name: fake-image + min-ram: 8192 + name-filter: 'Fake' + meta: + key: value + key2: value + +targets: + - name: fake-target + +diskimages: + - name: fake-image + elements: + - fedora + - vm + release: 21 + env-vars: + TMPDIR: /opt/dib_tmp + DIB_IMAGE_CACHE: /opt/dib_cache + DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/ + BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2 diff --git a/nodepool/tests/test_nodepool.py b/nodepool/tests/test_nodepool.py index 55bdb60de..e56586be8 100644 --- a/nodepool/tests/test_nodepool.py +++ b/nodepool/tests/test_nodepool.py @@ -334,9 +334,15 @@ class TestNodepool(tests.DBTestCase): self.assertEqual(len(deleted_nodes), 1) self.assertEqual(node_id, deleted_nodes[0].id) + def test_leaked_node_with_nodepool_id(self): + self._test_leaked_node('leaked_node_nodepool_id.yaml') + def test_leaked_node(self): + self._test_leaked_node('leaked_node.yaml') + + def _test_leaked_node(self, cfgfile): """Test that a leaked node is deleted""" - configfile = self.setup_config('leaked_node.yaml') + configfile = self.setup_config(cfgfile) pool = self.useNodepool(configfile, watermark_sleep=1) self._useBuilder(configfile) pool.start() @@ -385,6 +391,69 @@ class TestNodepool(tests.DBTestCase): state=nodedb.READY) self.assertEqual(len(nodes), 1) + def test_leaked_node_not_deleted(self): + """Test that a leaked node is not deleted""" + configfile = self.setup_config('leaked_node_nodepool_id.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + self._useBuilder(configfile) + pool.start() + self.waitForImage('fake-provider', 'fake-image') + self.log.debug("Waiting for initial pool...") + self.waitForNodes(pool) + self.log.debug("...done waiting for initial pool.") + pool.stop() + + # Make sure we have a node built and ready + provider = pool.config.providers['fake-provider'] + manager = pool.getProviderManager(provider) + servers = manager.listServers() + self.assertEqual(len(servers), 1) + + with pool.getDB().getSession() as session: + nodes = session.getNodes(provider_name='fake-provider', + label_name='fake-label', + target_name='fake-target', + state=nodedb.READY) + self.assertEqual(len(nodes), 1) + # Delete the node from the db, but leave the instance + # so it is leaked. + self.log.debug("Delete node db record so instance is leaked...") + for node in nodes: + node.delete() + self.log.debug("...deleted node db so instance is leaked.") + nodes = session.getNodes(provider_name='fake-provider', + label_name='fake-label', + target_name='fake-target', + state=nodedb.READY) + self.assertEqual(len(nodes), 0) + + # Wait for nodepool to replace it, which should be enough + # time for it to also delete the leaked node + configfile = self.setup_config('leaked_node.yaml') + pool = self.useNodepool(configfile, watermark_sleep=1) + pool.start() + self.log.debug("Waiting for replacement pool...") + self.waitForNodes(pool) + self.log.debug("...done waiting for replacement pool.") + + # Make sure we end up with only one server (the replacement) + provider = pool.config.providers['fake-provider'] + manager = pool.getProviderManager(provider) + foobar_servers = manager.listServers() + self.assertEqual(len(servers), 1) + self.assertEqual(len(foobar_servers), 1) + + with pool.getDB().getSession() as session: + nodes = session.getNodes(provider_name='fake-provider', + label_name='fake-label', + target_name='fake-target', + state=nodedb.READY) + self.assertEqual(len(nodes), 1) + + # Just to be safe, ensure we have 2 nodes again. + self.assertEqual(len(servers), 1) + self.assertEqual(len(foobar_servers), 1) + @skip("Disabled for early v3 development") def test_building_image_cleanup_on_start(self): """Test that a building image is deleted on start""" From 2216e349c000532a93497767df262eced7603766 Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Tue, 28 Feb 2017 13:13:48 -0500 Subject: [PATCH 16/21] Fix logic error with nodepool-id In our case, we won't have zuulv3-dev setting metadata but nodepool.o.o will. This now means, nodepool.o.o should not delete 'leaked' nodes because zuulv3-dev.o.o has launched them. Change-Id: I9a2dbd3845928f83a9cca67082fd6b1ca247a607 Signed-off-by: Paul Belanger --- nodepool/nodepool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nodepool/nodepool.py b/nodepool/nodepool.py index 2ffc6a8f5..4d426ab44 100644 --- a/nodepool/nodepool.py +++ b/nodepool/nodepool.py @@ -1543,7 +1543,8 @@ class NodePool(threading.Thread): meta['provider_name'])) continue nodepool_id = meta.get('nodepool_id', None) - if nodepool_id and nodepool_id != provider.nodepool_id: + if provider.nodepool_id is not None and \ + nodepool_id != provider.nodepool_id: self.log.debug("Instance %s (%s) in %s " "was not launched by us" % ( server['name'], server['id'], From d616e61723207ed0e29ea69d67908a00ebf2cdfb Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Tue, 28 Feb 2017 15:47:00 -0800 Subject: [PATCH 17/21] Add destructor to SSHClient Newer versions of paramiko require a client object to be explicitly closed. Fortunately, we wrap all of our use of paramiko client objects in our own class. Add a destructor to our class which closes the client object. Note, this has been tested to work (and is needed) even if a connection is not established. Change-Id: I5dff7ed254567968b42d053b85004769f8647ecb --- nodepool/sshclient.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/nodepool/sshclient.py b/nodepool/sshclient.py index 51faca093..8be0c0089 100644 --- a/nodepool/sshclient.py +++ b/nodepool/sshclient.py @@ -25,14 +25,17 @@ class SSHClient(object): def __init__(self, ip, username, password=None, pkey=None, key_filename=None, log=None, look_for_keys=False, allow_agent=False): - client = paramiko.SSHClient() - client.set_missing_host_key_policy(paramiko.WarningPolicy()) - client.connect(ip, username=username, password=password, pkey=pkey, - key_filename=key_filename, look_for_keys=look_for_keys, - allow_agent=allow_agent) - self.client = client + self.client = paramiko.SSHClient() + self.client.set_missing_host_key_policy(paramiko.WarningPolicy()) + self.client.connect(ip, username=username, password=password, + pkey=pkey, key_filename=key_filename, + look_for_keys=look_for_keys, + allow_agent=allow_agent) self.log = log + def __del__(self): + self.client.close() + def ssh(self, action, command, get_pty=True, output=False): if self.log: self.log.debug("*** START to %s" % action) From 7142c2193aeac648891598c89c14ac8d4a245851 Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Wed, 8 Mar 2017 14:24:08 -0500 Subject: [PATCH 18/21] Remove ubuntu-precise from AFS mirror list We don't actually mirror precise, so fall back to default settings for APT mirrors. Change-Id: Ifed9e8e6e22aba5a65b27d22706d4a665a7ffe0a Signed-off-by: Paul Belanger --- devstack/plugin.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 7b0dea5e4..8a896ef25 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -348,8 +348,6 @@ diskimages: DIB_DISABLE_APT_CLEANUP: '1' DIB_DEV_USER_AUTHORIZED_KEYS: $NODEPOOL_PUBKEY DIB_DEBIAN_COMPONENTS: 'main,universe' - $DIB_DISTRIBUTION_MIRROR_UBUNTU - $DIB_DEBOOTSTRAP_EXTRA_ARGS $DIB_GET_PIP $DIB_GLEAN_INSTALLTYPE $DIB_GLEAN_REPOLOCATION From 60103bda5e9eaf97902c2d57fd727a99363d6963 Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Wed, 8 Mar 2017 14:40:55 -0500 Subject: [PATCH 19/21] Check if /etc/apt/apt.conf.d first exists For example, it doesn't make sense to add this to centos / fedora DIBs. Change-Id: If7574c84e135f447d9601140dae138242162c2d2 Signed-off-by: Paul Belanger --- devstack/plugin.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 8a896ef25..7e9259849 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -89,7 +89,9 @@ sudo mkdir -p /etc/nodepool sudo chmod 777 /etc/nodepool EOF cat > /tmp/50-apt-allow-unauthenticated < Date: Thu, 23 Mar 2017 09:24:31 -0500 Subject: [PATCH 20/21] Add novaclient and keystoneauth debug logging So that we can trace the API calls being made, put in debug logging for novaclient and keystoneauth. Change-Id: Ib2c95a416131f5b7b3d7779e8b9cbf0f4c823e49 --- devstack/plugin.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 7e9259849..2e90b524c 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -117,7 +117,7 @@ function nodepool_write_config { keys=simple [loggers] -keys=root,nodepool,shade,kazoo +keys=root,nodepool,shade,kazoo,keystoneauth,novaclient [handlers] keys=console @@ -138,6 +138,18 @@ handlers=console qualname=shade propagate=0 +[logger_keystoneauth] +level=DEBUG +handlers=console +qualname=keystoneauth +propagate=0 + +[logger_novaclient] +level=DEBUG +handlers=console +qualname=novaclient +propagate=0 + [logger_kazoo] level=INFO handlers=console From cca0636bd60ee5a75cd8212ce5e8b2f449dd0ddb Mon Sep 17 00:00:00 2001 From: Paul Belanger Date: Sat, 25 Mar 2017 10:34:51 -0400 Subject: [PATCH 21/21] Add debian-jessie DIB for dsvm testing Add more test coverage for debian, due to recent issues with system-config debian-jessie jobs failing. It also give projects like glean and diskimage-builder more coverage. Change-Id: If5e7fa98ee379f7339148de3ea6574bbcda2b032 Signed-off-by: Paul Belanger --- devstack/plugin.sh | 38 ++++++++++++++++++++++++++++++++++ devstack/settings | 1 + tools/check_devstack_plugin.sh | 8 +++++++ 3 files changed, 47 insertions(+) diff --git a/devstack/plugin.sh b/devstack/plugin.sh index 7e9259849..28ef7594e 100644 --- a/devstack/plugin.sh +++ b/devstack/plugin.sh @@ -184,9 +184,11 @@ EOF NODEPOOL_MIRROR_HOST=$(echo $NODEPOOL_MIRROR_HOST|tr '[:upper:]' '[:lower:]') NODEPOOL_CENTOS_MIRROR=${NODEPOOL_CENTOS_MIRROR:-http://$NODEPOOL_MIRROR_HOST/centos} + NODEPOOL_DEBIAN_MIRROR=${NODEPOOL_DEBIAN_MIRROR:-http://$NODEPOOL_MIRROR_HOST/debian} NODEPOOL_UBUNTU_MIRROR=${NODEPOOL_UBUNTU_MIRROR:-http://$NODEPOOL_MIRROR_HOST/ubuntu} DIB_DISTRIBUTION_MIRROR_CENTOS="DIB_DISTRIBUTION_MIRROR: $NODEPOOL_CENTOS_MIRROR" + DIB_DISTRIBUTION_MIRROR_DEBIAN="DIB_DISTRIBUTION_MIRROR: $NODEPOOL_DEBIAN_MIRROR" DIB_DISTRIBUTION_MIRROR_UBUNTU="DIB_DISTRIBUTION_MIRROR: $NODEPOOL_UBUNTU_MIRROR" DIB_DEBOOTSTRAP_EXTRA_ARGS="DIB_DEBOOTSTRAP_EXTRA_ARGS: '--no-check-gpg'" fi @@ -227,6 +229,11 @@ labels: min-ready: 1 providers: - name: devstack + - name: debian-jessie + image: debian-jessie + min-ready: 1 + providers: + - name: devstack - name: fedora-25 image: fedora-25 min-ready: 1 @@ -265,6 +272,12 @@ providers: username: devuser private-key: $NODEPOOL_KEY config-drive: true + - name: debian-jessie + min-ram: 512 + name-filter: 'nodepool' + username: devuser + private-key: $NODEPOOL_KEY + config-drive: true - name: fedora-25 min-ram: 1024 name-filter: 'nodepool' @@ -311,6 +324,31 @@ diskimages: $DIB_GLEAN_INSTALLTYPE $DIB_GLEAN_REPOLOCATION $DIB_GLEAN_REPOREF + - name: debian-jessie + pause: $NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB + rebuild-age: 86400 + elements: + - debian-minimal + - vm + - simple-init + - devuser + - openssh-server + - nodepool-setup + release: jessie + env-vars: + TMPDIR: $NODEPOOL_DIB_BASE_PATH/tmp + DIB_CHECKSUM: '1' + DIB_IMAGE_CACHE: $NODEPOOL_DIB_BASE_PATH/cache + DIB_APT_LOCAL_CACHE: '0' + DIB_DISABLE_APT_CLEANUP: '1' + DIB_DEV_USER_AUTHORIZED_KEYS: $NODEPOOL_PUBKEY + DIB_DEBIAN_COMPONENTS: 'main' + $DIB_DISTRIBUTION_MIRROR_DEBIAN + $DIB_DEBOOTSTRAP_EXTRA_ARGS + $DIB_GET_PIP + $DIB_GLEAN_INSTALLTYPE + $DIB_GLEAN_REPOLOCATION + $DIB_GLEAN_REPOREF - name: fedora-25 pause: $NODEPOOL_PAUSE_FEDORA_25_DIB rebuild-age: 86400 diff --git a/devstack/settings b/devstack/settings index dc2107a84..08fc9d603 100644 --- a/devstack/settings +++ b/devstack/settings @@ -7,6 +7,7 @@ NODEPOOL_DIB_BASE_PATH=/opt/dib # NOTE(pabelanger): Be sure to also update tools/check_devstack_plugin.sh if you # change the defaults. NODEPOOL_PAUSE_CENTOS_7_DIB=${NODEPOOL_PAUSE_CENTOS_7_DIB:-true} +NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB=${NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB:-true} NODEPOOL_PAUSE_FEDORA_25_DIB=${NODEPOOL_PAUSE_FEDORA_25_DIB:-true} NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB=${NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB:-true} NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB=${NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB:-false} diff --git a/tools/check_devstack_plugin.sh b/tools/check_devstack_plugin.sh index 16b5dd669..23d5a7c09 100755 --- a/tools/check_devstack_plugin.sh +++ b/tools/check_devstack_plugin.sh @@ -9,6 +9,7 @@ NODEPOOL="$NODEPOOL_INSTALL/bin/nodepool -c $NODEPOOL_CONFIG -s $NODEPOOL_SECURE # NOTE(pabelanger): Be sure to also update devstack/settings if you change the # defaults. NODEPOOL_PAUSE_CENTOS_7_DIB=${NODEPOOL_PAUSE_CENTOS_7_DIB:-true} +NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB=${NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB:-true} NODEPOOL_PAUSE_FEDORA_25_DIB=${NODEPOOL_PAUSE_FEDORA_25_DIB:-true} NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB=${NODEPOOL_PAUSE_UBUNTU_PRECISE_DIB:-true} NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB=${NODEPOOL_PAUSE_UBUNTU_TRUSTY_DIB:-false} @@ -47,6 +48,13 @@ if [ $NODEPOOL_PAUSE_CENTOS_7_DIB = 'false' ]; then waitfornode centos-7 fi +if [ $NODEPOOL_PAUSE_DEBIAN_JESSIE_DIB = 'false' ]; then + # check that image built + waitforimage debian-jessie + # check image was bootable + waitfornode debian-jessie +fi + if [ $NODEPOOL_PAUSE_FEDORA_25_DIB = 'false' ]; then # check that image built waitforimage fedora-25