Merge "Fix race with two builders deleting images"

This commit is contained in:
Zuul 2022-07-29 08:23:43 +00:00 committed by Gerrit Code Review
commit 2fb6ea2776
5 changed files with 165 additions and 3 deletions

View File

@ -467,7 +467,13 @@ class CleanupWorker(BaseWorker):
# have a consistent view of the data.
all_builds = self._zk.getBuilds(image)
diskimage = self._config.diskimages.get(image)
if not diskimage or (diskimage and not diskimage.image_types):
if not diskimage:
# If the diskimage does not appear in the config at all,
# we interpret that as the user intending to remove it.
# If it appears in the config, even if it's not used by a
# provider, we will assume there is some other builder
# responsible for it, so we fall through to the else
# condition below and rely on data in ZK.
builds_to_keep = set()
# TODO(jeblair): When all builds for an image which is not
# in use are deleted, the image znode should be deleted as

View File

@ -508,7 +508,8 @@ class DBTestCase(BaseTestCase):
break
self.wait_for_threads()
def waitForBuild(self, image_name, build_id, states=None):
def waitForBuild(self, image_name, build_id, states=None,
check_files=True):
if states is None:
states = (zk.READY,)
@ -523,7 +524,7 @@ class DBTestCase(BaseTestCase):
break
# We should only expect a dib manifest with a successful build.
while build.state == zk.READY:
while check_files and build.state == zk.READY:
self.wait_for_threads()
files = builder.DibImageFile.from_image_id(
self._config_images_dir.path, base)

View File

@ -0,0 +1,64 @@
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'
build-log-retention: 1
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
labels:
- name: fake-label1
- name: fake-label2
providers:
- name: fake-provider1
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image1
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label1
diskimage: fake-image1
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image1
elements:
- fedora
- vm
release: 21
dib-cmd: nodepool/tests/fake-image-create
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
- name: fake-image2
elements:
- fedora
- vm
release: 21
dib-cmd: nodepool/tests/fake-image-create
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -0,0 +1,64 @@
elements-dir: .
images-dir: '{images_dir}'
build-log-dir: '{build_log_dir}'
build-log-retention: 1
zookeeper-servers:
- host: {zookeeper_host}
port: {zookeeper_port}
chroot: {zookeeper_chroot}
zookeeper-tls:
ca: {zookeeper_ca}
cert: {zookeeper_cert}
key: {zookeeper_key}
labels:
- name: fake-label1
- name: fake-label2
providers:
- name: fake-provider2
cloud: fake
driver: fake
region-name: fake-region
rate: 0.0001
diskimages:
- name: fake-image2
pools:
- name: main
max-servers: 96
availability-zones:
- az1
networks:
- net-name
labels:
- name: fake-label2
diskimage: fake-image2
min-ram: 8192
flavor-name: 'Fake'
diskimages:
- name: fake-image1
elements:
- fedora
- vm
release: 21
dib-cmd: nodepool/tests/fake-image-create
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
- name: fake-image2
elements:
- fedora
- vm
release: 21
dib-cmd: nodepool/tests/fake-image-create
env-vars:
TMPDIR: /opt/dib_tmp
DIB_IMAGE_CACHE: /opt/dib_cache
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2

View File

@ -235,6 +235,33 @@ class TestNodePoolBuilder(tests.DBTestCase):
self.waitForImageDeletion('fake-provider', 'fake-image2')
self.waitForBuildDeletion('fake-image2', '0000000001')
def test_image_removal_two_builders(self):
# This tests the gap between building and uploading an image.
# We build an image on one builder, then delete any uploads
# (to simulate the time between when the builder completed a
# build and started the first upload). Then we start a second
# builder which is not configured with the same provider as
# the first builder and ensure that it doesn't delete the
# ready-but-not-yet-uploaded build from the other builder.
configfile1 = self.setup_config('builder_image1.yaml')
builder1 = self.useBuilder(configfile1)
self.waitForImage('fake-provider1', 'fake-image1')
self.waitForBuild('fake-image1', '0000000001')
for worker in builder1._upload_workers:
worker.shutdown()
worker.join()
builder1.stop()
self.zk.deleteUpload('fake-image1', '0000000001',
'fake-provider1', '0000000001')
configfile2 = self.setup_config('builder_image2.yaml')
self.useBuilder(configfile2)
self.waitForImage('fake-provider2', 'fake-image2')
# Don't check files because the image path switched to the
# second builder; we're really only interested in ZK.
self.waitForBuild('fake-image1', '0000000001', check_files=False)
def test_image_removal_dib_deletes_first(self):
# Break cloud image deleting
fake_client = fakeprovider.FakeDeleteImageFailCloud()