Fix race with two builders deleting images
In a situation with multiple builders, each configured with different
providers, it is possible for one builder to delete the ZK ImageBuild
record for another builder's build in the window after the build has
completed but before the first upload starts. This is because every
builder looks for images to delete from ZK. Each builder keeps the 2
most recent ready images (this should normally cover the time period
between a build and upload), unless the image is not configured for
any provider this builder knows about. This is where the disjoint
providers come into play: builder1 in our scenario is not expected to
have a configuration for provider2.

To correct this, we adjust the check so that the only time we bypass
the 2-most-recent-ready-images rule is when the diskimage is not
configured at all. That means we still expect all builders to have a
"diskimage" entry for every image, but those entries do not need to be
configured for any providers which this builder is not expected to
handle.

Change-Id: Ic2fefda293fa0bcbc98ee7313198b37df0576299
parent 9641b719f9
commit 6a56940275
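The rule described above reduces to a small decision function. The
following is an illustrative sketch only; the config and build objects
(and names like builds_to_keep_for and state_time) are simplified
stand-ins for this explanation, not nodepool's actual classes:

# Sketch of the cleanup rule after this change (illustrative only).
def builds_to_keep_for(image_name, config, builds):
    diskimage = config.diskimages.get(image_name)
    if not diskimage:
        # The image is gone from the config entirely: the user intends
        # to remove it, so no builds need to be kept.
        return set()
    # The image is still configured, even if no provider in *this*
    # builder's config uses it: assume some other builder is
    # responsible for it and keep the 2 most recent ready builds.
    ready = [b for b in builds if b.state == 'ready']
    return set(sorted(ready, key=lambda b: b.state_time,
                      reverse=True)[:2])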
@@ -467,7 +467,13 @@ class CleanupWorker(BaseWorker):
         # have a consistent view of the data.
         all_builds = self._zk.getBuilds(image)
         diskimage = self._config.diskimages.get(image)
-        if not diskimage or (diskimage and not diskimage.image_types):
+        if not diskimage:
+            # If the diskimage does not appear in the config at all,
+            # we interpret that as the user intending to remove it.
+            # If it appears in the config, even if it's not used by a
+            # provider, we will assume there is some other builder
+            # responsible for it, so we fall through to the else
+            # condition below and rely on data in ZK.
             builds_to_keep = set()
             # TODO(jeblair): When all builds for an image which is not
             # in use are deleted, the image znode should be deleted as
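To see why the old condition raced, here is a runnable toy version of
the check; DiskImage and the diskimages dict below are illustrative
stand-ins for builder2's configuration, not nodepool's real objects:

from dataclasses import dataclass, field

@dataclass
class DiskImage:
    name: str
    # Image formats needed by providers in THIS builder's config;
    # empty when no local provider uses the image.
    image_types: set = field(default_factory=set)

# builder2 knows fake-image1 exists but has no provider for it.
diskimages = {
    'fake-image1': DiskImage('fake-image1'),
    'fake-image2': DiskImage('fake-image2', {'qcow2'}),
}

def old_skips_keep_rule(name):
    d = diskimages.get(name)
    return not d or (d and not d.image_types)

def new_skips_keep_rule(name):
    return not diskimages.get(name)

print(old_skips_keep_rule('fake-image1'))  # True: builds deletable (race)
print(new_skips_keep_rule('fake-image1'))  # False: builds kept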
@@ -508,7 +508,8 @@ class DBTestCase(BaseTestCase):
                 break
             self.wait_for_threads()
 
-    def waitForBuild(self, image_name, build_id, states=None):
+    def waitForBuild(self, image_name, build_id, states=None,
+                     check_files=True):
         if states is None:
             states = (zk.READY,)
 
@@ -523,7 +524,7 @@ class DBTestCase(BaseTestCase):
                 break
 
         # We should only expect a dib manifest with a successful build.
-        while build.state == zk.READY:
+        while check_files and build.state == zk.READY:
             self.wait_for_threads()
             files = builder.DibImageFile.from_image_id(
                 self._config_images_dir.path, base)
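For reference, both call forms of the updated helper, as exercised by
the new test later in this change (the image and build IDs are the
fixture values used there):

# Default behavior: wait for the ZK build record and also verify the
# dib image files exist on disk.
self.waitForBuild('fake-image1', '0000000001')

# New flag: wait only on the ZK record, skipping the on-disk file
# check, e.g. when the files live on a different builder.
self.waitForBuild('fake-image1', '0000000001', check_files=False)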
nodepool/tests/fixtures/builder_image1.yaml (vendored, new file, 64 lines)
@@ -0,0 +1,64 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+build-log-retention: 1
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: fake-label1
+  - name: fake-label2
+
+providers:
+  - name: fake-provider1
+    cloud: fake
+    driver: fake
+    region-name: fake-region
+    rate: 0.0001
+    diskimages:
+      - name: fake-image1
+    pools:
+      - name: main
+        max-servers: 96
+        availability-zones:
+          - az1
+        networks:
+          - net-name
+        labels:
+          - name: fake-label1
+            diskimage: fake-image1
+            min-ram: 8192
+            flavor-name: 'Fake'
+
+diskimages:
+  - name: fake-image1
+    elements:
+      - fedora
+      - vm
+    release: 21
+    dib-cmd: nodepool/tests/fake-image-create
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
+
+  - name: fake-image2
+    elements:
+      - fedora
+      - vm
+    release: 21
+    dib-cmd: nodepool/tests/fake-image-create
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
nodepool/tests/fixtures/builder_image2.yaml (vendored, new file, 64 lines)
@@ -0,0 +1,64 @@
+elements-dir: .
+images-dir: '{images_dir}'
+build-log-dir: '{build_log_dir}'
+build-log-retention: 1
+
+zookeeper-servers:
+  - host: {zookeeper_host}
+    port: {zookeeper_port}
+    chroot: {zookeeper_chroot}
+
+zookeeper-tls:
+  ca: {zookeeper_ca}
+  cert: {zookeeper_cert}
+  key: {zookeeper_key}
+
+labels:
+  - name: fake-label1
+  - name: fake-label2
+
+providers:
+  - name: fake-provider2
+    cloud: fake
+    driver: fake
+    region-name: fake-region
+    rate: 0.0001
+    diskimages:
+      - name: fake-image2
+    pools:
+      - name: main
+        max-servers: 96
+        availability-zones:
+          - az1
+        networks:
+          - net-name
+        labels:
+          - name: fake-label2
+            diskimage: fake-image2
+            min-ram: 8192
+            flavor-name: 'Fake'
+
+diskimages:
+  - name: fake-image1
+    elements:
+      - fedora
+      - vm
+    release: 21
+    dib-cmd: nodepool/tests/fake-image-create
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
+
+  - name: fake-image2
+    elements:
+      - fedora
+      - vm
+    release: 21
+    dib-cmd: nodepool/tests/fake-image-create
+    env-vars:
+      TMPDIR: /opt/dib_tmp
+      DIB_IMAGE_CACHE: /opt/dib_cache
+      DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
+      BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
@@ -235,6 +235,33 @@ class TestNodePoolBuilder(tests.DBTestCase):
         self.waitForImageDeletion('fake-provider', 'fake-image2')
         self.waitForBuildDeletion('fake-image2', '0000000001')
 
+    def test_image_removal_two_builders(self):
+        # This tests the gap between building and uploading an image.
+        # We build an image on one builder, then delete any uploads
+        # (to simulate the time between when the builder completed a
+        # build and started the first upload).  Then we start a second
+        # builder which is not configured with the same provider as
+        # the first builder and ensure that it doesn't delete the
+        # ready-but-not-yet-uploaded build from the other builder.
+        configfile1 = self.setup_config('builder_image1.yaml')
+        builder1 = self.useBuilder(configfile1)
+        self.waitForImage('fake-provider1', 'fake-image1')
+        self.waitForBuild('fake-image1', '0000000001')
+        for worker in builder1._upload_workers:
+            worker.shutdown()
+            worker.join()
+        builder1.stop()
+
+        self.zk.deleteUpload('fake-image1', '0000000001',
+                             'fake-provider1', '0000000001')
+
+        configfile2 = self.setup_config('builder_image2.yaml')
+        self.useBuilder(configfile2)
+        self.waitForImage('fake-provider2', 'fake-image2')
+        # Don't check files because the image path switched to the
+        # second builder; we're really only interested in ZK.
+        self.waitForBuild('fake-image1', '0000000001', check_files=False)
+
     def test_image_removal_dib_deletes_first(self):
         # Break cloud image deleting
         fake_client = fakeprovider.FakeDeleteImageFailCloud()