Merge "Revert "Add a timeout for the image build""
This commit is contained in:
commit
f2c155821c
@ -277,12 +277,6 @@ Options
|
|||||||
Specifies the distro to be used as a base image to build the image using
|
Specifies the distro to be used as a base image to build the image using
|
||||||
diskimage-builder.
|
diskimage-builder.
|
||||||
|
|
||||||
.. attr:: build-timeout
|
|
||||||
:type: int
|
|
||||||
|
|
||||||
How long (in seconds) to wait for the diskimage build before giving up.
|
|
||||||
The default is 8 hours.
|
|
||||||
|
|
||||||
.. attr:: elements
|
.. attr:: elements
|
||||||
:type: list
|
:type: list
|
||||||
|
|
||||||
|
@ -739,28 +739,21 @@ class BuildWorker(BaseWorker):
|
|||||||
if 'qcow2' in img_types:
|
if 'qcow2' in img_types:
|
||||||
qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS
|
qemu_img_options = DEFAULT_QEMU_IMAGE_COMPAT_OPTIONS
|
||||||
|
|
||||||
log_fn = self._getBuildLog(diskimage.name, build_id)
|
cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s %s' %
|
||||||
|
|
||||||
cmd = ('%s -x -t %s --checksum --no-tmpfs %s -o %s --logfile %s %s' %
|
|
||||||
(self.dib_cmd, img_types, qemu_img_options, filename,
|
(self.dib_cmd, img_types, qemu_img_options, filename,
|
||||||
log_fn, img_elements))
|
img_elements))
|
||||||
|
|
||||||
self._pruneBuildLogs(diskimage.name)
|
self._pruneBuildLogs(diskimage.name)
|
||||||
|
log_fn = self._getBuildLog(diskimage.name, build_id)
|
||||||
|
|
||||||
self.log.info('Running %s' % (cmd,))
|
self.log.info('Running %s' % (cmd,))
|
||||||
self.log.info('Logging to %s' % (log_fn,))
|
self.log.info('Logging to %s' % (log_fn,))
|
||||||
|
|
||||||
start_time = time.monotonic()
|
start_time = time.monotonic()
|
||||||
|
|
||||||
# We used to use readline() on stdout to output the lines to the
|
|
||||||
# build log. Unfortunately, this would block as long as the process
|
|
||||||
# ran (with no easy way to timeout the read) and wedge the builder.
|
|
||||||
# Now we use --logfile option to the dib command and set a timeout
|
|
||||||
# on the wait() call to prevent the wedge.
|
|
||||||
did_timeout = False
|
|
||||||
try:
|
try:
|
||||||
p = subprocess.Popen(
|
p = subprocess.Popen(
|
||||||
shlex.split(cmd),
|
shlex.split(cmd),
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.STDOUT,
|
stderr=subprocess.STDOUT,
|
||||||
env=env)
|
env=env)
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
@ -768,20 +761,17 @@ class BuildWorker(BaseWorker):
|
|||||||
"Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror)
|
"Failed to exec '%s'. Error: '%s'" % (cmd, e.strerror)
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
with open(log_fn, 'wb') as log:
|
||||||
rc = p.wait(timeout=diskimage.build_timeout)
|
while True:
|
||||||
except subprocess.TimeoutExpired:
|
ln = p.stdout.readline()
|
||||||
p.kill()
|
log.write(ln)
|
||||||
did_timeout = True
|
log.flush()
|
||||||
rc = 1
|
if not ln:
|
||||||
self.log.error(
|
break
|
||||||
"Build timeout for image %s, build %s (log: %s)",
|
|
||||||
diskimage.name, build_id, log_fn)
|
rc = p.wait()
|
||||||
else:
|
m = "Exit code: %s\n" % rc
|
||||||
# Append return code to dib's log file
|
log.write(m.encode('utf8'))
|
||||||
with open(log_fn, 'ab') as log:
|
|
||||||
m = "Exit code: %s\n" % rc
|
|
||||||
log.write(m.encode('utf8'))
|
|
||||||
|
|
||||||
# It's possible the connection to the ZK cluster could have been
|
# It's possible the connection to the ZK cluster could have been
|
||||||
# interrupted during the build. If so, wait for it to return.
|
# interrupted during the build. If so, wait for it to return.
|
||||||
@ -806,10 +796,9 @@ class BuildWorker(BaseWorker):
|
|||||||
self.log.info("ZooKeeper lost while building %s" % diskimage.name)
|
self.log.info("ZooKeeper lost while building %s" % diskimage.name)
|
||||||
self._zk.resetLostFlag()
|
self._zk.resetLostFlag()
|
||||||
build_data.state = zk.FAILED
|
build_data.state = zk.FAILED
|
||||||
elif p.returncode or did_timeout:
|
elif p.returncode:
|
||||||
self.log.info(
|
self.log.info(
|
||||||
"DIB failed creating %s (%s) (timeout=%s)" % (
|
"DIB failed creating %s (%s)" % (diskimage.name, p.returncode))
|
||||||
diskimage.name, p.returncode, did_timeout))
|
|
||||||
build_data.state = zk.FAILED
|
build_data.state = zk.FAILED
|
||||||
else:
|
else:
|
||||||
self.log.info("DIB image %s is built" % diskimage.name)
|
self.log.info("DIB image %s is built" % diskimage.name)
|
||||||
|
@ -44,7 +44,6 @@ class ConfigValidator:
|
|||||||
'rebuild-age': int,
|
'rebuild-age': int,
|
||||||
'env-vars': {str: str},
|
'env-vars': {str: str},
|
||||||
'username': str,
|
'username': str,
|
||||||
'build-timeout': int,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
webapp = {
|
webapp = {
|
||||||
|
@ -118,7 +118,6 @@ class Config(ConfigValue):
|
|||||||
d.image_types = set(diskimage.get('formats', []))
|
d.image_types = set(diskimage.get('formats', []))
|
||||||
d.pause = bool(diskimage.get('pause', False))
|
d.pause = bool(diskimage.get('pause', False))
|
||||||
d.username = diskimage.get('username', 'zuul')
|
d.username = diskimage.get('username', 'zuul')
|
||||||
d.build_timeout = diskimage.get('build-timeout', (8 * 60 * 60))
|
|
||||||
self.diskimages[d.name] = d
|
self.diskimages[d.name] = d
|
||||||
|
|
||||||
def setSecureDiskimageEnv(self, diskimages, secure_config_path):
|
def setSecureDiskimageEnv(self, diskimages, secure_config_path):
|
||||||
@ -180,7 +179,6 @@ class DiskImage(ConfigValue):
|
|||||||
self.image_types = None
|
self.image_types = None
|
||||||
self.pause = False
|
self.pause = False
|
||||||
self.username = None
|
self.username = None
|
||||||
self.build_timeout = None
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if isinstance(other, DiskImage):
|
if isinstance(other, DiskImage):
|
||||||
@ -191,8 +189,7 @@ class DiskImage(ConfigValue):
|
|||||||
other.env_vars == self.env_vars and
|
other.env_vars == self.env_vars and
|
||||||
other.image_types == self.image_types and
|
other.image_types == self.image_types and
|
||||||
other.pause == self.pause and
|
other.pause == self.pause and
|
||||||
other.username == self.username and
|
other.username == self.username)
|
||||||
other.build_timeout == self.build_timeout)
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
@ -411,21 +411,9 @@ class DBTestCase(BaseTestCase):
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
self.wait_for_threads()
|
self.wait_for_threads()
|
||||||
|
|
||||||
def waitForBuild(self, image_name, build_id, states=None):
|
def waitForBuild(self, image_name, build_id):
|
||||||
if states is None:
|
|
||||||
states = (zk.READY,)
|
|
||||||
|
|
||||||
base = "-".join([image_name, build_id])
|
base = "-".join([image_name, build_id])
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
self.wait_for_threads()
|
|
||||||
build = self.zk.getBuild(image_name, build_id)
|
|
||||||
if build and build.state in states:
|
|
||||||
break
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
# We should only expect a dib manifest with a successful build.
|
|
||||||
while build.state == zk.READY:
|
|
||||||
self.wait_for_threads()
|
self.wait_for_threads()
|
||||||
files = builder.DibImageFile.from_image_id(
|
files = builder.DibImageFile.from_image_id(
|
||||||
self._config_images_dir.path, base)
|
self._config_images_dir.path, base)
|
||||||
@ -433,6 +421,13 @@ class DBTestCase(BaseTestCase):
|
|||||||
break
|
break
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
while True:
|
||||||
|
self.wait_for_threads()
|
||||||
|
build = self.zk.getBuild(image_name, build_id)
|
||||||
|
if build and build.state == zk.READY:
|
||||||
|
break
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
self.wait_for_threads()
|
self.wait_for_threads()
|
||||||
return build
|
return build
|
||||||
|
|
||||||
|
@ -1,49 +1,10 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
outfile=
|
|
||||||
outtypes=("qcow2")
|
|
||||||
|
|
||||||
all_args=$*
|
|
||||||
logfile=
|
|
||||||
checksum=
|
|
||||||
no_tmpfs=
|
|
||||||
qemu_img_options=
|
|
||||||
x=
|
|
||||||
|
|
||||||
TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum,logfile: -- "$@")
|
|
||||||
if [ $? -ne 0 ]; then
|
|
||||||
echo "Invalid option"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
eval set -- "$TEMP"
|
|
||||||
while true ; do
|
|
||||||
case "$1" in
|
|
||||||
--checksum)
|
|
||||||
checksum=1; shift 1;;
|
|
||||||
--no-tmpfs)
|
|
||||||
no_tmpfs=1; shift 1;;
|
|
||||||
--qemu-img-options)
|
|
||||||
qemu_img_options=$2; shift 2;;
|
|
||||||
--logfile)
|
|
||||||
logfile=$2; shift 2;;
|
|
||||||
-o) outfile=$2; shift 2;;
|
|
||||||
-t) IFS="," read -a outtypes <<< "$2"; shift 2;;
|
|
||||||
-x) x=1; shift;;
|
|
||||||
--) shift ; break ;;
|
|
||||||
*) echo "Unknown option : $1"; exit 1;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# If --logfile was given, direct stdout to it, as well
|
|
||||||
if [ ! -z "$logfile" ]; then
|
|
||||||
exec > >(tee -a ${logfile})
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "*** fake-image-create: start"
|
echo "*** fake-image-create: start"
|
||||||
|
|
||||||
echo "arguments:"
|
echo "arguments:"
|
||||||
echo "----"
|
echo "----"
|
||||||
echo "$all_args"
|
echo $*
|
||||||
echo "----"
|
echo "----"
|
||||||
|
|
||||||
if [[ "${SHOULD_FAIL}" == 'true' ]]; then
|
if [[ "${SHOULD_FAIL}" == 'true' ]]; then
|
||||||
@ -77,21 +38,30 @@ if [[ "${BASE_IMAGE_FILE}" != "Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2"
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z "$logfile" ]; then
|
outfile=
|
||||||
echo " -> logfile: $logfile"
|
outtypes=("qcow2")
|
||||||
fi
|
|
||||||
if [ ! -z "$checksum" ]; then
|
TEMP=$(getopt -o xo:t: --long qemu-img-options:,no-tmpfs,checksum -- "$@")
|
||||||
echo " -> set --checksum"
|
if [ $? -ne 0 ]; then
|
||||||
fi
|
echo "Invalid option"
|
||||||
if [ ! -z "$no_tmpfs" ]; then
|
exit 1
|
||||||
echo " -> set --no-tmpfs"
|
|
||||||
fi
|
|
||||||
if [ ! -z "$qemu_img_options" ]; then
|
|
||||||
echo " -> qemu-img-options: $qemu_img_options"
|
|
||||||
fi
|
|
||||||
if [ ! -z "$x" ]; then
|
|
||||||
echo " -> debugging enabled"
|
|
||||||
fi
|
fi
|
||||||
|
eval set -- "$TEMP"
|
||||||
|
while true ; do
|
||||||
|
case "$1" in
|
||||||
|
--checksum)
|
||||||
|
echo " -> set --checksum"; shift 1;;
|
||||||
|
--no-tmpfs)
|
||||||
|
echo " -> set --no-tmpfs"; shift 1;;
|
||||||
|
--qemu-img-options)
|
||||||
|
echo " -> qemu-img-options: $2"; shift 2;;
|
||||||
|
-o) outfile=$2; shift 2;;
|
||||||
|
-t) IFS="," read -a outtypes <<< "$2"; shift 2;;
|
||||||
|
-x) echo " -> debugging enabled"; shift;;
|
||||||
|
--) shift ; break ;;
|
||||||
|
*) echo "Unknown option : $1"; exit 1;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
if [ -z "$outfile" ]; then
|
if [ -z "$outfile" ]; then
|
||||||
echo "No output file specified."
|
echo "No output file specified."
|
||||||
|
@ -152,7 +152,6 @@ diskimages:
|
|||||||
- cache-devstack
|
- cache-devstack
|
||||||
release: trusty
|
release: trusty
|
||||||
rebuild-age: 3600
|
rebuild-age: 3600
|
||||||
build-timeout: 3600
|
|
||||||
env-vars:
|
env-vars:
|
||||||
TMPDIR: /opt/dib_tmp
|
TMPDIR: /opt/dib_tmp
|
||||||
DIB_IMAGE_CACHE: /opt/dib_cache
|
DIB_IMAGE_CACHE: /opt/dib_cache
|
||||||
|
@ -1,26 +0,0 @@
|
|||||||
elements-dir: .
|
|
||||||
images-dir: '{images_dir}'
|
|
||||||
build-log-dir: '{build_log_dir}'
|
|
||||||
|
|
||||||
zookeeper-servers:
|
|
||||||
- host: {zookeeper_host}
|
|
||||||
port: {zookeeper_port}
|
|
||||||
chroot: {zookeeper_chroot}
|
|
||||||
|
|
||||||
labels: []
|
|
||||||
|
|
||||||
providers: []
|
|
||||||
|
|
||||||
diskimages:
|
|
||||||
- name: fake-image
|
|
||||||
formats:
|
|
||||||
- tar
|
|
||||||
elements:
|
|
||||||
- fedora
|
|
||||||
- vm
|
|
||||||
release: 21
|
|
||||||
env-vars:
|
|
||||||
TMPDIR: /opt/dib_tmp
|
|
||||||
DIB_IMAGE_CACHE: /opt/dib_cache
|
|
||||||
DIB_CLOUD_IMAGES: http://download.fedoraproject.org/pub/fedora/linux/releases/test/21-Beta/Cloud/Images/x86_64/
|
|
||||||
BASE_IMAGE_FILE: Fedora-Cloud-Base-20141029-21_Beta.x86_64.qcow2
|
|
@ -16,8 +16,6 @@
|
|||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
import fixtures
|
import fixtures
|
||||||
import mock
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from nodepool import builder, exceptions, tests
|
from nodepool import builder, exceptions, tests
|
||||||
from nodepool.driver.fake import provider as fakeprovider
|
from nodepool.driver.fake import provider as fakeprovider
|
||||||
@ -337,10 +335,3 @@ class TestNodePoolBuilder(tests.DBTestCase):
|
|||||||
|
|
||||||
self.assertEqual(build_default._formats, ['qcow2'])
|
self.assertEqual(build_default._formats, ['qcow2'])
|
||||||
self.assertEqual(build_vhd._formats, ['vhd'])
|
self.assertEqual(build_vhd._formats, ['vhd'])
|
||||||
|
|
||||||
@mock.patch.object(subprocess.Popen, 'wait')
|
|
||||||
def test_diskimage_build_timeout(self, mock_wait):
|
|
||||||
mock_wait.side_effect = subprocess.TimeoutExpired('dib_cmd', 1)
|
|
||||||
configfile = self.setup_config('diskimage_build_timeout.yaml')
|
|
||||||
self.useBuilder(configfile, cleanup_interval=0)
|
|
||||||
self.waitForBuild('fake-image', '0000000001', states=(zk.FAILED,))
|
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
---
|
|
||||||
features:
|
|
||||||
- |
|
|
||||||
A new option (build-timeout) has been added to the builder diskimage
|
|
||||||
configuration to control how long the builder should wait for image
|
|
||||||
builds before giving up. The default is 8 hours.
|
|
Loading…
x
Reference in New Issue
Block a user