From a9d82bb12b6e68a841a34668b3dc83a928fd6a50 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Wed, 8 Sep 2021 15:12:28 +0200 Subject: [PATCH] Enable parallel downloads and allow tuning concurrency Currently we set parallel_image_downloads to False, which means that all downloads that go through the image cache are serialized. This change enables it by default and deprecates in favour of a new more fine-grained mechanism: the new option image_download_concurrency specifies how many downloads (and raw conversions) will run in parallel. Update logging to trace how long each download takes. Change-Id: I8b85afda295029f85e82143cf7d4bcb2316860f6 --- ironic/common/images.py | 7 ++++++- ironic/conf/default.py | 10 ++++++++-- ironic/drivers/modules/image_cache.py | 14 ++++++++++---- releasenotes/notes/parallel-6c54b4131b4ba991.yaml | 15 +++++++++++++++ 4 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 releasenotes/notes/parallel-6c54b4131b4ba991.yaml diff --git a/ironic/common/images.py b/ironic/common/images.py index 587e3a8235..ece093ae1b 100644 --- a/ironic/common/images.py +++ b/ironic/common/images.py @@ -21,6 +21,7 @@ Handling of VM disk images. import os import shutil +import time from ironic_lib import disk_utils from ironic_lib import utils as ironic_utils @@ -354,8 +355,9 @@ def fetch_into(context, image_href, image_file): image_service = service.get_image_service(image_href, context=context) LOG.debug("Using %(image_service)s to download image %(image_href)s.", - {'image_service': image_service.__class__, + {'image_service': image_service.__class__.__name__, 'image_href': image_href}) + start = time.time() if isinstance(image_file, str): with open(image_file, "wb") as image_file_obj: @@ -363,6 +365,9 @@ def fetch_into(context, image_href, image_file): else: image_service.download(image_href, image_file) + LOG.debug("Image %(image_href)s downloaded in %(time).2f seconds.", + {'image_href': image_href, 'time': time.time() - start}) + def fetch(context, image_href, path, force_raw=False): with fileutils.remove_path_on_error(path): diff --git a/ironic/conf/default.py b/ironic/conf/default.py index b3a6943da1..f5dcd4e625 100644 --- a/ironic/conf/default.py +++ b/ironic/conf/default.py @@ -249,10 +249,16 @@ image_opts = [ img_cache_opts = [ cfg.BoolOpt('parallel_image_downloads', - default=False, + default=True, mutable=True, help=_('Run image downloads and raw format conversions in ' - 'parallel.')), + 'parallel.'), + deprecated_for_removal=True, + deprecated_reason=_('Use image_download_concurrency')), + cfg.IntOpt('image_download_concurrency', + default=20, min=1, + help=_('How many image downloads and raw format conversions ' + 'to run in parallel. Only affects image caches.')), ] netconf_opts = [ diff --git a/ironic/drivers/modules/image_cache.py b/ironic/drivers/modules/image_cache.py index fba6fe77e2..41ab836fe5 100644 --- a/ironic/drivers/modules/image_cache.py +++ b/ironic/drivers/modules/image_cache.py @@ -20,6 +20,7 @@ Utility for caching master images. import os import tempfile +import threading import time import uuid @@ -44,6 +45,9 @@ LOG = logging.getLogger(__name__) _cache_cleanup_list = [] +_concurrency_semaphore = threading.Semaphore(CONF.image_download_concurrency) + + class ImageCache(object): """Class handling access to cache for master images.""" @@ -84,7 +88,8 @@ class ImageCache(object): with lockutils.lock(img_download_lock_name): _fetch(ctx, href, dest_path, force_raw) else: - _fetch(ctx, href, dest_path, force_raw) + with _concurrency_semaphore: + _fetch(ctx, href, dest_path, force_raw) return # TODO(ghe): have hard links and counts the same behaviour in all fs @@ -129,8 +134,8 @@ class ImageCache(object): {'href': href}) return - LOG.info("Master cache miss for image %(href)s, " - "starting download", {'href': href}) + LOG.info("Master cache miss for image %(href)s, will download", + {'href': href}) self._download_image( href, master_path, dest_path, ctx=ctx, force_raw=force_raw) @@ -159,7 +164,8 @@ class ImageCache(object): tmp_path = os.path.join(tmp_dir, href.split('/')[-1]) try: - _fetch(ctx, href, tmp_path, force_raw) + with _concurrency_semaphore: + _fetch(ctx, href, tmp_path, force_raw) # NOTE(dtantsur): no need for global lock here - master_path # will have link count >1 at any moment, so won't be cleaned up os.link(tmp_path, master_path) diff --git a/releasenotes/notes/parallel-6c54b4131b4ba991.yaml b/releasenotes/notes/parallel-6c54b4131b4ba991.yaml new file mode 100644 index 0000000000..1c59874e76 --- /dev/null +++ b/releasenotes/notes/parallel-6c54b4131b4ba991.yaml @@ -0,0 +1,15 @@ +--- +features: + - | + Allows limiting the number of parallel downloads for cached images + (instance and TFTP images currently). +upgrade: + - | + The ``parallel_image_downloads`` option is now set to ``True`` by default. + Use the new ``image_download_concurrency`` option to tune the behavior, + the default concurrency is 20. +deprecations: + - | + The ``parallel_image_downloads`` option is deprecated in favour of + the new ``image_download_concurrency`` option that allows more precise + tuning.