From a9d82bb12b6e68a841a34668b3dc83a928fd6a50 Mon Sep 17 00:00:00 2001
From: Dmitry Tantsur <dtantsur@protonmail.com>
Date: Wed, 8 Sep 2021 15:12:28 +0200
Subject: [PATCH] Enable parallel downloads and allow tuning concurrency

Currently we set parallel_image_downloads to False, which means that
all downloads that go through the image cache are serialized.
This change enables it by default and deprecates in favour of a new
more fine-grained mechanism: the new option image_download_concurrency
specifies how many downloads (and raw conversions) will run in parallel.

Update logging to trace how long each download takes.

Change-Id: I8b85afda295029f85e82143cf7d4bcb2316860f6
---
 ironic/common/images.py                           |  7 ++++++-
 ironic/conf/default.py                            | 10 ++++++++--
 ironic/drivers/modules/image_cache.py             | 14 ++++++++++----
 releasenotes/notes/parallel-6c54b4131b4ba991.yaml | 15 +++++++++++++++
 4 files changed, 39 insertions(+), 7 deletions(-)
 create mode 100644 releasenotes/notes/parallel-6c54b4131b4ba991.yaml

diff --git a/ironic/common/images.py b/ironic/common/images.py
index 587e3a8235..ece093ae1b 100644
--- a/ironic/common/images.py
+++ b/ironic/common/images.py
@@ -21,6 +21,7 @@ Handling of VM disk images.
 
 import os
 import shutil
+import time
 
 from ironic_lib import disk_utils
 from ironic_lib import utils as ironic_utils
@@ -354,8 +355,9 @@ def fetch_into(context, image_href, image_file):
     image_service = service.get_image_service(image_href,
                                               context=context)
     LOG.debug("Using %(image_service)s to download image %(image_href)s.",
-              {'image_service': image_service.__class__,
+              {'image_service': image_service.__class__.__name__,
                'image_href': image_href})
+    start = time.time()
 
     if isinstance(image_file, str):
         with open(image_file, "wb") as image_file_obj:
@@ -363,6 +365,9 @@ def fetch_into(context, image_href, image_file):
     else:
         image_service.download(image_href, image_file)
 
+    LOG.debug("Image %(image_href)s downloaded in %(time).2f seconds.",
+              {'image_href': image_href, 'time': time.time() - start})
+
 
 def fetch(context, image_href, path, force_raw=False):
     with fileutils.remove_path_on_error(path):
diff --git a/ironic/conf/default.py b/ironic/conf/default.py
index b3a6943da1..f5dcd4e625 100644
--- a/ironic/conf/default.py
+++ b/ironic/conf/default.py
@@ -249,10 +249,16 @@ image_opts = [
 
 img_cache_opts = [
     cfg.BoolOpt('parallel_image_downloads',
-                default=False,
+                default=True,
                 mutable=True,
                 help=_('Run image downloads and raw format conversions in '
-                       'parallel.')),
+                       'parallel.'),
+                deprecated_for_removal=True,
+                deprecated_reason=_('Use image_download_concurrency')),
+    cfg.IntOpt('image_download_concurrency',
+               default=20, min=1,
+               help=_('How many image downloads and raw format conversions '
+                      'to run in parallel. Only affects image caches.')),
 ]
 
 netconf_opts = [
diff --git a/ironic/drivers/modules/image_cache.py b/ironic/drivers/modules/image_cache.py
index fba6fe77e2..41ab836fe5 100644
--- a/ironic/drivers/modules/image_cache.py
+++ b/ironic/drivers/modules/image_cache.py
@@ -20,6 +20,7 @@ Utility for caching master images.
 
 import os
 import tempfile
+import threading
 import time
 import uuid
 
@@ -44,6 +45,9 @@ LOG = logging.getLogger(__name__)
 _cache_cleanup_list = []
 
 
+_concurrency_semaphore = threading.Semaphore(CONF.image_download_concurrency)
+
+
 class ImageCache(object):
     """Class handling access to cache for master images."""
 
@@ -84,7 +88,8 @@ class ImageCache(object):
                 with lockutils.lock(img_download_lock_name):
                     _fetch(ctx, href, dest_path, force_raw)
             else:
-                _fetch(ctx, href, dest_path, force_raw)
+                with _concurrency_semaphore:
+                    _fetch(ctx, href, dest_path, force_raw)
             return
 
         # TODO(ghe): have hard links and counts the same behaviour in all fs
@@ -129,8 +134,8 @@ class ImageCache(object):
                           {'href': href})
                 return
 
-            LOG.info("Master cache miss for image %(href)s, "
-                     "starting download", {'href': href})
+            LOG.info("Master cache miss for image %(href)s, will download",
+                     {'href': href})
             self._download_image(
                 href, master_path, dest_path, ctx=ctx, force_raw=force_raw)
 
@@ -159,7 +164,8 @@ class ImageCache(object):
         tmp_path = os.path.join(tmp_dir, href.split('/')[-1])
 
         try:
-            _fetch(ctx, href, tmp_path, force_raw)
+            with _concurrency_semaphore:
+                _fetch(ctx, href, tmp_path, force_raw)
             # NOTE(dtantsur): no need for global lock here - master_path
             # will have link count >1 at any moment, so won't be cleaned up
             os.link(tmp_path, master_path)
diff --git a/releasenotes/notes/parallel-6c54b4131b4ba991.yaml b/releasenotes/notes/parallel-6c54b4131b4ba991.yaml
new file mode 100644
index 0000000000..1c59874e76
--- /dev/null
+++ b/releasenotes/notes/parallel-6c54b4131b4ba991.yaml
@@ -0,0 +1,15 @@
+---
+features:
+  - |
+    Allows limiting the number of parallel downloads for cached images
+    (instance and TFTP images currently).
+upgrade:
+  - |
+    The ``parallel_image_downloads`` option is now set to ``True`` by default.
+    Use the new ``image_download_concurrency`` option to tune the behavior,
+    the default concurrency is 20.
+deprecations:
+  - |
+    The ``parallel_image_downloads`` option is deprecated in favour of
+    the new ``image_download_concurrency`` option that allows more precise
+    tuning.