From 127658bbee5cff51323357c96dc4c459f6f60039 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Fri, 4 Oct 2019 11:38:42 -0700 Subject: [PATCH] Rework the stream_blob/stream_object API Instead of just returning a generator for the data, return a tuple of (size, generator). This removes the need to call blob_size in get_blob, and leaves it up to the backend to decide how best to actually get that information. As a result: * The swift backend just needs to do a GET, rather than a HEAD then a GET. Either way, you should still get a Content-Length. * Now neither backend has a race wherein a backing blob may get deleted between the call to blob_size and stream_blob; previously, we could erroneously tell the client the blob exists but is empty. While we're refactoring, add constants for (and increase) the chunk-read size, and include Content-Length headers for GET and HEAD responses. Note that there's a bit of nuance to the return-check now: if the generator is None, the blob could not be found; if the size is None, the blob's size could not be determined -- possibly because we got a chunk-encoded response from swift. In practice, though, expect either both to be None, or neither. Change-Id: Ib85cffe17d2d57cc499d863f1b07cfad8ecd401a --- zuul_registry/filesystem.py | 37 ++++++++++++++++++++++++++++------- zuul_registry/main.py | 9 ++++++--- zuul_registry/storageutils.py | 4 ++-- zuul_registry/swift.py | 9 +++++++-- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/zuul_registry/filesystem.py b/zuul_registry/filesystem.py index 641fbc3..087efed 100644 --- a/zuul_registry/filesystem.py +++ b/zuul_registry/filesystem.py @@ -18,6 +18,9 @@ import os from . import storageutils +DISK_CHUNK_SIZE = 64 * 1024 + + class FilesystemDriver(storageutils.StorageDriver): def __init__(self, conf): self.root = conf['root'] @@ -60,13 +63,33 @@ class FilesystemDriver(storageutils.StorageDriver): def stream_object(self, path): path = os.path.join(self.root, path) if not os.path.exists(path): - return None - with open(path, 'rb') as f: - while True: - chunk = f.read(4096) - if not chunk: - return - yield chunk + return None, None + f = open(path, 'rb', buffering=DISK_CHUNK_SIZE) + try: + size = os.fstat(f.fileno()).st_size + except OSError: + f.close() + raise + + def data_iter(f=f): + with f: + yield b'' # will get discarded; see note below + yield from iter(lambda: f.read(DISK_CHUNK_SIZE), b'') + + ret = data_iter() + # This looks a little funny, because it is. We're going to discard the + # empty bytes added at the start, but that's not the important part. + # We want to ensure that + # + # 1. the generator has started executing and + # 2. it left off *inside the with block* + # + # This ensures that when the generator gets cleaned up (either because + # everything went according to plan and the generator exited cleanly + # *or* there was an error which eventually raised a GeneratorExit), + # the file we opened will get closed. + next(ret) + return size, ret def delete_object(self, path): path = os.path.join(self.root, path) diff --git a/zuul_registry/main.py b/zuul_registry/main.py index 8a8a9c1..3831c1c 100644 --- a/zuul_registry/main.py +++ b/zuul_registry/main.py @@ -114,6 +114,7 @@ class RegistryAPI: return self.not_found() res = cherrypy.response res.headers['Docker-Content-Digest'] = digest + res.headers['Content-Length'] = str(size) return {} @cherrypy.expose @@ -122,12 +123,14 @@ class RegistryAPI: def get_blob(self, repository, digest): namespace = self.get_namespace() self.log.info('Get blob %s %s', repository, digest) - size = self.storage.blob_size(namespace, digest) - if size is None: + size, data_iter = self.storage.stream_blob(namespace, digest) + if data_iter is None: return self.not_found() res = cherrypy.response res.headers['Docker-Content-Digest'] = digest - return self.storage.stream_blob(namespace, digest) + if size is not None: + res.headers['Content-Length'] = str(size) + return data_iter @cherrypy.expose @cherrypy.config(**{'tools.auth_basic.checkpassword': require_write}) diff --git a/zuul_registry/storageutils.py b/zuul_registry/storageutils.py index 42aed71..e15c222 100644 --- a/zuul_registry/storageutils.py +++ b/zuul_registry/storageutils.py @@ -99,8 +99,8 @@ class StorageDriver(metaclass=ABCMeta): :arg str path: The object path. - :returns: The contents of the object. - :rtype: generator of bytearray + :returns: The size and contents of the object. + :rtype: tuple of (int or None, generator-of-bytearray or None) """ pass diff --git a/zuul_registry/swift.py b/zuul_registry/swift.py index 709a572..25027c1 100644 --- a/zuul_registry/swift.py +++ b/zuul_registry/swift.py @@ -26,6 +26,7 @@ import dateutil.parser from . import storageutils POST_ATTEMPTS = 3 +SWIFT_CHUNK_SIZE = 64 * 1024 def retry_function(func): @@ -125,8 +126,12 @@ class SwiftDriver(storageutils.StorageDriver): ret = retry_function( lambda: self.conn.session.get(self.get_url(path), stream=True)) except keystoneauth1.exceptions.http.NotFound: - return None - return ret.iter_content(chunk_size=4096) + return None, None + try: + size = int(ret.headers.get('Content-Length', '')) + except ValueError: + size = None + return size, ret.iter_content(chunk_size=SWIFT_CHUNK_SIZE) def delete_object(self, path): retry_function(