Rework the stream_blob/stream_object API

Instead of just returning a generator for the data, return a tuple of
(size, generator). This removes the need to call blob_size in get_blob,
and leaves it up to the backend to decide how best to actually get that
information. As a result:

* The swift backend just needs to do a GET, rather than a HEAD then a
  GET. Either way, you should still get a Content-Length.
* Now neither backend has a race wherein a backing blob may get deleted
  between the call to blob_size and stream_blob; previously, we could
  erroneously tell the client the blob exists but is empty.

While we're refactoring, add constants for (and increase) the chunk-read
size, and include Content-Length headers for GET and HEAD responses.

Note that there's a bit of nuance to the return-check now: if the
generator is None, the blob could not be found; if the size is None, the
blob's size could not be determined -- possibly because we got a
chunk-encoded response from swift. In practice, though, expect either
both to be None, or neither.

Change-Id: Ib85cffe17d2d57cc499d863f1b07cfad8ecd401a
This commit is contained in:
Tim Burke 2019-10-04 11:38:42 -07:00 committed by Tim Burke
parent a6ebe80fc4
commit 127658bbee
4 changed files with 45 additions and 14 deletions

View File

@ -18,6 +18,9 @@ import os
from . import storageutils from . import storageutils
DISK_CHUNK_SIZE = 64 * 1024
class FilesystemDriver(storageutils.StorageDriver): class FilesystemDriver(storageutils.StorageDriver):
def __init__(self, conf): def __init__(self, conf):
self.root = conf['root'] self.root = conf['root']
@ -60,13 +63,33 @@ class FilesystemDriver(storageutils.StorageDriver):
def stream_object(self, path): def stream_object(self, path):
path = os.path.join(self.root, path) path = os.path.join(self.root, path)
if not os.path.exists(path): if not os.path.exists(path):
return None return None, None
with open(path, 'rb') as f: f = open(path, 'rb', buffering=DISK_CHUNK_SIZE)
while True: try:
chunk = f.read(4096) size = os.fstat(f.fileno()).st_size
if not chunk: except OSError:
return f.close()
yield chunk raise
def data_iter(f=f):
with f:
yield b'' # will get discarded; see note below
yield from iter(lambda: f.read(DISK_CHUNK_SIZE), b'')
ret = data_iter()
# This looks a little funny, because it is. We're going to discard the
# empty bytes added at the start, but that's not the important part.
# We want to ensure that
#
# 1. the generator has started executing and
# 2. it left off *inside the with block*
#
# This ensures that when the generator gets cleaned up (either because
# everything went according to plan and the generator exited cleanly
# *or* there was an error which eventually raised a GeneratorExit),
# the file we opened will get closed.
next(ret)
return size, ret
def delete_object(self, path): def delete_object(self, path):
path = os.path.join(self.root, path) path = os.path.join(self.root, path)

View File

@ -114,6 +114,7 @@ class RegistryAPI:
return self.not_found() return self.not_found()
res = cherrypy.response res = cherrypy.response
res.headers['Docker-Content-Digest'] = digest res.headers['Docker-Content-Digest'] = digest
res.headers['Content-Length'] = str(size)
return {} return {}
@cherrypy.expose @cherrypy.expose
@ -122,12 +123,14 @@ class RegistryAPI:
def get_blob(self, repository, digest): def get_blob(self, repository, digest):
namespace = self.get_namespace() namespace = self.get_namespace()
self.log.info('Get blob %s %s', repository, digest) self.log.info('Get blob %s %s', repository, digest)
size = self.storage.blob_size(namespace, digest) size, data_iter = self.storage.stream_blob(namespace, digest)
if size is None: if data_iter is None:
return self.not_found() return self.not_found()
res = cherrypy.response res = cherrypy.response
res.headers['Docker-Content-Digest'] = digest res.headers['Docker-Content-Digest'] = digest
return self.storage.stream_blob(namespace, digest) if size is not None:
res.headers['Content-Length'] = str(size)
return data_iter
@cherrypy.expose @cherrypy.expose
@cherrypy.config(**{'tools.auth_basic.checkpassword': require_write}) @cherrypy.config(**{'tools.auth_basic.checkpassword': require_write})

View File

@ -99,8 +99,8 @@ class StorageDriver(metaclass=ABCMeta):
:arg str path: The object path. :arg str path: The object path.
:returns: The contents of the object. :returns: The size and contents of the object.
:rtype: generator of bytearray :rtype: tuple of (int or None, generator-of-bytearray or None)
""" """
pass pass

View File

@ -26,6 +26,7 @@ import dateutil.parser
from . import storageutils from . import storageutils
POST_ATTEMPTS = 3 POST_ATTEMPTS = 3
SWIFT_CHUNK_SIZE = 64 * 1024
def retry_function(func): def retry_function(func):
@ -125,8 +126,12 @@ class SwiftDriver(storageutils.StorageDriver):
ret = retry_function( ret = retry_function(
lambda: self.conn.session.get(self.get_url(path), stream=True)) lambda: self.conn.session.get(self.get_url(path), stream=True))
except keystoneauth1.exceptions.http.NotFound: except keystoneauth1.exceptions.http.NotFound:
return None return None, None
return ret.iter_content(chunk_size=4096) try:
size = int(ret.headers.get('Content-Length', ''))
except ValueError:
size = None
return size, ret.iter_content(chunk_size=SWIFT_CHUNK_SIZE)
def delete_object(self, path): def delete_object(self, path):
retry_function( retry_function(