Rework the stream_blob/stream_object API
Instead of just returning a generator for the data, return a tuple of (size, generator). This removes the need to call blob_size in get_blob, and leaves it up to the backend to decide how best to actually get that information. As a result: * The swift backend just needs to do a GET, rather than a HEAD then a GET. Either way, you should still get a Content-Length. * Now neither backend has a race wherein a backing blob may get deleted between the call to blob_size and stream_blob; previously, we could erroneously tell the client the blob exists but is empty. While we're refactoring, add constants for (and increase) the chunk-read size, and include Content-Length headers for GET and HEAD responses. Note that there's a bit of nuance to the return-check now: if the generator is None, the blob could not be found; if the size is None, the blob's size could not be determined -- possibly because we got a chunk-encoded response from swift. In practice, though, expect either both to be None, or neither. Change-Id: Ib85cffe17d2d57cc499d863f1b07cfad8ecd401a
This commit is contained in:
parent
a6ebe80fc4
commit
127658bbee
@ -18,6 +18,9 @@ import os
|
|||||||
from . import storageutils
|
from . import storageutils
|
||||||
|
|
||||||
|
|
||||||
|
DISK_CHUNK_SIZE = 64 * 1024
|
||||||
|
|
||||||
|
|
||||||
class FilesystemDriver(storageutils.StorageDriver):
|
class FilesystemDriver(storageutils.StorageDriver):
|
||||||
def __init__(self, conf):
|
def __init__(self, conf):
|
||||||
self.root = conf['root']
|
self.root = conf['root']
|
||||||
@ -60,13 +63,33 @@ class FilesystemDriver(storageutils.StorageDriver):
|
|||||||
def stream_object(self, path):
|
def stream_object(self, path):
|
||||||
path = os.path.join(self.root, path)
|
path = os.path.join(self.root, path)
|
||||||
if not os.path.exists(path):
|
if not os.path.exists(path):
|
||||||
return None
|
return None, None
|
||||||
with open(path, 'rb') as f:
|
f = open(path, 'rb', buffering=DISK_CHUNK_SIZE)
|
||||||
while True:
|
try:
|
||||||
chunk = f.read(4096)
|
size = os.fstat(f.fileno()).st_size
|
||||||
if not chunk:
|
except OSError:
|
||||||
return
|
f.close()
|
||||||
yield chunk
|
raise
|
||||||
|
|
||||||
|
def data_iter(f=f):
|
||||||
|
with f:
|
||||||
|
yield b'' # will get discarded; see note below
|
||||||
|
yield from iter(lambda: f.read(DISK_CHUNK_SIZE), b'')
|
||||||
|
|
||||||
|
ret = data_iter()
|
||||||
|
# This looks a little funny, because it is. We're going to discard the
|
||||||
|
# empty bytes added at the start, but that's not the important part.
|
||||||
|
# We want to ensure that
|
||||||
|
#
|
||||||
|
# 1. the generator has started executing and
|
||||||
|
# 2. it left off *inside the with block*
|
||||||
|
#
|
||||||
|
# This ensures that when the generator gets cleaned up (either because
|
||||||
|
# everything went according to plan and the generator exited cleanly
|
||||||
|
# *or* there was an error which eventually raised a GeneratorExit),
|
||||||
|
# the file we opened will get closed.
|
||||||
|
next(ret)
|
||||||
|
return size, ret
|
||||||
|
|
||||||
def delete_object(self, path):
|
def delete_object(self, path):
|
||||||
path = os.path.join(self.root, path)
|
path = os.path.join(self.root, path)
|
||||||
|
@ -114,6 +114,7 @@ class RegistryAPI:
|
|||||||
return self.not_found()
|
return self.not_found()
|
||||||
res = cherrypy.response
|
res = cherrypy.response
|
||||||
res.headers['Docker-Content-Digest'] = digest
|
res.headers['Docker-Content-Digest'] = digest
|
||||||
|
res.headers['Content-Length'] = str(size)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@cherrypy.expose
|
@cherrypy.expose
|
||||||
@ -122,12 +123,14 @@ class RegistryAPI:
|
|||||||
def get_blob(self, repository, digest):
|
def get_blob(self, repository, digest):
|
||||||
namespace = self.get_namespace()
|
namespace = self.get_namespace()
|
||||||
self.log.info('Get blob %s %s', repository, digest)
|
self.log.info('Get blob %s %s', repository, digest)
|
||||||
size = self.storage.blob_size(namespace, digest)
|
size, data_iter = self.storage.stream_blob(namespace, digest)
|
||||||
if size is None:
|
if data_iter is None:
|
||||||
return self.not_found()
|
return self.not_found()
|
||||||
res = cherrypy.response
|
res = cherrypy.response
|
||||||
res.headers['Docker-Content-Digest'] = digest
|
res.headers['Docker-Content-Digest'] = digest
|
||||||
return self.storage.stream_blob(namespace, digest)
|
if size is not None:
|
||||||
|
res.headers['Content-Length'] = str(size)
|
||||||
|
return data_iter
|
||||||
|
|
||||||
@cherrypy.expose
|
@cherrypy.expose
|
||||||
@cherrypy.config(**{'tools.auth_basic.checkpassword': require_write})
|
@cherrypy.config(**{'tools.auth_basic.checkpassword': require_write})
|
||||||
|
@ -99,8 +99,8 @@ class StorageDriver(metaclass=ABCMeta):
|
|||||||
|
|
||||||
:arg str path: The object path.
|
:arg str path: The object path.
|
||||||
|
|
||||||
:returns: The contents of the object.
|
:returns: The size and contents of the object.
|
||||||
:rtype: generator of bytearray
|
:rtype: tuple of (int or None, generator-of-bytearray or None)
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@ import dateutil.parser
|
|||||||
from . import storageutils
|
from . import storageutils
|
||||||
|
|
||||||
POST_ATTEMPTS = 3
|
POST_ATTEMPTS = 3
|
||||||
|
SWIFT_CHUNK_SIZE = 64 * 1024
|
||||||
|
|
||||||
|
|
||||||
def retry_function(func):
|
def retry_function(func):
|
||||||
@ -125,8 +126,12 @@ class SwiftDriver(storageutils.StorageDriver):
|
|||||||
ret = retry_function(
|
ret = retry_function(
|
||||||
lambda: self.conn.session.get(self.get_url(path), stream=True))
|
lambda: self.conn.session.get(self.get_url(path), stream=True))
|
||||||
except keystoneauth1.exceptions.http.NotFound:
|
except keystoneauth1.exceptions.http.NotFound:
|
||||||
return None
|
return None, None
|
||||||
return ret.iter_content(chunk_size=4096)
|
try:
|
||||||
|
size = int(ret.headers.get('Content-Length', ''))
|
||||||
|
except ValueError:
|
||||||
|
size = None
|
||||||
|
return size, ret.iter_content(chunk_size=SWIFT_CHUNK_SIZE)
|
||||||
|
|
||||||
def delete_object(self, path):
|
def delete_object(self, path):
|
||||||
retry_function(
|
retry_function(
|
||||||
|
Loading…
Reference in New Issue
Block a user