Merge "Add debug/verification for uploads"

Zuul 2020-04-01 21:22:23 +00:00 committed by Gerrit Code Review
commit 033b8a89a5
4 changed files with 128 additions and 34 deletions


@@ -43,7 +43,7 @@ class FilesystemDriver(storageutils.StorageDriver):
             return None
         return os.stat(path).st_size
 
-    def put_object(self, path, data):
+    def put_object(self, path, data, uuid=None):
         path = os.path.join(self.root, path)
         os.makedirs(os.path.dirname(path), exist_ok=True)
         with open(path, 'wb') as f:

@@ -99,26 +99,26 @@ class FilesystemDriver(storageutils.StorageDriver):
         else:
             os.unlink(path)
 
-    def move_object(self, src_path, dst_path):
+    def move_object(self, src_path, dst_path, uuid=None):
         src_path = os.path.join(self.root, src_path)
         dst_path = os.path.join(self.root, dst_path)
         os.makedirs(os.path.dirname(dst_path), exist_ok=True)
         os.rename(src_path, dst_path)
 
-    def cat_objects(self, path, chunks):
+    def cat_objects(self, path, chunks, uuid=None):
         path = os.path.join(self.root, path)
         os.makedirs(os.path.dirname(path), exist_ok=True)
         with open(path, 'wb') as outf:
-            for chunk_path in chunks:
-                chunk_path = os.path.join(self.root, chunk_path)
+            for chunk in chunks:
+                chunk_path = os.path.join(self.root, chunk['path'])
                 with open(chunk_path, 'rb') as inf:
                     while True:
                         d = inf.read(4096)
                         if not d:
                             break
                         outf.write(d)
-        for chunk_path in chunks:
-            chunk_path = os.path.join(self.root, chunk_path)
+        for chunk in chunks:
+            chunk_path = os.path.join(self.root, chunk['path'])
             os.unlink(chunk_path)
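With this change the storage drivers receive each chunk as a dict carrying verification metadata ({'path': ..., 'md5': ..., 'size': ...}) instead of a bare path string. A minimal standalone sketch of the new contract, with an explicit root argument standing in for the driver's self.root (names here are illustrative, not part of the change):

    import os

    def cat_objects(root, path, chunks, uuid=None):
        # Each chunk is a dict like {'path': ..., 'md5': ..., 'size': ...};
        # the filesystem driver only consults 'path', while the swift
        # driver below also verifies 'md5' and 'size'.
        path = os.path.join(root, path)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as outf:
            for chunk in chunks:
                with open(os.path.join(root, chunk['path']), 'rb') as inf:
                    outf.write(inf.read())
        # The source chunks are removed once concatenated.
        for chunk in chunks:
            os.unlink(os.path.join(root, chunk['path']))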


@@ -196,8 +196,8 @@ class RegistryAPI:
         namespace, repository = self.get_namespace(repository)
         method = cherrypy.request.method
         uuid = self.storage.start_upload(namespace)
-        self.log.info('Start upload %s %s %s uuid %s digest %s',
-                      method, namespace, repository, uuid, digest)
+        self.log.info('[u: %s] Start upload %s %s %s digest %s',
+                      uuid, method, namespace, repository, digest)
         res = cherrypy.response
         res.headers['Location'] = '/v2/%s/blobs/uploads/%s' % (
             orig_repository, uuid)

@@ -210,7 +210,8 @@
     def upload_chunk(self, repository, uuid):
         orig_repository = repository
         namespace, repository = self.get_namespace(repository)
-        self.log.info('Upload chunk %s %s %s', namespace, repository, uuid)
+        self.log.info('[u: %s] Upload chunk %s %s',
+                      uuid, namespace, repository)
         old_length, new_length = self.storage.upload_chunk(
             namespace, uuid, cherrypy.request.body)
         res = cherrypy.response

@@ -220,17 +221,22 @@
         res.headers['Range'] = '0-%s' % (new_length,)
         res.status = '204 No Content'
         self.log.info(
-            'Finish Upload chunk %s %s %s', repository, uuid, new_length)
+            '[u: %s] Finish Upload chunk %s %s', uuid, repository, new_length)
 
     @cherrypy.expose
     @cherrypy.config(**{'tools.check_auth.level': Authorization.WRITE})
     def finish_upload(self, repository, uuid, digest):
         orig_repository = repository
         namespace, repository = self.get_namespace(repository)
-        self.log.info('Finish upload %s %s %s', namespace, repository, uuid)
+        self.log.info('[u: %s] Upload final chunk %s %s',
+                      uuid, namespace, repository)
         old_length, new_length = self.storage.upload_chunk(
             namespace, uuid, cherrypy.request.body)
+        self.log.debug('[u: %s] Store upload %s %s',
+                       uuid, namespace, repository)
         self.storage.store_upload(namespace, uuid, digest)
+        self.log.info('[u: %s] Upload complete %s %s',
+                      uuid, namespace, repository)
         res = cherrypy.response
         res.headers['Location'] = '/v2/%s/blobs/%s' % (orig_repository, digest)
         res.headers['Docker-Content-Digest'] = digest

@@ -409,18 +415,20 @@ def main():
                         help='Command: serve, prune',
                         default='serve')
     args = parser.parse_args()
-    logformat = '%(levelname)s %(name)s: %(message)s'
+    logformat = '%(asctime)s %(levelname)s %(name)s: %(message)s'
     if args.debug or os.environ.get('DEBUG') == '1':
         logging.basicConfig(level=logging.DEBUG, format=logformat)
+        logging.getLogger("openstack").setLevel(logging.DEBUG)
+        logging.getLogger("urllib3").setLevel(logging.DEBUG)
+        logging.getLogger("requests").setLevel(logging.DEBUG)
     else:
         logging.basicConfig(level=logging.INFO, format=logformat)
+        logging.getLogger("openstack").setLevel(logging.INFO)
+        logging.getLogger("urllib3").setLevel(logging.ERROR)
+        logging.getLogger("requests").setLevel(logging.ERROR)
     cherrypy.log.access_log.propagate = False
-    logging.getLogger("requests").setLevel(logging.DEBUG)
     logging.getLogger("keystoneauth").setLevel(logging.ERROR)
-    logging.getLogger("urllib3").setLevel(logging.DEBUG)
-    logging.getLogger("stevedore").setLevel(logging.INFO)
-    logging.getLogger("openstack").setLevel(logging.DEBUG)
-    # cherrypy.log.error_log.propagate = False
+    logging.getLogger("stevedore").setLevel(logging.ERROR)
 
     s = RegistryServer(args.config)
     if args.command == 'serve':
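The [u: <uuid>] prefix puts the upload id first in every message, so one upload's whole lifecycle can be pulled out of the logs with a single grep, and %(asctime)s adds timestamps for ordering events across requests. A hypothetical snippet showing the convention (the logger name and uuid value are made up for illustration):

    import logging

    logformat = '%(asctime)s %(levelname)s %(name)s: %(message)s'
    logging.basicConfig(level=logging.INFO, format=logformat)
    log = logging.getLogger('registry.api')

    uuid = 'c0ffee12'  # illustrative upload id
    log.info('[u: %s] Start upload %s %s %s digest %s',
             uuid, 'POST', 'local', 'alpine', None)
    log.info('[u: %s] Upload complete %s %s', uuid, 'local', 'alpine')

Note also that the noisy third-party loggers (openstack, urllib3, requests) are now tied to the debug flag rather than unconditionally set to DEBUG.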


@@ -19,6 +19,7 @@ import logging
 import os
 import queue
 import rehash
+import hashlib
 import threading
 import time
 from uuid import uuid4

@@ -167,7 +168,9 @@ class Storage:
 
     def _update_upload(self, namespace, uuid, upload):
         path = os.path.join(namespace, 'uploads', uuid, 'metadata')
-        self.backend.put_object(path, upload.dump())
+        self.log.debug("[u: %s] Update upload metadata chunks: %s",
+                       uuid, upload.chunks)
+        self.backend.put_object(path, upload.dump(), uuid)
 
     def upload_chunk(self, namespace, uuid, fp):
         """Add a chunk to an upload.
@@ -188,9 +191,13 @@
         path = os.path.join(namespace, 'uploads', uuid, str(upload.count + 1))
         streamer = UploadStreamer()
         t = threading.Thread(target=self.backend.put_object,
-                             args=(path, streamer))
+                             args=(path, streamer, uuid))
         t.start()
         size = 0
+        # This calculates the md5 of just this chunk for internal
+        # integrity checking; it is not the overall hash of the layer
+        # (that's a running calculation in the upload record).
+        chunk_hasher = hashlib.md5()
         while True:
             try:
                 d = fp.read(4096)

@@ -200,11 +207,12 @@
             if not d:
                 break
             upload.hasher.update(d)
+            chunk_hasher.update(d)
             size += len(d)
             streamer.write(d)
         streamer.write(None)
         t.join()
-        upload.chunks.append(dict(size=size))
+        upload.chunks.append(dict(size=size, md5=chunk_hasher.hexdigest()))
         self._update_upload(namespace, uuid, upload)
         return upload.size - size, upload.size
 
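Two digests run side by side here: upload.hasher is the running hash over the whole layer kept in the upload record, while chunk_hasher covers only the current chunk and is stored for later verification by the backend. A self-contained sketch of that bookkeeping, with illustrative in-memory data standing in for the streamed request body:

    import hashlib

    layer_hasher = hashlib.sha256()  # stands in for upload.hasher
    chunks = []
    for data in (b'first chunk', b'second chunk'):
        chunk_hasher = hashlib.md5()  # covers this chunk only
        for off in range(0, len(data), 4096):
            piece = data[off:off + 4096]
            layer_hasher.update(piece)
            chunk_hasher.update(piece)
        chunks.append(dict(size=len(data), md5=chunk_hasher.hexdigest()))
    print(chunks)
    print(layer_hasher.hexdigest())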
@@ -223,14 +231,15 @@
         # Move the chunks into the blob dir to get them out of the
         # uploads dir.
         chunks = []
-        for i in range(1, upload.count + 1):
-            src_path = os.path.join(namespace, 'uploads', uuid, str(i))
-            dst_path = os.path.join(namespace, 'blobs', digest, str(i))
-            chunks.append(dst_path)
-            self.backend.move_object(src_path, dst_path)
+        for i, chunk in enumerate(upload.chunks):
+            src_path = os.path.join(namespace, 'uploads', uuid, str(i + 1))
+            dst_path = os.path.join(namespace, 'blobs', digest, str(i + 1))
+            chunks.append(dict(path=dst_path,
+                               md5=chunk['md5'], size=chunk['size']))
+            self.backend.move_object(src_path, dst_path, uuid)
         # Concatenate the chunks into one blob.
         path = os.path.join(namespace, 'blobs', digest, 'data')
-        self.backend.cat_objects(path, chunks)
+        self.backend.cat_objects(path, chunks, uuid)
         path = os.path.join(namespace, 'uploads', uuid, 'metadata')
         self.backend.delete_object(path)
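For reference, the relocation step in isolation: chunk n moves from uploads/<uuid>/<n> to blobs/<digest>/<n>, and its recorded md5/size ride along so the backend can verify the copy. A sketch under hypothetical names, with the backend move injected as a callable rather than the project's driver API:

    import os

    def relocate_chunks(namespace, uuid, digest, upload_chunks, move_object):
        chunks = []
        for i, chunk in enumerate(upload_chunks):
            src = os.path.join(namespace, 'uploads', uuid, str(i + 1))
            dst = os.path.join(namespace, 'blobs', digest, str(i + 1))
            # Carry the per-chunk metadata forward for later verification.
            chunks.append(dict(path=dst, md5=chunk['md5'], size=chunk['size']))
            move_object(src, dst, uuid)
        return chunks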


@@ -94,7 +94,7 @@ class SwiftDriver(storageutils.StorageDriver):
             return None
         return ret.headers['Content-Length']
 
-    def put_object(self, path, data):
+    def put_object(self, path, data, uuid=None):
         name = None
         try:
             with tempfile.NamedTemporaryFile('wb', delete=False) as f:

@@ -109,6 +109,24 @@
                     self.container_name,
                     path,
                     filename=name))
+
+            # Get the md5sum and size of the object, and make sure it
+            # matches the upload.
+            ret = retry_function(lambda: self.conn.session.head(
+                self.get_url(path)))
+            try:
+                size = int(ret.headers.get('Content-Length', ''))
+            except ValueError:
+                size = None
+            md5 = ret.headers.get('Etag', '')
+            sdk_md5 = ret.headers.get('X-Object-Meta-X-Sdk-Md5', '')
+            self.log.debug("[u: %s] Upload object %s "
+                           "md5: %s sdkmd5: %s size: %s",
+                           uuid, path, md5, sdk_md5, size)
+            if md5 != sdk_md5:
+                raise Exception("Swift and SDK md5s did not match (u: %s)" %
+                                uuid)
         finally:
             if name:
                 os.unlink(name)
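For ordinary (non-large) objects Swift reports the stored object's md5 as its Etag, and the SDK records the md5 it computed client-side in the X-Object-Meta-X-Sdk-Md5 metadata header; if the two disagree, the object was corrupted somewhere in transit. A sketch of that check against a requests-style HEAD response (the function name is illustrative):

    def verify_swift_object(ret, uuid=None):
        # ret: response from a HEAD request on the object's URL.
        try:
            size = int(ret.headers.get('Content-Length', ''))
        except ValueError:
            size = None
        md5 = ret.headers.get('Etag', '')
        sdk_md5 = ret.headers.get('X-Object-Meta-X-Sdk-Md5', '')
        if md5 != sdk_md5:
            raise Exception("Swift and SDK md5s did not match (u: %s)" % uuid)
        return md5, size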
@@ -138,28 +156,87 @@
             lambda: self.conn.session.delete(
                 self.get_url(path)))
 
-    def move_object(self, src_path, dst_path):
+    def move_object(self, src_path, dst_path, uuid=None):
         dst = os.path.join(self.container_name, dst_path)
+
+        # Get the md5sum and size of the object, and make sure it
+        # matches on both sides of the copy.
+        ret = retry_function(lambda: self.conn.session.head(
+            self.get_url(src_path)))
+        try:
+            size = int(ret.headers.get('Content-Length', ''))
+        except ValueError:
+            size = None
+        md5 = ret.headers.get('Etag', '')
+        sdk_md5 = ret.headers.get('X-Object-Meta-X-Sdk-Md5', '')
+        old_md = dict(md5=md5, sdk_md5=sdk_md5, size=size)
+        self.log.debug("[u: %s] Move object %s %s %s",
+                       uuid, src_path, dst_path, old_md)
+        if md5 != sdk_md5:
+            raise Exception("Swift and SDK md5s did not match at start "
+                            "of copy (u: %s) %s" % (uuid, old_md))
+
+        # FIXME: The multipart-manifest argument below means that in
+        # the event this docker chunk is a large object, we intend to
+        # copy the manifest but not the underlying large object
+        # segments. That seems incorrect, and we should actually just
+        # recast the large object segments into docker chunks and
+        # discard this manifest. But first we should verify that's
+        # what's happening -- it's not clear we ever hit a segment
+        # limit in practice, so we may never have a large object
+        # chunk.
         retry_function(
             lambda: self.conn.session.request(
                 self.get_url(src_path) + "?multipart-manfest=get",
                 'COPY',
                 headers={'Destination': dst}
             ))
+
+        # Get the md5sum and size of the object, and make sure it
+        # matches on both sides of the copy.
+        ret = retry_function(lambda: self.conn.session.head(
+            self.get_url(dst_path)))
+        try:
+            size = int(ret.headers.get('Content-Length', ''))
+        except ValueError:
+            size = None
+        md5 = ret.headers.get('Etag', '')
+        sdk_md5 = ret.headers.get('X-Object-Meta-X-Sdk-Md5', '')
+        new_md = dict(md5=md5, sdk_md5=sdk_md5, size=size)
+        self.log.debug("[u: %s] Moved object %s %s %s",
+                       uuid, src_path, dst_path, new_md)
+        if md5 != sdk_md5:
+            raise Exception("Swift and SDK md5s did not match at end of copy "
+                            "(u: %s) %s" % (uuid, new_md))
+        if old_md != new_md:
+            raise Exception("Object metadata did not match after copy "
+                            "(u: %s) old: %s new: %s" % (uuid, old_md, new_md))
+
         retry_function(
             lambda: self.conn.session.delete(
                 self.get_url(src_path)))
 
-    def cat_objects(self, path, chunks):
+    def cat_objects(self, path, chunks, uuid=None):
         manifest = []
         # TODO: Would it be better to move 1-chunk objects?
-        for chunk_path in chunks:
+        for chunk in chunks:
             ret = retry_function(
-                lambda: self.conn.session.head(self.get_url(chunk_path)))
-            if int(ret.headers['Content-Length']) == 0:
+                lambda: self.conn.session.head(self.get_url(chunk['path'])))
+            size = int(ret.headers['Content-Length'])
+            if size == 0:
                 continue
+            etag = ret.headers['Etag']
+            sdk_md5 = ret.headers['X-Object-Meta-X-Sdk-Md5']
+            if not (sdk_md5 == etag == chunk['md5']):
+                raise Exception("Object metadata did not match during cat "
+                                "(u: %s) orig: %s sdk: %s etag: %s" % (
+                                    uuid, chunk['md5'], sdk_md5, etag))
+            if not (size == chunk['size']):
+                raise Exception("Object metadata did not match during cat "
+                                "(u: %s) orig: %s size: %s" % (
+                                    uuid, chunk['size'], size))
             manifest.append({'path':
-                             os.path.join(self.container_name, chunk_path),
+                             os.path.join(self.container_name, chunk['path']),
                              'etag': ret.headers['Etag'],
                              'size_bytes': ret.headers['Content-Length']})
         retry_function(lambda: