zuul-registry/zuul_registry/main.py
Ian Wienand 6439b16c04 Fix manifest HEAD response when looking up by label
Prior change Iaf6b83b61a470e8c5a61d18b7c241f9ca002c08a contained a bug
that and was returning an blank Content-Length for manifest HEAD
queries.  For this case, we have already read() the blob so we can
just return it's length for the HEAD query.

(the extant code was wrong for this case and self.storage.blob_size
was returning None, which got turned into 0 silently.  This leads to
the client reporting:

 failed commit on ref "manifest-sha256:ad38c...": unexpected commit digest
 sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,
 expected  sha256:ad38c... failed precondition

I put this in for anyone googling who is seeing that e3b0c4... hash;
it's a clue that somehow you're trying to hash something empty.  This
slipped through because I tested pulling images, which worked, but it
fails when pulled in by docker build).

Change-Id: I7218843879ecd7939730060b6cee18af8190b888
2021-09-17 16:32:51 +10:00

589 lines
24 KiB
Python

# Copyright 2019 Red Hat, Inc.
# Copyright 2021 Acme Gating, LLC
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this software. If not, see <http://www.gnu.org/licenses/>.
import argparse
import base64
import os
import sys
import logging
import cherrypy
import hashlib
import json
import typing
import functools
import yaml
from . import filesystem
from . import storage
from . import swift
import jwt
DRIVERS = {
'filesystem': filesystem.Driver,
'swift': swift.Driver,
}
class Authorization(cherrypy.Tool):
log = logging.getLogger("registry.authz")
READ = 'read'
WRITE = 'write'
AUTH = 'auth'
def __init__(self, secret, users, public_url):
self.secret = secret
self.public_url = public_url
self.rw = {}
self.ro = {}
self.anonymous_read = True
for user in users:
if user['access'] == self.WRITE:
self.rw[user['name']] = user['pass']
if user['access'] == self.READ:
self.ro[user['name']] = user['pass']
self.anonymous_read = False
if self.anonymous_read:
self.log.info("Anonymous read access enabled")
else:
self.log.info("Anonymous read access disabled")
cherrypy.Tool.__init__(self, 'before_handler',
self.check_auth,
priority=1)
def check(self, store, user, password):
if user not in store:
return False
return store[user] == password
def unauthorized(self, scope):
cherrypy.response.headers['www-authenticate'] = (
'Bearer realm="%s/auth/token",scope="%s"' % (
self.public_url, scope)
)
raise cherrypy.HTTPError(401, 'Authentication required')
def check_auth(self, level=READ):
auth_header = cherrypy.request.headers.get('authorization')
if auth_header and 'Bearer' in auth_header:
token = auth_header.split()[1]
payload = jwt.decode(token, 'secret', algorithms=['HS256'])
if payload.get('level') in [level, self.WRITE]:
self.log.debug('Auth ok %s', level)
return
self.log.debug('Unauthorized %s', level)
self.unauthorized(level)
def _get_level(self, scope):
level = None
if not isinstance(scope, list):
scope = scope.split(' ')
for resource_scope in scope:
parts = resource_scope.split(':')
if parts[0] == 'repository' and 'push' in parts[2]:
level = self.WRITE
if (parts[0] == 'repository' and 'pull' in parts[2]
and level is None):
level = self.READ
if level is None:
if self.anonymous_read:
# No scope was provided, so this is an authentication
# request; treat it as requesting 'write' access so
# that we validate the password.
level = self.WRITE
else:
level = self.READ
return level
@cherrypy.expose
@cherrypy.tools.json_out(content_type='application/json; charset=utf-8')
def token(self, **kw):
# If the scope of the token requested is for pushing an image,
# that corresponds to 'write' level access, so we verify the
# password.
#
# If the scope of the token is not specified, we treat it as
# 'write' since it probably means the client is performing
# login validation. The _get_level method takes care of that.
#
# If the scope requested is for pulling an image, we always
# grant a read-level token. This covers the case where no
# authentication credentials are supplied, and also an
# interesting edge case: the docker client, when configured
# with a registry mirror, will, bless it's little heart, send
# the *docker hub* credentials to that mirror. In order for
# us to act as a a stand-in for docker hub, we need to accept
# those credentials.
auth_header = cherrypy.request.headers.get('authorization')
level = self._get_level(kw.get('scope', ''))
self.log.info('Authenticate level %s', level)
if level == self.WRITE:
self._check_creds(auth_header, [self.rw], level)
elif level == self.READ and not self.anonymous_read:
self._check_creds(auth_header, [self.rw, self.ro], level)
# If we permit anonymous read and we're requesting read, no
# check is performed.
self.log.debug('Generate %s token', level)
token = jwt.encode({'level': level}, 'secret', algorithm='HS256')
return {'token': token,
'access_token': token}
def _check_creds(self, auth_header, credstores, level):
# If the password is okay, fall through; otherwise call
# unauthorized for the side effect of raising an exception.
if auth_header and 'Basic' in auth_header:
cred = auth_header.split()[1]
cred = base64.decodebytes(cred.encode('utf8')).decode('utf8')
user, pw = cred.split(':', 1)
# Return true on the first credstore with the user, false otherwise
if not next(filter(
lambda cs: self.check(cs, user, pw), credstores), False):
self.unauthorized(level)
else:
self.unauthorized(level)
class RegistryAPI:
"""Registry API server.
Implements the container registry protocol as documented in
https://docs.docker.com/registry/spec/api/
"""
log = logging.getLogger("registry.api")
DEFAULT_NAMESPACE = '_local'
# A list of content types ordered by preference. Manifest lists
# come first so that multi-arch builds are supported.
CONTENT_TYPES = [
'application/vnd.docker.distribution.manifest.list.v2+json',
'application/vnd.oci.image.index.v1+json',
'application/vnd.docker.distribution.manifest.v2+json',
'application/vnd.oci.image.manifest.v1+json',
]
def __init__(self, store, namespaced, authz, conf):
self.storage = store
self.authz = authz
self.namespaced = namespaced
self.conf = conf
def get_namespace(self, repository):
if not self.namespaced:
return (self.DEFAULT_NAMESPACE, repository)
parts = repository.split('/')
return (parts[0], '/'.join(parts[1:]))
def not_found(self):
raise cherrypy.HTTPError(404)
@cherrypy.expose
@cherrypy.tools.json_out(content_type='application/json; charset=utf-8')
def version_check(self):
self.log.info('Version check')
return {'version': '1.0'}
res = cherrypy.response
res.headers['Distribution-API-Version'] = 'registry/2.0'
@cherrypy.expose
# By default CherryPy will try to encode the body/add a charset to
# headers if the response type is text/*. However, since it's
# changing unicode things that may alter the body length, CherryPy
# deletes the Content-Length so that the framework will
# automatically re-caclulate it when the response is sent.
#
# This poses a problem for blob HEAD requests which return a blank
# body -- we don't really have a Content-Type for a blank body so
# it defaults to text/html and goes into the charset detection
# path where the Content-Length set would get set to zero.
# Clients handle this in different and confusing ways; doing
# things sending back invalid manifests several steps later.
#
# Disabling the add_charset tool here is important to avoid this
# behaviour and send a correct Content-Length.
@cherrypy.config(**{'tools.encode.add_charset': False})
def head_blob(self, repository, digest):
namespace, repository = self.get_namespace(repository)
size = self.storage.blob_size(namespace, digest)
if size is None:
self.log.info('Head blob %s %s %s not found',
namespace, repository, digest)
return self.not_found()
self.log.info('Head blob %s %s %s size %s',
namespace, repository, digest, size)
res = cherrypy.response
res.headers['Docker-Content-Digest'] = digest
res.headers['Content-Length'] = str(size)
return ''
@cherrypy.expose
@cherrypy.config(**{'response.stream': True})
def get_blob(self, repository, digest, ns=None):
# The ns parameter is supplied by some image clients (like the one
# found in buildx). We specify it here so that cherrypy doesn't 404
# when receiving that parameter, but we ignore it otherwise.
namespace, repository = self.get_namespace(repository)
self.log.info('Get blob %s %s %s', namespace, repository, digest)
size, data_iter = self.storage.stream_blob(namespace, digest)
if data_iter is None:
return self.not_found()
res = cherrypy.response
res.headers['Docker-Content-Digest'] = digest
res.headers['Content-Type'] = 'application/octet-stream'
if size is not None:
res.headers['Content-Length'] = str(size)
return data_iter
@cherrypy.expose
@cherrypy.tools.json_out(content_type='application/json; charset=utf-8')
def get_tags(self, repository):
namespace, repository = self.get_namespace(repository)
self.log.info('Get tags %s %s', namespace, repository)
tags = self.storage.list_tags(namespace, repository)
return {'name': repository,
'tags': [t.name for t in tags]}
@cherrypy.expose
@cherrypy.config(**{'tools.check_auth.level': Authorization.WRITE})
def start_upload(self, repository, digest=None):
orig_repository = repository
namespace, repository = self.get_namespace(repository)
method = cherrypy.request.method
uuid = self.storage.start_upload(namespace)
self.log.info('[u: %s] Start upload %s %s %s digest %s',
uuid, method, namespace, repository, digest)
res = cherrypy.response
res.headers['Location'] = '/v2/%s/blobs/uploads/%s' % (
orig_repository, uuid)
res.headers['Docker-Upload-UUID'] = uuid
res.headers['Range'] = '0-0'
res.headers['Content-Length'] = '0'
res.status = '202 Accepted'
@cherrypy.expose
@cherrypy.config(**{'tools.check_auth.level': Authorization.WRITE})
def upload_chunk(self, repository, uuid):
orig_repository = repository
namespace, repository = self.get_namespace(repository)
self.log.info('[u: %s] Upload chunk %s %s',
uuid, namespace, repository)
old_length, new_length = self.storage.upload_chunk(
namespace, uuid, cherrypy.request.body)
res = cherrypy.response
res.headers['Location'] = '/v2/%s/blobs/uploads/%s' % (
orig_repository, uuid)
res.headers['Docker-Upload-UUID'] = uuid
res.headers['Content-Length'] = '0'
# Be careful to not be off-by-one, range starts at 0
res.headers['Range'] = '0-%s' % (new_length - 1,)
res.status = '202 Accepted'
self.log.info(
'[u: %s] Finish Upload chunk %s %s', uuid, repository, new_length)
@cherrypy.expose
@cherrypy.config(**{'tools.check_auth.level': Authorization.WRITE})
def finish_upload(self, repository, uuid, digest):
orig_repository = repository
namespace, repository = self.get_namespace(repository)
self.log.info('[u: %s] Upload final chunk %s %s digest %s',
uuid, namespace, repository, digest)
old_length, new_length = self.storage.upload_chunk(
namespace, uuid, cherrypy.request.body)
self.log.debug('[u: %s] Store upload %s %s',
uuid, namespace, repository)
self.storage.store_upload(namespace, uuid, digest)
self.log.info('[u: %s] Upload complete %s %s digest %s',
uuid, namespace, repository, digest)
res = cherrypy.response
res.headers['Location'] = '/v2/%s/blobs/%s' % (orig_repository, digest)
res.headers['Docker-Content-Digest'] = digest
res.headers['Content-Range'] = '%s-%s' % (old_length, new_length)
res.headers['Content-Length'] = '0'
res.status = '201 Created'
def _validate_manifest(self, namespace, request):
body = request.body.read()
content_type = request.headers.get('Content-Type')
# Only v2 manifests are validated
if (content_type !=
'application/vnd.docker.distribution.manifest.v2+json'):
return body
data = json.loads(body)
# We should not be missing a size in the manifest. At one
# point we did accept this but it turned out to be related to
# zuul-registry returning invalid data in HEAD requests.
if 'size' not in data['config']:
msg = ('Manifest missing size attribute, can not create')
raise cherrypy.HTTPError(400, msg)
# Validate layer sizes
for layer in data['layers']:
digest = layer['digest']
actual_size = self.storage.blob_size(namespace, digest)
if 'size' not in layer:
msg = ('Client push error: layer %s missing size ' % digest)
raise cherrypy.HTTPError(400, msg)
size = layer['size']
if size == actual_size:
continue
msg = ("Manifest has invalid size for layer %s "
"(size:%d actual:%d)" % (digest, size, actual_size))
self.log.error(msg)
# We don't delete layers here as they may be used by
# different images with valid manifests. Return an error to
# the client so it can try again.
raise cherrypy.HTTPError(400, msg)
return body
@cherrypy.expose
@cherrypy.config(**{'tools.check_auth.level': Authorization.WRITE})
def put_manifest(self, repository, ref):
namespace, repository = self.get_namespace(repository)
body = self._validate_manifest(namespace, cherrypy.request)
hasher = hashlib.sha256()
hasher.update(body)
digest = 'sha256:' + hasher.hexdigest()
self.log.info('Put manifest %s %s %s digest %s',
namespace, repository, ref, digest)
self.storage.put_blob(namespace, digest, body)
manifest = self.storage.get_manifest(namespace, repository, ref)
if manifest is None:
manifest = {}
else:
manifest = json.loads(manifest)
manifest[cherrypy.request.headers['Content-Type']] = digest
self.storage.put_manifest(
namespace, repository, ref, json.dumps(manifest).encode('utf8'))
res = cherrypy.response
res.headers['Location'] = '/v2/%s/manifests/%s' % (repository, ref)
res.headers['Docker-Content-Digest'] = digest
res.status = '201 Created'
@cherrypy.expose
# see prior note; this avoids destroying Content-Length on HEAD requests
@cherrypy.config(**{'tools.encode.add_charset': False})
def get_manifest(self, repository, ref, ns=None):
# The ns parameter is supplied by some image clients (like the one
# found in buildx). We specify it here so that cherrypy doesn't 404
# when receiving that parameter, but we ignore it otherwise.
namespace, repository = self.get_namespace(repository)
method = cherrypy.request.method
headers = cherrypy.request.headers
res = cherrypy.response
self.log.info(
'%s manifest %s %s %s', method, namespace, repository, ref)
if ref.startswith('sha256:'):
manifest = self.storage.get_blob(namespace, ref)
if manifest is None:
self.log.error('Manifest %s %s not found', repository, ref)
return self.not_found()
res.headers['Content-Type'] = json.loads(manifest)['mediaType']
res.headers['Docker-Content-Digest'] = ref
if method == 'HEAD':
# HEAD requests just return a blank body with the size
# of the manifest in Content-Length
size = self.storage.blob_size(namespace, ref)
res.headers['Content-Length'] = size
return ''
return manifest
# looking up by tag
manifest = self.storage.get_manifest(namespace, repository, ref)
if manifest is None:
manifest = {}
else:
manifest = json.loads(manifest)
accept = [x.strip() for x in headers['Accept'].split(',')]
# Resort content types by ones that we know about in our
# preference order, followed by ones we don't know about in
# the original order.
content_types = ([h for h in self.CONTENT_TYPES if h in accept] +
[h for h in accept if h not in self.CONTENT_TYPES])
for ct in content_types:
if ct in manifest:
self.log.debug('Manifest %s %s digest found %s',
repository, ref, manifest[ct])
data = self.storage.get_blob(namespace, manifest[ct])
if not data:
self.log.error(
'Blob %s %s not found', namespace, manifest[ct])
return self.not_found()
res.headers['Content-Type'] = ct
hasher = hashlib.sha256()
hasher.update(data)
self.log.debug('Retrieved sha256 %s', hasher.hexdigest())
res.headers['Docker-Content-Digest'] = manifest[ct]
if method == 'HEAD':
# See comment above about head response
res.headers['Content-Length'] = len(data)
return ''
return data
self.log.error('Manifest %s %s not found', repository, ref)
return self.not_found()
class RegistryServer:
log = logging.getLogger("registry.server")
def __init__(self, config_path):
self.log.info("Loading config from %s", config_path)
self.conf = RegistryServer.load_config(
config_path, os.environ)['registry']
# TODO: pyopenssl?
if 'tls-key' in self.conf:
cherrypy.server.ssl_module = 'builtin'
cherrypy.server.ssl_certificate = self.conf['tls-cert']
cherrypy.server.ssl_private_key = self.conf['tls-key']
driver = self.conf['storage']['driver']
backend = DRIVERS[driver](self.conf['storage'])
self.store = storage.Storage(backend, self.conf['storage'])
authz = Authorization(self.conf['secret'], self.conf['users'],
self.conf['public-url'])
route_map = cherrypy.dispatch.RoutesDispatcher()
api = RegistryAPI(self.store,
False,
authz,
self.conf)
cherrypy.tools.check_auth = authz
route_map.connect('api', '/v2/',
controller=api, action='version_check')
route_map.connect('api', '/v2/{repository:.*}/blobs/uploads/',
controller=api, action='start_upload')
route_map.connect('api', '/v2/{repository:.*}/blobs/uploads/{uuid}',
conditions=dict(method=['PATCH']),
controller=api, action='upload_chunk')
route_map.connect('api', '/v2/{repository:.*}/blobs/uploads/{uuid}',
conditions=dict(method=['PUT']),
controller=api, action='finish_upload')
route_map.connect('api', '/v2/{repository:.*}/manifests/{ref}',
conditions=dict(method=['PUT']),
controller=api, action='put_manifest')
route_map.connect('api', '/v2/{repository:.*}/manifests/{ref}',
conditions=dict(method=['GET', 'HEAD']),
controller=api, action='get_manifest')
route_map.connect('api', '/v2/{repository:.*}/blobs/{digest}',
conditions=dict(method=['HEAD']),
controller=api, action='head_blob')
route_map.connect('api', '/v2/{repository:.*}/blobs/{digest}',
conditions=dict(method=['GET']),
controller=api, action='get_blob')
route_map.connect('api', '/v2/{repository:.*}/tags/list',
conditions=dict(method=['GET']),
controller=api, action='get_tags')
route_map.connect('authz', '/auth/token',
controller=authz, action='token')
conf = {
'/': {
'request.dispatch': route_map,
'tools.check_auth.on': True,
},
'/auth': {
'tools.check_auth.on': False,
}
}
cherrypy.config.update({
'global': {
'environment': 'production',
'server.max_request_body_size': 1e12,
'server.socket_host': self.conf['address'],
'server.socket_port': self.conf['port'],
},
})
cherrypy.tree.mount(api, '/', config=conf)
@staticmethod
def load_config(path: str, env: typing.Dict[str, str]) -> typing.Any:
"""Replace path content value of the form %(ZUUL_ENV_NAME) with environment,
Then return the yaml load result"""
with open(path) as f:
return yaml.safe_load(functools.reduce(
lambda config, env_item: config.replace(
f"%({env_item[0]})", env_item[1]),
[(k, v) for k, v in env.items() if k.startswith('ZUUL_')],
f.read()
))
@property
def port(self):
return cherrypy.server.bound_addr[1]
def start(self):
self.log.info("Registry starting")
cherrypy.engine.start()
def stop(self):
self.log.info("Registry stopping")
cherrypy.engine.exit()
# Not strictly necessary, but without this, if the server is
# started again (e.g., in the unit tests) it will reuse the
# same host/port settings.
cherrypy.server.httpserver = None
def prune(self):
self.store.prune()
def main():
parser = argparse.ArgumentParser(
description='Zuul registry server')
parser.add_argument('-c', dest='config',
help='Config file path',
default='/conf/registry.yaml')
parser.add_argument('-d', dest='debug',
help='Debug log level',
action='store_true')
parser.add_argument('command',
nargs='?',
help='Command: serve, prune',
default='serve')
args = parser.parse_args()
logformat = '%(asctime)s %(levelname)s %(name)s: %(message)s'
if args.debug or os.environ.get('DEBUG') == '1':
logging.basicConfig(level=logging.DEBUG, format=logformat)
logging.getLogger("openstack").setLevel(logging.DEBUG)
logging.getLogger("urllib3").setLevel(logging.DEBUG)
logging.getLogger("requests").setLevel(logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO, format=logformat)
logging.getLogger("openstack").setLevel(logging.INFO)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
cherrypy.log.access_log.propagate = False
logging.getLogger("keystoneauth").setLevel(logging.ERROR)
logging.getLogger("stevedore").setLevel(logging.ERROR)
s = RegistryServer(args.config)
if args.command == 'serve':
s.start()
cherrypy.engine.block()
elif args.command == 'prune':
s.prune()
else:
print("Unknown command: %s", args.command)
sys.exit(1)