From 7be52cae3a21bcfeb268572df12aa2e0bab9855a Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Tue, 2 Jan 2024 11:47:03 -0800 Subject: [PATCH] Don't upload empty final chunks An upload can be finished with a PUT with no body. If that happens, we send an empty chunk to our backend, which can end up using extra storage and potentially using multipart objects on object storage servers when not strictly necessary. Simplify this by checking to see if we are getting any data from the client before we start our push to the backend. Change-Id: Ia2069364ba2a5e03331f9eabca692b5aa065ce6e --- zuul_registry/storage.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/zuul_registry/storage.py b/zuul_registry/storage.py index 1fef65b..3492951 100644 --- a/zuul_registry/storage.py +++ b/zuul_registry/storage.py @@ -1,4 +1,5 @@ # Copyright 2019 Red Hat, Inc. +# Copyright 2024 Acme Gating, LLC # # This module is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -184,10 +185,8 @@ class Storage: upload = self._get_upload(namespace, uuid) path = os.path.join(namespace, 'uploads', uuid, str(upload.count + 1)) - streamer = UploadStreamer() - t = threading.Thread(target=self.backend.put_object, - args=(path, streamer, uuid)) - t.start() + streamer = None + thread = None size = 0 # This calculates the md5 of just this chunk for internal # integrity checking; it is not the overall hash of the layer @@ -201,14 +200,20 @@ class Storage: d = b'' if not d: break + if streamer is None: + streamer = UploadStreamer() + thread = threading.Thread(target=self.backend.put_object, + args=(path, streamer, uuid)) + thread.start() upload.hasher.update(d) chunk_hasher.update(d) size += len(d) streamer.write(d) - streamer.write(None) - t.join() - upload.chunks.append(dict(size=size, md5=chunk_hasher.hexdigest())) - self._update_upload(namespace, uuid, upload) + if streamer: + streamer.write(None) + thread.join() + upload.chunks.append(dict(size=size, md5=chunk_hasher.hexdigest())) + self._update_upload(namespace, uuid, upload) return upload.size - size, upload.size def store_upload(self, namespace, uuid, digest):