Don't upload empty final chunks

An upload can be finished with a PUT that has no body.  When that
happens, we send an empty chunk to our backend, which can consume
extra storage and may cause multipart objects to be used on object
storage servers when that is not strictly necessary.

Avoid this by checking whether we are getting any data from the
client before we start our push to the backend, so that the push
only begins once the first data arrives.

Change-Id: Ia2069364ba2a5e03331f9eabca692b5aa065ce6e
This commit is contained in:
James E. Blair 2024-01-02 11:47:03 -08:00
parent 9c9395df2c
commit 7be52cae3a

View File

@ -1,4 +1,5 @@
# Copyright 2019 Red Hat, Inc.
# Copyright 2024 Acme Gating, LLC
#
# This module is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -184,10 +185,8 @@ class Storage:
upload = self._get_upload(namespace, uuid)
path = os.path.join(namespace, 'uploads', uuid, str(upload.count + 1))
streamer = UploadStreamer()
t = threading.Thread(target=self.backend.put_object,
args=(path, streamer, uuid))
t.start()
streamer = None
thread = None
size = 0
# This calculates the md5 of just this chunk for internal
# integrity checking; it is not the overall hash of the layer
@ -201,12 +200,18 @@ class Storage:
d = b''
if not d:
break
if streamer is None:
streamer = UploadStreamer()
thread = threading.Thread(target=self.backend.put_object,
args=(path, streamer, uuid))
thread.start()
upload.hasher.update(d)
chunk_hasher.update(d)
size += len(d)
streamer.write(d)
if streamer:
streamer.write(None)
t.join()
thread.join()
upload.chunks.append(dict(size=size, md5=chunk_hasher.hexdigest()))
self._update_upload(namespace, uuid, upload)
return upload.size - size, upload.size