From 7d70e05aeb6127b234d147102bb6f5500295f9ca Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Tue, 13 Nov 2012 08:32:38 -0500 Subject: [PATCH] Refactor DiskFile to hide temp file names and exts This set of changes reworks the DiskFile class to remove the "extension" parameter from the put() method, offering the new put_metadata() method with an optional tombstone keyword boolean, and changes the mkstemp method to only return the file descriptor. Reviewing the code it was found that the temporary file name created as a result of calling DiskFile.mkstemp() was never used by the caller, but the caller was responsible for passing it back to the DiskFile.put() method. That seems like too much information is exposed to the caller, when all the caller requires is the file descriptor to write data into it. Upon further review, the mkstemp() method was used in three places: PUT, POST and DELETE method handling. Of those three cases, only PUT requires the file descriptor, since it is responsible for writing the object contents. For POST and DELETE, DiskFile only needs to associate metadata with the correct file name. We abstract the pattern that those two use (once we also refactor the code to move the fetch of the delete-at metadata, and subsequent delete-at-update initiation, from under the mkstemp context) by adding the new put_metadata() method. As a result, the DiskFile class is then free to do whatever file system operations it must to meet the API, without the caller having to know more than just how to write data to a file descriptor. Note that DiskFile itself key'd off of the '.ts' and '.meta' extensions for its operations, and for that to work properly, the caller had to know to use those correctly. With this change, the caller has no knowledge of how the file system is being used to accomplish data and metadata storage. See also Question 213796 at: https://answers.launchpad.net/swift/+question/213796 Change-Id: I267f68e64391ba627b2a13682393bec62600159d Signed-off-by: Peter Portante --- swift/obj/server.py | 44 ++++++++++++++++++++++------------- test/unit/obj/test_auditor.py | 32 ++++++++++++------------- test/unit/obj/test_server.py | 25 +++++++++++++++++--- 3 files changed, 66 insertions(+), 35 deletions(-) diff --git a/swift/obj/server.py b/swift/obj/server.py index c654d80053..3c03aec4ed 100755 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -118,6 +118,7 @@ class DiskFile(object): path, device, storage_directory(DATADIR, partition, name_hash)) self.device_path = os.path.join(path, device) self.tmpdir = os.path.join(path, device, 'tmp') + self.tmppath = None self.logger = logger self.metadata = {} self.meta_file = None @@ -278,30 +279,31 @@ class DiskFile(object): """Contextmanager to make a temporary file.""" if not os.path.exists(self.tmpdir): mkdirs(self.tmpdir) - fd, tmppath = mkstemp(dir=self.tmpdir) + fd, self.tmppath = mkstemp(dir=self.tmpdir) try: - yield fd, tmppath + yield fd finally: try: os.close(fd) except OSError: pass + tmppath, self.tmppath = self.tmppath, None try: os.unlink(tmppath) except OSError: pass - def put(self, fd, tmppath, metadata, extension='.data'): + def put(self, fd, metadata, extension='.data'): """ Finalize writing the file on disk, and renames it from the temp file to the real location. This should be called after the data has been written to the temp file. - :params fd: file descriptor of the temp file - :param tmppath: path to the temporary file being used + :param fd: file descriptor of the temp file :param metadata: dictionary of metadata to be written :param extension: extension to be used when making the file """ + assert self.tmppath is not None metadata['name'] = self.name timestamp = normalize_timestamp(metadata['X-Timestamp']) write_metadata(fd, metadata) @@ -309,9 +311,21 @@ class DiskFile(object): self.drop_cache(fd, 0, int(metadata['Content-Length'])) tpool.execute(fsync, fd) invalidate_hash(os.path.dirname(self.datadir)) - renamer(tmppath, os.path.join(self.datadir, timestamp + extension)) + renamer(self.tmppath, + os.path.join(self.datadir, timestamp + extension)) self.metadata = metadata + def put_metadata(self, metadata, tombstone=False): + """ + Short hand for putting metadata to .meta and .ts files. + + :param metadata: dictionary of metadata to be written + :param tombstone: whether or not we are writing a tombstone + """ + extension = '.ts' if tombstone else '.meta' + with self.mkstemp() as fd: + self.put(fd, metadata, extension=extension) + def unlinkold(self, timestamp): """ Remove any older versions of the object file. Any file that has an @@ -565,8 +579,7 @@ class ObjectController(object): if old_delete_at: self.delete_at_update('DELETE', old_delete_at, account, container, obj, request.headers, device) - with file.mkstemp() as (fd, tmppath): - file.put(fd, tmppath, metadata, extension='.meta') + file.put_metadata(metadata) return HTTPAccepted(request=request) @public @@ -600,7 +613,7 @@ class ObjectController(object): etag = md5() upload_size = 0 last_sync = 0 - with file.mkstemp() as (fd, tmppath): + with file.mkstemp() as fd: if 'content-length' in request.headers: try: fallocate(fd, int(request.headers['content-length'])) @@ -654,7 +667,7 @@ class ObjectController(object): self.delete_at_update( 'DELETE', old_delete_at, account, container, obj, request.headers, device) - file.put(fd, tmppath, metadata) + file.put(fd, metadata) file.unlinkold(metadata['X-Timestamp']) if not orig_timestamp or \ orig_timestamp < request.headers['x-timestamp']: @@ -821,12 +834,11 @@ class ObjectController(object): metadata = { 'X-Timestamp': request.headers['X-Timestamp'], 'deleted': True, } - with file.mkstemp() as (fd, tmppath): - old_delete_at = int(file.metadata.get('X-Delete-At') or 0) - if old_delete_at: - self.delete_at_update('DELETE', old_delete_at, account, - container, obj, request.headers, device) - file.put(fd, tmppath, metadata, extension='.ts') + old_delete_at = int(file.metadata.get('X-Delete-At') or 0) + if old_delete_at: + self.delete_at_update('DELETE', old_delete_at, account, + container, obj, request.headers, device) + file.put_metadata(metadata, tombstone=True) file.unlinkold(metadata['X-Timestamp']) if not orig_timestamp or \ orig_timestamp < request.headers['x-timestamp']: diff --git a/test/unit/obj/test_auditor.py b/test/unit/obj/test_auditor.py index 9d9e4bfc92..a655170495 100644 --- a/test/unit/obj/test_auditor.py +++ b/test/unit/obj/test_auditor.py @@ -64,7 +64,7 @@ class TestAuditor(unittest.TestCase): self.auditor = auditor.AuditorWorker(self.conf) data = '0' * 1024 etag = md5() - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -74,7 +74,7 @@ class TestAuditor(unittest.TestCase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(fd).st_size), } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) pre_quarantines = self.auditor.quarantines self.auditor.object_audit( @@ -93,7 +93,7 @@ class TestAuditor(unittest.TestCase): data = '0' * 1024 etag = md5() timestamp = str(normalize_timestamp(time.time())) - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -102,7 +102,7 @@ class TestAuditor(unittest.TestCase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(fd).st_size), } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) pre_quarantines = self.auditor.quarantines # remake so it will have metadata self.disk_file = DiskFile(self.devices, 'sda', '0', 'a', 'c', 'o', @@ -154,7 +154,7 @@ class TestAuditor(unittest.TestCase): pre_quarantines = self.auditor.quarantines data = '0' * 1024 etag = md5() - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -163,7 +163,7 @@ class TestAuditor(unittest.TestCase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(fd).st_size), } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) self.disk_file.close() self.auditor.audit_all_objects() self.assertEquals(self.auditor.quarantines, pre_quarantines) @@ -174,7 +174,7 @@ class TestAuditor(unittest.TestCase): pre_quarantines = self.auditor.quarantines data = '0' * 1024 etag = md5() - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -183,7 +183,7 @@ class TestAuditor(unittest.TestCase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(fd).st_size), } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) self.disk_file.close() os.write(fd, 'extra_data') self.auditor.audit_all_objects() @@ -195,7 +195,7 @@ class TestAuditor(unittest.TestCase): pre_quarantines = self.auditor.quarantines data = '0' * 10 etag = md5() - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -204,14 +204,14 @@ class TestAuditor(unittest.TestCase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(fd).st_size), } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) self.disk_file.close() self.auditor.audit_all_objects() self.disk_file = DiskFile(self.devices, 'sdb', '0', 'a', 'c', 'ob', self.logger) data = '1' * 10 etag = md5() - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -220,7 +220,7 @@ class TestAuditor(unittest.TestCase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(fd).st_size), } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) self.disk_file.close() os.write(fd, 'extra_data') self.auditor.audit_all_objects() @@ -231,7 +231,7 @@ class TestAuditor(unittest.TestCase): self.auditor.log_time = 0 data = '0' * 1024 etag = md5() - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -240,7 +240,7 @@ class TestAuditor(unittest.TestCase): 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': str(os.fstat(fd).st_size), } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) etag = md5() etag.update('1' + '0' * 1023) etag = etag.hexdigest() @@ -270,14 +270,14 @@ class TestAuditor(unittest.TestCase): fp.close() etag = md5() - with self.disk_file.mkstemp() as (fd, tmppath): + with self.disk_file.mkstemp() as fd: etag = etag.hexdigest() metadata = { 'ETag': etag, 'X-Timestamp': str(normalize_timestamp(time.time())), 'Content-Length': 10, } - self.disk_file.put(fd, tmppath, metadata) + self.disk_file.put(fd, metadata) etag = md5() etag = etag.hexdigest() metadata['ETag'] = etag diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index c96c0d5b48..5be7604ece 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -216,7 +216,7 @@ class TestDiskFile(unittest.TestCase): timestamp = ts else: timestamp = str(normalize_timestamp(time())) - with df.mkstemp() as (fd, tmppath): + with df.mkstemp() as fd: os.write(fd, data) etag.update(data) etag = etag.hexdigest() @@ -225,7 +225,7 @@ class TestDiskFile(unittest.TestCase): 'X-Timestamp': timestamp, 'Content-Length': str(os.fstat(fd).st_size), } - df.put(fd, tmppath, metadata, extension=extension) + df.put(fd, metadata, extension=extension) if invalid_type == 'ETag': etag = md5() etag.update('1' + '0' * (fsize - 1)) @@ -316,7 +316,6 @@ class TestDiskFile(unittest.TestCase): extension='.data') df.close() self.assertTrue(df.quarantined_dir) - df = self._get_data_file(invalid_type='Content-Length', extension='.ts') df.close() @@ -325,6 +324,26 @@ class TestDiskFile(unittest.TestCase): extension='.ts') self.assertRaises(DiskFileNotExist, df.get_data_file_size) + def test_put_metadata(self): + df = self._get_data_file() + ts = time() + metadata = { 'X-Timestamp': ts, 'X-Object-Meta-test': 'data' } + df.put_metadata(metadata) + exp_name = '%s.meta' % str(normalize_timestamp(ts)) + dl = os.listdir(df.datadir) + self.assertEquals(len(dl), 2) + self.assertTrue(exp_name in set(dl)) + + def test_put_metadata_ts(self): + df = self._get_data_file() + ts = time() + metadata = { 'X-Timestamp': ts, 'X-Object-Meta-test': 'data' } + df.put_metadata(metadata, tombstone=True) + exp_name = '%s.ts' % str(normalize_timestamp(ts)) + dl = os.listdir(df.datadir) + self.assertEquals(len(dl), 2) + self.assertTrue(exp_name in set(dl)) + def test_unlinkold(self): df1 = self._get_data_file() future_time = str(normalize_timestamp(time() + 100))