From b13b49a27caac17ae55b19f315d5ce31801c9522 Mon Sep 17 00:00:00 2001
From: Alistair Coles
Date: Tue, 9 Aug 2016 16:09:38 +0100
Subject: [PATCH] EC - eliminate .durable files

Instead of using a separate .durable file to indicate the durable status of a
.data file, rename the .data to include a durable marker in the filename. This
saves one inode for every EC fragment archive.

An EC policy PUT will, as before, first rename a temp file to:

    <timestamp>#<frag_index>.data

but now, when the object is committed, that file will be renamed:

    <timestamp>#<frag_index>#d.data

with the '#d' suffix marking the data file as durable.

Diskfile suffix hashing returns the same result when the new durable-data
filename or the legacy durable file is found in an object directory. A
fragment archive that has been created on an upgraded object server will
therefore appear to be in the same state, as far as the consistency engine is
concerned, as the same fragment archive created on an older object server.

Since legacy .durable files will still exist in deployed clusters, many of the
unit test scenarios have been duplicated for both new durable-data filenames
and legacy durable files.

Change-Id: I6f1f62d47be0b0ac7919888c77480a636f11f607
---
 doc/source/overview_erasure_code.rst     |  134 +-
 swift/obj/diskfile.py                    |  131 +-
 swift/obj/server.py                      |    4 +-
 swift/obj/ssync_receiver.py              |    4 +-
 swift/proxy/controllers/obj.py           |    7 +-
 test/probe/test_reconstructor_durable.py |   36 +-
 test/unit/obj/common.py                  |   46 +-
 test/unit/obj/test_diskfile.py           | 1979 ++++++++++++++--------
 test/unit/obj/test_reconstructor.py      |   58 +-
 test/unit/obj/test_server.py             |  103 +-
 test/unit/obj/test_ssync.py              |   87 +-
 test/unit/obj/test_ssync_receiver.py     |    4 +-
 test/unit/proxy/test_server.py           |  132 +-
 13 files changed, 1737 insertions(+), 988 deletions(-)

diff --git a/doc/source/overview_erasure_code.rst b/doc/source/overview_erasure_code.rst
index 39aeb10e09..8ce2b323b8 100644
--- a/doc/source/overview_erasure_code.rst
+++ b/doc/source/overview_erasure_code.rst
@@ -317,35 +317,44 @@ EC archives are stored on disk in their respective objects-N directory based
 on their policy index. See :doc:`overview_policies` for details on per policy
 directory information.
 
-The actual names on disk of EC archives also have one additional piece of data
-encoded in the filename, the fragment archive index.
+In addition to the object timestamp, the filenames of EC archives encode other
+information related to the archive:
 
-Each storage policy now must include a transformation function that diskfile
-will use to build the filename to store on disk. The functions are implemented
-in the diskfile module as policy specific sub classes ``DiskFileManager``.
+* The fragment archive index. This is required for a few reasons. For one, it
+  allows us to store fragment archives of different indexes on the same storage
+  node which is not typical however it is possible in many circumstances.
+  Without unique filenames for the different EC archive files in a set, we
+  would be at risk of overwriting one archive of index `n` with another of
+  index `m` in some scenarios.
 
-This is required for a few reasons. For one, it allows us to store fragment
-archives of different indexes on the same storage node which is not typical
-however it is possible in many circumstances. Without unique filenames for the
-different EC archive files in a set, we would be at risk of overwriting one
-archive of index n with another of index m in some scenarios.
-
-The transformation function for the replication policy is simply a NOP.
For
-reconstruction, the index is appended to the filename just before the .data
-extension. An example filename for a fragment archive storing the 5th fragment
-would like this::
+  The index is appended to the filename just before the ``.data`` extension.
+  For example, the filename for a fragment archive storing the 5th fragment
+  would be::
 
     1418673556.92690#5.data
 
-An additional file is also included for Erasure Code policies called the
-``.durable`` file. Its meaning will be covered in detail later, however, its on-
-disk format does not require the name transformation function that was just
-covered. The .durable for the example above would simply look like this::
+* The durable state of the archive. The meaning of this will be described in
+  more detail later, but a fragment archive that is considered durable has an
+  additional ``#d`` string included in its filename immediately before the
+  ``.data`` extension. For example::
 
-    1418673556.92690.durable
+    1418673556.92690#5#d.data
+
+A policy-specific transformation function is therefore used to build the
+archive filename. These functions are implemented in the diskfile module as
+methods of policy specific sub classes of ``BaseDiskFileManager``.
+
+The transformation function for the replication policy is simply a NOP.
+
+.. note::
+
+    In older versions the durable state of an archive was represented by an
+    additional file called the ``.durable`` file instead of the ``#d``
+    substring in the ``.data`` filename. The ``.durable`` for the example above
+    would be::
+
+        1418673556.92690.durable
 
-And it would be found alongside every fragment specific .data file following a
-100% successful PUT operation.
 
 Proxy Server
 ------------
@@ -393,21 +402,31 @@ communicate back to the storage nodes once it has confirmation that a quorum of
 fragment archives in the set have been written.
 
 For the first phase of the conversation the proxy requires a quorum of
-`ec_ndata + 1` fragment archives to be successfully put to storage nodes.
-This ensures that the object could still be reconstructed even if one of the
-fragment archives becomes unavailable. During the second phase of the
-conversation the proxy communicates a confirmation to storage nodes that the
-fragment archive quorum has been achieved. This causes the storage node to
-create a `ts.durable` file at timestamp `ts` which acts as an indicator of
-the last known durable set of fragment archives for a given object. The
-presence of a `ts.durable` file means, to the object server, `there is a set
-of ts.data files that are durable at timestamp ts`.
+`ec_ndata + 1` fragment archives to be successfully put to storage nodes. This
+ensures that the object could still be reconstructed even if one of the
+fragment archives becomes unavailable. As described above, each fragment
+archive file is named::
+
+    <ts>#<frag_index>.data
+
+where ``ts`` is the timestamp and ``frag_index`` is the fragment archive index.
+
+During the second phase of the conversation the proxy communicates a
+confirmation to storage nodes that the fragment archive quorum has been
+achieved. This causes each storage node to rename the fragment archive written
+in the first phase of the conversation to include the substring ``#d`` in its
+name::
+
+    <ts>#<frag_index>#d.data
+
+This indicates to the object server that this fragment archive is `durable` and
+that there is a set of data files that are durable at timestamp ``ts``.
 
 For the second phase of the conversation the proxy requires a quorum of
 `ec_ndata + 1` successful commits on storage nodes.
This ensures that there are sufficient committed fragment archives for the
 object to be reconstructed even
-if one becomes unavailable. The reconstructor ensures that `.durable` files are
-replicated on storage nodes where they may be missing.
+if one becomes unavailable. The reconstructor ensures that the durable state is
+replicated on storage nodes where it may be missing.
 
 Note that the completion of the commit phase of the conversation is also a
 signal for the object server to go ahead and immediately delete older
@@ -423,9 +442,9 @@ The basic flow looks like this:
    data/metadata write, send a 1st-phase response to proxy.
  * Upon quorum of storage nodes responses, the proxy initiates 2nd-phase by
    sending commit confirmations to object servers.
- * Upon receipt of commit message, object servers store a 0-byte data file as
-   `<timestamp>.durable` indicating successful PUT, and send a final response to
-   the proxy server.
+ * Upon receipt of commit message, object servers rename ``.data`` files to
+   include the ``#d`` substring, indicating successful PUT, and send a final
+   response to the proxy server.
  * The proxy waits for `ec_ndata + 1` object servers to respond with a
    success (2xx) status before responding to the client with a successful
    status.
 
@@ -446,24 +465,25 @@ Here is a high level example of what the conversation looks like::
        Content-MD5: <footer_meta_cksum>
        <footer_meta>
        --MIMEboundary
-
+   <object server writes data, metadata to <ts>#<frag_index>.data file>
 obj: 100 Continue
 <quorum>
 proxy: X-Document: put commit
        commit_confirmation
        --MIMEboundary--
-
+   <object server renames <ts>#<frag_index>.data to <ts>#<frag_index>#d.data>
 obj: 20x
 <proxy waits to receive >=2 2xx responses>
 proxy: 2xx -> client
 
-A few key points on the .durable file:
+A few key points on the durable state of a fragment archive:
 
-* The .durable file means "the matching .data file for this has sufficient
-  fragment archives somewhere, committed, to reconstruct the object".
+* A durable fragment archive means that there exist sufficient other fragment
+  archives elsewhere in the cluster (durable and/or non-durable) to reconstruct
+  the object.
 * When a proxy does a GET, it will require at least one object server to
-  respond with a fragment archive that has a matching `.durable` file before
-  reconstructing and returning the object to the client.
+  respond with a fragment archive that is durable before reconstructing and
+  returning the object to the client.
 
 Partial PUT Failures
 ====================
 
 A partial PUT failure has a few different modes. In one scenario the Proxy
 Server is alive through the entire PUT conversation. This is a very
 straightforward case. The client will receive a good response if and only if a
-quorum of fragment archives were successfully landed on their storage nodes. In
-this case the Reconstructor will discover the missing fragment archives, perform
-a reconstruction and deliver fragment archives and their matching .durable files
-to the nodes.
+quorum of fragment archives were successfully landed on their storage nodes.
+In this case the Reconstructor will discover the missing fragment archives,
+perform a reconstruction and deliver those fragment archives to their nodes.
 
 The more interesting case is what happens if the proxy dies in the middle of a
 conversation. If it turns out that a quorum had been met and the commit phase
@@ -499,7 +518,7 @@ implement the high level steps described earlier:
 
 #. 
The proxy server makes simultaneous requests to `ec_ndata` primary object server nodes with goal of finding a set of `ec_ndata` distinct EC archives at the same timestamp, and an indication from at least one object server - that a `.durable` file exists for that timestamp. If this goal is + that a durable fragment archive exists for that timestamp. If this goal is not achieved with the first `ec_ndata` requests then the proxy server continues to issue requests to the remaining primary nodes and then handoff nodes. @@ -510,12 +529,12 @@ implement the high level steps described earlier: response since each EC archive's metadata is valid only for that archive. #. The proxy streams the decoded data it has back to the client. -Note that the proxy does not require all objects servers to have a `.durable` -file for the EC archive that they return in response to a GET. The proxy -will be satisfied if just one object server has a `.durable` file at the same -timestamp as EC archives returned from other object servers. This means -that the proxy can successfully GET an object that had missing `.durable` files -when it was PUT (i.e. a partial PUT failure occurred). +Note that the proxy does not require all objects servers to have a durable +fragment archive to return in response to a GET. The proxy will be satisfied if +just one object server has a durable fragment archive at the same timestamp as +EC archives returned from other object servers. This means that the proxy can +successfully GET an object that had missing durable state on some nodes when it +was PUT (i.e. a partial PUT failure occurred). Note also that an object server may inform the proxy server that it has more than one EC archive for different timestamps and/or fragment indexes, which may @@ -541,12 +560,11 @@ which includes things like the entire object etag. DiskFile ======== -Erasure code uses subclassed ``ECDiskFile``, ``ECDiskFileWriter``, +Erasure code policies use subclassed ``ECDiskFile``, ``ECDiskFileWriter``, ``ECDiskFileReader`` and ``ECDiskFileManager`` to implement EC specific handling of on disk files. This includes things like file name manipulation to -include the fragment index in the filename, determination of valid .data files -based on .durable presence, construction of EC specific hashes.pkl file to -include fragment index information, etc., etc. +include the fragment index and durable state in the filename, construction of +EC specific ``hashes.pkl`` file to include fragment index information, etc. Metadata -------- diff --git a/swift/obj/diskfile.py b/swift/obj/diskfile.py index 0551b5e256..d982bdc6f9 100644 --- a/swift/obj/diskfile.py +++ b/swift/obj/diskfile.py @@ -998,7 +998,7 @@ class BaseDiskFileManager(object): def _get_hashes(self, partition_path, recalculate=None, do_listdir=False, reclaim_age=None): """ - Get a list of hashes for the suffix dir. do_listdir causes it to + Get hashes for each suffix dir in a partition. do_listdir causes it to mistrust the hash cache for suffix existence at the (unexpectedly high) cost of a listdir. reclaim_age is just passed on to hash_suffix. 
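As an aside, the commit-phase rename that the next hunk implements can be
pictured with a minimal standalone sketch (illustrative only, assuming the
``<timestamp>#<frag_index>[#d].data`` naming convention described above; the
helper names here are hypothetical and are not Swift APIs)::

    import os

    def ec_data_filename(timestamp, frag_index, durable=False):
        # e.g. '1418673556.92690#5.data' or '1418673556.92690#5#d.data'
        marker = '#d' if durable else ''
        return '%s#%d%s.data' % (timestamp, frag_index, marker)

    def commit_fragment(datadir, timestamp, frag_index):
        # rename the non-durable data file written in the first phase to its
        # durable name; the rename is atomic within a single filesystem
        src = os.path.join(datadir, ec_data_filename(timestamp, frag_index))
        dst = os.path.join(
            datadir, ec_data_filename(timestamp, frag_index, durable=True))
        os.rename(src, dst)
        return dst

A real implementation would additionally fsync the containing directory so
that the rename survives a crash, which is what the hunk below does.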
@@ -2572,48 +2572,58 @@ class ECDiskFileReader(BaseDiskFileReader): class ECDiskFileWriter(BaseDiskFileWriter): - def _finalize_durable(self, durable_file_path): + def _finalize_durable(self, data_file_path, durable_data_file_path): exc = None try: try: - with open(durable_file_path, 'wb') as _fp: - fsync(_fp.fileno()) + os.rename(data_file_path, durable_data_file_path) fsync_dir(self._datadir) except (OSError, IOError) as err: if err.errno not in (errno.ENOSPC, errno.EDQUOT): # re-raise to catch all handler raise - msg = (_('No space left on device for %(file)s (%(err)s)') % - {'file': durable_file_path, 'err': err}) - self.manager.logger.error(msg) - exc = DiskFileNoSpace(str(err)) + params = {'file': durable_data_file_path, 'err': err} + self.manager.logger.exception( + _('No space left on device for %(file)s (%(err)s)'), + params) + exc = DiskFileNoSpace( + 'No space left on device for %(file)s (%(err)s)' % params) else: try: self.manager.cleanup_ondisk_files(self._datadir)['files'] except OSError as os_err: self.manager.logger.exception( - _('Problem cleaning up %(datadir)s (%(err)s)') % + _('Problem cleaning up %(datadir)s (%(err)s)'), {'datadir': self._datadir, 'err': os_err}) except Exception as err: - msg = (_('Problem writing durable state file %(file)s (%(err)s)') % - {'file': durable_file_path, 'err': err}) - self.manager.logger.exception(msg) - exc = DiskFileError(msg) + params = {'file': durable_data_file_path, 'err': err} + self.manager.logger.exception( + _('Problem making data file durable %(file)s (%(err)s)'), + params) + exc = DiskFileError( + 'Problem making data file durable %(file)s (%(err)s)' % params) if exc: raise exc def commit(self, timestamp): """ - Finalize put by writing a timestamp.durable file for the object. We - do this for EC policy because it requires a 2-phase put commit - confirmation. + Finalize put by renaming the object data file to include a durable + marker. We do this for EC policy because it requires a 2-phase put + commit confirmation. :param timestamp: object put timestamp, an instance of :class:`~swift.common.utils.Timestamp` + :raises DiskFileError: if the diskfile frag_index has not been set + (either during initialisation or a call to put()) """ - durable_file_path = os.path.join( - self._datadir, timestamp.internal + '.durable') - tpool_reraise(self._finalize_durable, durable_file_path) + data_file_path = join( + self._datadir, self.manager.make_on_disk_filename( + timestamp, '.data', self._diskfile._frag_index)) + durable_data_file_path = os.path.join( + self._datadir, self.manager.make_on_disk_filename( + timestamp, '.data', self._diskfile._frag_index, durable=True)) + tpool_reraise( + self._finalize_durable, data_file_path, durable_data_file_path) def put(self, metadata): """ @@ -2631,7 +2641,9 @@ class ECDiskFileWriter(BaseDiskFileWriter): # sure that the fragment index is included in object sysmeta. fi = metadata.setdefault('X-Object-Sysmeta-Ec-Frag-Index', self._diskfile._frag_index) - # defer cleanup until commit() writes .durable + fi = self.manager.validate_fragment_index(fi) + self._diskfile._frag_index = fi + # defer cleanup until commit() writes makes diskfile durable cleanup = False super(ECDiskFileWriter, self)._put(metadata, cleanup, frag_index=fi) @@ -2746,13 +2758,17 @@ class ECDiskFile(BaseDiskFile): :param frag_index: fragment archive index, must be a whole number or None. 
""" - exts = ['.ts'] - # when frag_index is None it's not possible to build a data file name + purge_file = self.manager.make_on_disk_filename( + timestamp, ext='.ts') + remove_file(os.path.join(self._datadir, purge_file)) if frag_index is not None: - exts.append('.data') - for ext in exts: + # data file may or may not be durable so try removing both filename + # possibilities purge_file = self.manager.make_on_disk_filename( - timestamp, ext=ext, frag_index=frag_index) + timestamp, ext='.data', frag_index=frag_index) + remove_file(os.path.join(self._datadir, purge_file)) + purge_file = self.manager.make_on_disk_filename( + timestamp, ext='.data', frag_index=frag_index, durable=True) remove_file(os.path.join(self._datadir, purge_file)) self.manager.invalidate_hash(dirname(self._datadir)) @@ -2779,7 +2795,7 @@ class ECDiskFileManager(BaseDiskFileManager): return frag_index def make_on_disk_filename(self, timestamp, ext=None, frag_index=None, - ctype_timestamp=None, *a, **kw): + ctype_timestamp=None, durable=False, *a, **kw): """ Returns the EC specific filename for given timestamp. @@ -2791,6 +2807,7 @@ class ECDiskFileManager(BaseDiskFileManager): only, must be a whole number. :param ctype_timestamp: an optional content-type timestamp, an instance of :class:`~swift.common.utils.Timestamp` + :param durable: if True then include a durable marker in data filename. :returns: a file name :raises DiskFileError: if ext=='.data' and the kwarg frag_index is not a whole number @@ -2801,7 +2818,9 @@ class ECDiskFileManager(BaseDiskFileManager): # on the same node in certain situations frag_index = self.validate_fragment_index(frag_index) rv = timestamp.internal + '#' + str(frag_index) - return '%s%s' % (rv, ext or '') + if durable: + rv += '#d' + return '%s%s' % (rv, ext) return super(ECDiskFileManager, self).make_on_disk_filename( timestamp, ext, ctype_timestamp, *a, **kw) @@ -2809,10 +2828,11 @@ class ECDiskFileManager(BaseDiskFileManager): """ Returns timestamp(s) and other info extracted from a policy specific file name. For EC policy the data file name includes a fragment index - which must be stripped off to retrieve the timestamp. + and possibly a durable marker, both of which which must be stripped off + to retrieve the timestamp. :param filename: the file name including extension - :returns: a dict, with keys for timestamp, frag_index, ext and + :returns: a dict, with keys for timestamp, frag_index, durable, ext and ctype_timestamp: * timestamp is a :class:`~swift.common.utils.Timestamp` @@ -2820,7 +2840,9 @@ class ECDiskFileManager(BaseDiskFileManager): * ctype_timestamp is a :class:`~swift.common.utils.Timestamp` or None for .meta files, otherwise None * ext is a string, the file extension including the leading dot or - the empty string if the filename has no extension. + the empty string if the filename has no extension + * durable is a boolean that is True if the filename is a data file + that includes a durable marker :raises DiskFileError: if any part of the filename is not able to be validated. 
@@ -2828,7 +2850,7 @@ class ECDiskFileManager(BaseDiskFileManager): frag_index = None float_frag, ext = splitext(filename) if ext == '.data': - parts = float_frag.split('#', 1) + parts = float_frag.split('#') try: timestamp = Timestamp(parts[0]) except ValueError: @@ -2842,11 +2864,16 @@ class ECDiskFileManager(BaseDiskFileManager): # expect validate_fragment_index raise DiskFileError pass frag_index = self.validate_fragment_index(frag_index) + try: + durable = parts[2] == 'd' + except IndexError: + durable = False return { 'timestamp': timestamp, 'frag_index': frag_index, 'ext': ext, - 'ctype_timestamp': None + 'ctype_timestamp': None, + 'durable': durable } rv = super(ECDiskFileManager, self).parse_on_disk_filename(filename) rv['frag_index'] = None @@ -2855,7 +2882,8 @@ class ECDiskFileManager(BaseDiskFileManager): def _process_ondisk_files(self, exts, results, frag_index=None, frag_prefs=None, **kwargs): """ - Implement EC policy specific handling of .data and .durable files. + Implement EC policy specific handling of .data and legacy .durable + files. If a frag_prefs keyword arg is provided then its value may determine which fragment index at which timestamp is used to construct the @@ -2898,13 +2926,9 @@ class ECDiskFileManager(BaseDiskFileManager): """ durable_info = None if exts.get('.durable'): + # in older versions, separate .durable files were used to indicate + # the durability of data files having the same timestamp durable_info = exts['.durable'][0] - # Mark everything older than most recent .durable as obsolete - # and remove from the exts dict. - for ext in exts.keys(): - exts[ext], older = self._split_gte_timestamp( - exts[ext], durable_info['timestamp']) - results.setdefault('obsolete', []).extend(older) # Split the list of .data files into sets of frags having the same # timestamp, identifying the durable and newest sets (if any) as we go. @@ -2921,8 +2945,18 @@ class ECDiskFileManager(BaseDiskFileManager): frag_set.sort(key=lambda info: info['frag_index']) timestamp = frag_set[0]['timestamp'] frag_sets[timestamp] = frag_set + for frag in frag_set: + # a data file marked as durable may supersede a legacy durable + # file if it is newer + if frag['durable']: + if (not durable_info or + durable_info['timestamp'] < timestamp): + # this frag defines the durable timestamp + durable_info = frag + break if durable_info and durable_info['timestamp'] == timestamp: durable_frag_set = frag_set + break # ignore frags that are older than durable timestamp # Choose which frag set to use chosen_frag_set = None @@ -2986,7 +3020,15 @@ class ECDiskFileManager(BaseDiskFileManager): exts['.meta'], chosen_frag['timestamp']) results['frag_sets'] = frag_sets - # Mark any isolated .durable as obsolete + # Mark everything older than most recent durable data as obsolete + # and remove from the exts dict. + if durable_info: + for ext in exts.keys(): + exts[ext], older = self._split_gte_timestamp( + exts[ext], durable_info['timestamp']) + results.setdefault('obsolete', []).extend(older) + + # Mark any isolated legacy .durable as obsolete if exts.get('.durable') and not durable_frag_set: results.setdefault('obsolete', []).extend(exts['.durable']) exts.pop('.durable') @@ -3042,6 +3084,15 @@ class ECDiskFileManager(BaseDiskFileManager): fi = file_info['frag_index'] hashes[fi].update(file_info['timestamp'].internal) if 'durable_frag_set' in ondisk_info: + # The durable_frag_set may be indicated by a legacy + # .durable file or by a durable #fi#d.data + # file. 
Either way we update hashes[None] with the string + # .durable which is a consistent representation of the + # abstract state of the object regardless of the actual file set. + # That way if we use a local combination of a legacy t1.durable and + # t1#0.data to reconstruct a remote t1#0#d.data then, when next + # hashed, the local and remote will make identical updates to their + # suffix hashes. file_info = ondisk_info['durable_frag_set'][0] hashes[None].update(file_info['timestamp'].internal + '.durable') diff --git a/swift/obj/server.py b/swift/obj/server.py index ca5e2850b1..408b6416e1 100644 --- a/swift/obj/server.py +++ b/swift/obj/server.py @@ -818,9 +818,9 @@ class ObjectController(BaseStorageServer): send_hundred_continue_response() if not self._read_put_commit_message(mime_documents_iter): return HTTPServerError(request=request) - # got 2nd phase confirmation, write a timestamp.durable - # state file to indicate a successful PUT + # got 2nd phase confirmation (when required), call commit to + # indicate a successful PUT writer.commit(request.timestamp) # Drain any remaining MIME docs from the socket. There diff --git a/swift/obj/ssync_receiver.py b/swift/obj/ssync_receiver.py index 4825f94740..6859cff2fb 100644 --- a/swift/obj/ssync_receiver.py +++ b/swift/obj/ssync_receiver.py @@ -277,8 +277,8 @@ class Receiver(object): self.frag_index in df.fragments[remote['ts_data']] and (df.durable_timestamp is None or df.durable_timestamp < remote['ts_data'])): - # We have the frag, just missing a .durable, so try to create the - # .durable now. Try this just once to avoid looping if it fails. + # We have the frag, just missing durable state, so make the frag + # durable now. Try this just once to avoid looping if it fails. try: with df.create() as writer: writer.commit(remote['ts_data']) diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index 7b33cc30a1..9e710c7d68 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -1983,7 +1983,7 @@ class ECGetResponseCollection(object): # durable. Note that this may be a different bucket than the one this # response got added to, and that we may never go and get a durable # frag from this node; it is sufficient that we have been told that a - # .durable exists, somewhere, at t_durable. + # durable frag exists, somewhere, at t_durable. t_durable = headers.get('X-Backend-Durable-Timestamp') if not t_durable and not t_data_file: # obj server not upgraded so assume this response's frag is durable @@ -2619,8 +2619,9 @@ class ECObjectController(BaseObjectController): self._transfer_data(req, policy, data_source, putters, nodes, min_conns, etag_hasher) - # The .durable file will propagate in a replicated fashion; if - # one exists, the reconstructor will spread it around. + # The durable state will propagate in a replicated fashion; if + # one fragment is durable then the reconstructor will spread the + # durable status around. # In order to avoid successfully writing an object, but refusing # to serve it on a subsequent GET because don't have enough # durable data fragments - we require the same number of durable diff --git a/test/probe/test_reconstructor_durable.py b/test/probe/test_reconstructor_durable.py index ccd9e1c78c..be8895f88a 100644 --- a/test/probe/test_reconstructor_durable.py +++ b/test/probe/test_reconstructor_durable.py @@ -13,7 +13,7 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import json from hashlib import md5 import unittest import uuid @@ -75,32 +75,44 @@ class TestReconstructorPropDurable(ECProbeTest): hasher = md5() for chunk in data: hasher.update(chunk) - return hasher.hexdigest() + return headers, hasher.hexdigest() def _check_node(self, node, part, etag, headers_post): # get fragment archive etag - fragment_archive_etag = self.direct_get(node, part) + headers, fragment_archive_etag = self.direct_get(node, part) + self.assertIn('X-Backend-Durable-Timestamp', headers) # sanity check + durable_timestamp = headers['X-Backend-Durable-Timestamp'] - # remove the .durable from the selected node + # make the data file non-durable on the selected node part_dir = self.storage_dir('object', node, part=part) for dirs, subdirs, files in os.walk(part_dir): for fname in files: - if fname.endswith('.durable'): - durable = os.path.join(dirs, fname) - os.remove(durable) - break + if fname.endswith('.data'): + non_durable_fname = fname.replace('#d', '') + os.rename(os.path.join(dirs, fname), + os.path.join(dirs, non_durable_fname)) try: os.remove(os.path.join(part_dir, 'hashes.pkl')) except OSError as e: if e.errno != errno.ENOENT: raise - # fire up reconstructor to propagate the .durable + # sanity check that fragment is no longer durable + headers = direct_client.direct_head_object( + node, part, self.account, self.container_name, self.object_name, + headers={'X-Backend-Storage-Policy-Index': int(self.policy), + 'X-Backend-Fragment-Preferences': json.dumps([])}) + self.assertNotIn('X-Backend-Durable-Timestamp', headers) + + # fire up reconstructor to propagate durable state self.reconstructor.once() # fragment is still exactly as it was before! - self.assertEqual(fragment_archive_etag, - self.direct_get(node, part)) + headers, fragment_archive_etag_2 = self.direct_get(node, part) + self.assertEqual(fragment_archive_etag, fragment_archive_etag_2) + self.assertIn('X-Backend-Durable-Timestamp', headers) + self.assertEqual(durable_timestamp, + headers['X-Backend-Durable-Timestamp']) # check meta meta = client.head_object(self.url, self.token, @@ -132,7 +144,7 @@ class TestReconstructorPropDurable(ECProbeTest): self.object_name, headers=headers_post) del headers_post['X-Auth-Token'] # WTF, where did this come from? - # built up a list of node lists to kill a .durable from, + # built up a list of node lists to make non-durable, # first try a single node # then adjacent nodes and then nodes >1 node apart opart, onodes = self.object_ring.get_nodes( diff --git a/test/unit/obj/common.py b/test/unit/obj/common.py index 8cb618f4f0..bc235fb4c1 100644 --- a/test/unit/obj/common.py +++ b/test/unit/obj/common.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import hashlib +import os import shutil import tempfile import unittest @@ -42,6 +43,35 @@ class FakeReplicator(object): self._diskfile_mgr = self._diskfile_router[policy] +def write_diskfile(df, timestamp, data='test data', frag_index=None, + commit=True, legacy_durable=False, extra_metadata=None): + # Helper method to write some data and metadata to a diskfile. 
+ # Optionally do not commit the diskfile, or commit but using a legacy + # durable file + with df.create() as writer: + writer.write(data) + metadata = { + 'ETag': hashlib.md5(data).hexdigest(), + 'X-Timestamp': timestamp.internal, + 'Content-Length': str(len(data)), + } + if extra_metadata: + metadata.update(extra_metadata) + if frag_index is not None: + metadata['X-Object-Sysmeta-Ec-Frag-Index'] = str(frag_index) + writer.put(metadata) + if commit and legacy_durable: + # simulate legacy .durable file creation + durable_file = os.path.join(df._datadir, + timestamp.internal + '.durable') + with open(durable_file, 'wb'): + pass + elif commit: + writer.commit(timestamp) + # else: don't make it durable + return metadata + + class BaseTest(unittest.TestCase): def setUp(self): # daemon will be set in subclass setUp @@ -64,20 +94,8 @@ class BaseTest(unittest.TestCase): df = df_mgr.get_diskfile( device, partition, *object_parts, policy=policy, frag_index=frag_index) - content_length = len(body) - etag = hashlib.md5(body).hexdigest() - with df.create() as writer: - writer.write(body) - metadata = { - 'X-Timestamp': timestamp.internal, - 'Content-Length': str(content_length), - 'ETag': etag, - } - if extra_metadata: - metadata.update(extra_metadata) - writer.put(metadata) - if commit: - writer.commit(timestamp) + write_diskfile(df, timestamp, data=body, extra_metadata=extra_metadata, + commit=commit) return df def _make_open_diskfile(self, device='dev', partition='9', diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index d70fd129d2..fd918bf4e7 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -57,7 +57,7 @@ from swift.common.exceptions import DiskFileNotExist, DiskFileQuarantined, \ from swift.common.storage_policy import ( POLICIES, get_policy_string, StoragePolicy, ECStoragePolicy, BaseStoragePolicy, REPL_POLICY, EC_POLICY) - +from test.unit.obj.common import write_diskfile test_policies = [ StoragePolicy(0, name='zero', is_default=True), @@ -118,6 +118,27 @@ def _create_test_ring(path, policy): reload_time=intended_reload_time) +def _make_datafilename(timestamp, policy, frag_index=None, durable=False): + if frag_index is None: + frag_index = randint(0, 9) + filename = timestamp.internal + if policy.policy_type == EC_POLICY: + filename += '#%d' % int(frag_index) + if durable: + filename += '#d' + filename += '.data' + return filename + + +def _make_metafilename(meta_timestamp, ctype_timestamp=None): + filename = meta_timestamp.internal + if ctype_timestamp is not None: + delta = meta_timestamp.raw - ctype_timestamp.raw + filename = '%s-%x' % (filename, delta) + filename += '.meta' + return filename + + @patch_policies class TestDiskFileModuleMethods(unittest.TestCase): @@ -1424,58 +1445,54 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): mgr_cls = diskfile.ECDiskFileManager - def test_get_ondisk_files_with_ec_policy(self): + def test_get_ondisk_files_with_ec_policy_and_legacy_durable(self): # Each scenario specifies a list of (filename, extension, [survives]) # tuples. If extension is set then that filename should be returned by # the method under test for that extension type. If the optional # 'survives' is True, the filename should still be in the dir after # cleanup. 
- scenarios = [[('0000000007.00000.ts', '.ts')], + scenarios = [ + # highest frag index is chosen by default + [('0000000007.00000.durable', '.durable'), + ('0000000007.00000#1.data', '.data'), + ('0000000007.00000#0.data', False, True)], - [('0000000007.00000.ts', '.ts'), - ('0000000006.00000.ts', False)], + # data older than durable is ignored + [('0000000007.00000.durable', '.durable'), + ('0000000007.00000#1.data', '.data'), + ('0000000006.00000#1.data', False), + ('0000000004.00000.ts', False)], - # highest frag index is chosen by default - [('0000000007.00000.durable', '.durable'), - ('0000000007.00000#1.data', '.data'), - ('0000000007.00000#0.data', False, True)], + # data older than durable ignored, even if its only data + [('0000000007.00000.durable', False, False), + ('0000000006.00000#1.data', False), + ('0000000004.00000.ts', False)], - # data older than durable is ignored - [('0000000007.00000.durable', '.durable'), - ('0000000007.00000#1.data', '.data'), - ('0000000006.00000#1.data', False), - ('0000000004.00000.ts', False)], + # newer meta trumps older meta + [('0000000009.00000.meta', '.meta'), + ('0000000008.00000.meta', False), + ('0000000007.00000.durable', '.durable'), + ('0000000007.00000#14.data', '.data'), + ('0000000004.00000.ts', False)], - # data older than durable ignored, even if its only data - [('0000000007.00000.durable', False, False), - ('0000000006.00000#1.data', False), - ('0000000004.00000.ts', False)], + # older meta is ignored + [('0000000007.00000.durable', '.durable'), + ('0000000007.00000#14.data', '.data'), + ('0000000006.00000.meta', False), + ('0000000004.00000.ts', False)], - # newer meta trumps older meta - [('0000000009.00000.meta', '.meta'), - ('0000000008.00000.meta', False), - ('0000000007.00000.durable', '.durable'), - ('0000000007.00000#14.data', '.data'), - ('0000000004.00000.ts', False)], + # tombstone trumps meta, data, durable at older timestamp + [('0000000006.00000.ts', '.ts'), + ('0000000005.00000.meta', False), + ('0000000004.00000.durable', False), + ('0000000004.00000#0.data', False)], - # older meta is ignored - [('0000000007.00000.durable', '.durable'), - ('0000000007.00000#14.data', '.data'), - ('0000000006.00000.meta', False), - ('0000000004.00000.ts', False)], - - # tombstone trumps meta, data, durable at older timestamp - [('0000000006.00000.ts', '.ts'), - ('0000000005.00000.meta', False), - ('0000000004.00000.durable', False), - ('0000000004.00000#0.data', False)], - - # tombstone trumps meta, data, durable at same timestamp - [('0000000006.00000.meta', False), - ('0000000006.00000.ts', '.ts'), - ('0000000006.00000.durable', False), - ('0000000006.00000#0.data', False)], - ] + # tombstone trumps meta, data, durable at same timestamp + [('0000000006.00000.meta', False), + ('0000000006.00000.ts', '.ts'), + ('0000000006.00000.durable', False), + ('0000000006.00000#0.data', False)] + ] # these scenarios have same outcome regardless of whether any # fragment preferences are specified @@ -1548,10 +1565,114 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): frag_index=None, frag_prefs=[]) self._test_cleanup_ondisk_files(scenarios, POLICIES.default) - def test_get_ondisk_files_with_ec_policy_and_frag_index(self): - # Each scenario specifies a list of (filename, extension) tuples. If - # extension is set then that filename should be returned by the method - # under test for that extension type. 
+ def test_get_ondisk_files_with_ec_policy(self): + # Each scenario specifies a list of (filename, extension, [survives]) + # tuples. If extension is set then that filename should be returned by + # the method under test for that extension type. If the optional + # 'survives' is True, the filename should still be in the dir after + # cleanup. + scenarios = [[('0000000007.00000.ts', '.ts')], + + [('0000000007.00000.ts', '.ts'), + ('0000000006.00000.ts', False)], + + # highest frag index is chosen by default + [('0000000007.00000#1#d.data', '.data'), + ('0000000007.00000#0.data', False, True)], + + # data older than durable is ignored + [('0000000007.00000#1#d.data', '.data'), + ('0000000006.00000#1.data', False), + ('0000000004.00000.ts', False)], + + # newer meta trumps older meta + [('0000000009.00000.meta', '.meta'), + ('0000000008.00000.meta', False), + ('0000000007.00000#14#d.data', '.data'), + ('0000000004.00000.ts', False)], + + # older meta is ignored + [('0000000007.00000#14#d.data', '.data'), + ('0000000006.00000.meta', False), + ('0000000004.00000.ts', False)], + + # tombstone trumps meta and data at older timestamp + [('0000000006.00000.ts', '.ts'), + ('0000000005.00000.meta', False), + ('0000000004.00000#0#d.data', False)], + + # tombstone trumps meta and data at same timestamp + [('0000000006.00000.meta', False), + ('0000000006.00000.ts', '.ts'), + ('0000000006.00000#0#d.data', False)], + ] + + # these scenarios have same outcome regardless of whether any + # fragment preferences are specified + self._test_get_ondisk_files(scenarios, POLICIES.default, + frag_index=None) + self._test_get_ondisk_files(scenarios, POLICIES.default, + frag_index=None, frag_prefs=[]) + self._test_cleanup_ondisk_files(scenarios, POLICIES.default) + self._test_yield_hashes_cleanup(scenarios, POLICIES.default) + + # next scenarios have different outcomes dependent on whether a + # frag_prefs parameter is passed to diskfile constructor or not + scenarios = [ + # non-durable is ignored + [('0000000007.00000#0.data', False, True)], + + # non-durable data newer than tombstone is ignored + [('0000000007.00000#0.data', False, True), + ('0000000006.00000.ts', '.ts', True)], + + # data newer than durable data is ignored + [('0000000009.00000#2.data', False, True), + ('0000000009.00000#1.data', False, True), + ('0000000008.00000#3.data', False, True), + ('0000000007.00000#1#d.data', '.data'), + ('0000000007.00000#0#d.data', False, True)], + + # non-durable data ignored, older meta deleted + [('0000000007.00000.meta', False, True), + ('0000000006.00000#0.data', False, True), + ('0000000005.00000.meta', False, False), + ('0000000004.00000#1.data', False, True)]] + + self._test_get_ondisk_files(scenarios, POLICIES.default, + frag_index=None) + self._test_cleanup_ondisk_files(scenarios, POLICIES.default) + + scenarios = [ + # non-durable data is chosen + [('0000000007.00000#0.data', '.data', True)], + + # non-durable data newer than tombstone is chosen + [('0000000007.00000#0.data', '.data', True), + ('0000000006.00000.ts', False, True)], + + # non-durable data newer than durable data is chosen, older data + # preserved + [('0000000009.00000#2.data', '.data', True), + ('0000000009.00000#1.data', False, True), + ('0000000008.00000#3.data', False, True), + ('0000000007.00000#1#d.data', False, True), + ('0000000007.00000#0#d.data', False, True)], + + # non-durable data plus meta chosen, older meta deleted + [('0000000007.00000.meta', '.meta', True), + ('0000000006.00000#0.data', '.data', True), + 
('0000000005.00000.meta', False, False), + ('0000000004.00000#1.data', False, True)]] + + self._test_get_ondisk_files(scenarios, POLICIES.default, + frag_index=None, frag_prefs=[]) + self._test_cleanup_ondisk_files(scenarios, POLICIES.default) + + def test_get_ondisk_files_with_ec_policy_and_frag_index_legacy(self): + # Each scenario specifies a list of (filename, extension, [survives]) + # tuples. If extension is set then that filename should be returned by + # the method under test for that extension type. scenarios = [[('0000000007.00000#2.data', False, True), ('0000000007.00000#1.data', '.data'), ('0000000007.00000#0.data', False, True), @@ -1626,6 +1747,170 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): frag_prefs=[]) self._test_cleanup_ondisk_files(scenarios, POLICIES.default) + def test_get_ondisk_files_with_ec_policy_and_frag_index(self): + # Each scenario specifies a list of (filename, extension, [survives]) + # tuples. If extension is set then that filename should be returned by + # the method under test for that extension type. + scenarios = [[('0000000007.00000#2#d.data', False, True), + ('0000000007.00000#1#d.data', '.data'), + ('0000000007.00000#0#d.data', False, True)], + + # specific frag index 1 is returned as long as one durable + [('0000000007.00000#2.data', False, True), + ('0000000007.00000#1.data', '.data', True), + ('0000000007.00000#0#d.data', False, True)], + + # specific frag newer than durable data is ignored + [('0000000007.00000#2.data', False, True), + ('0000000007.00000#1.data', False, True), + ('0000000007.00000#0.data', False, True), + ('0000000006.00000#0#d.data', False, True)], + + # specific frag older than durable data is ignored + [('0000000007.00000#2.data', False), + ('0000000007.00000#1.data', False), + ('0000000007.00000#0.data', False), + ('0000000008.00000#0#d.data', False, True)], + + # specific frag older than newest durable data is ignored + # even if is durable + [('0000000007.00000#2#d.data', False), + ('0000000007.00000#1#d.data', False), + ('0000000008.00000#0#d.data', False, True)], + + # meta included when frag index is specified + [('0000000009.00000.meta', '.meta'), + ('0000000007.00000#2#d.data', False, True), + ('0000000007.00000#1#d.data', '.data'), + ('0000000007.00000#0#d.data', False, True)], + + # specific frag older than tombstone is ignored + [('0000000009.00000.ts', '.ts'), + ('0000000007.00000#2#d.data', False), + ('0000000007.00000#1#d.data', False), + ('0000000007.00000#0#d.data', False)], + + # no data file returned if specific frag index missing + [('0000000007.00000#2#d.data', False, True), + ('0000000007.00000#14#d.data', False, True), + ('0000000007.00000#0#d.data', False, True)], + + # meta ignored if specific frag index missing + [('0000000008.00000.meta', False, True), + ('0000000007.00000#14#d.data', False, True), + ('0000000007.00000#0#d.data', False, True)], + + # meta ignored if no data files + # Note: this is anomalous, because we are specifying a + # frag_index, get_ondisk_files will tolerate .meta with + # no .data + [('0000000088.00000.meta', False, True)] + ] + + self._test_get_ondisk_files(scenarios, POLICIES.default, frag_index=1) + self._test_cleanup_ondisk_files(scenarios, POLICIES.default) + + # scenarios for empty frag_prefs, meaning durable not required + scenarios = [ + # specific frag newer than durable is chosen + [('0000000007.00000#2.data', False, True), + ('0000000007.00000#1.data', '.data', True), + ('0000000007.00000#0.data', False, True)], + ] + 
self._test_get_ondisk_files(scenarios, POLICIES.default, frag_index=1, + frag_prefs=[]) + self._test_cleanup_ondisk_files(scenarios, POLICIES.default) + + def test_get_ondisk_files_with_ec_policy_some_legacy(self): + # Test mixture of legacy durable files and durable data files that + # might somehow end up in the same object dir. + # Each scenario specifies a list of (filename, extension, [survives]) + # tuples. If extension is set then that filename should be returned by + # the method under test for that extension type. If the optional + # 'survives' is True, the filename should still be in the dir after + # cleanup. + scenarios = [ + # .durable at same timestamp is ok + [('0000000007.00000#1#d.data', '.data', True), + ('0000000007.00000#0#d.data', False, True), + ('0000000007.00000.durable', False, True)], + + # .durable at same timestamp is ok with non durable wanted frag + [('0000000007.00000#1.data', '.data', True), + ('0000000007.00000#0#d.data', False, True), + ('0000000007.00000.durable', False, True)], + + # older .durable file is cleaned up + [('0000000007.00000#1#d.data', '.data', True), + ('0000000007.00000#0#d.data', False, True), + ('0000000006.00000.durable', False, False)], + + # older .durable does not interfere with non durable wanted frag + [('0000000007.00000#1.data', '.data', True), + ('0000000007.00000#0#d.data', False, True), + ('0000000006.00000.durable', False, False)], + + # ...even if it has accompanying .data file + [('0000000007.00000#1.data', '.data', True), + ('0000000007.00000#0#d.data', False, True), + ('0000000006.00000#0.data', False, False), + ('0000000006.00000.durable', False, False)], + + # newer .durable file trumps older durable-data + [('0000000007.00000#1#d.data', False, False), + ('0000000007.00000#0#d.data', False, False), + ('0000000008.00000#1.data', '.data', True), + ('0000000008.00000.durable', False, True)], + + # newer .durable file with no .data trumps older durable-data + [('0000000007.00000#1#d.data', False, False), + ('0000000007.00000#0#d.data', False, False), + ('0000000008.00000.durable', False, False)], + ] + + self._test_get_ondisk_files(scenarios, POLICIES.default, frag_index=1) + self._test_cleanup_ondisk_files(scenarios, POLICIES.default) + self._test_yield_hashes_cleanup(scenarios, POLICIES.default) + + def test_cleanup_ondisk_files_reclaim_with_data_files_legacy_durable(self): + # Each scenario specifies a list of (filename, extension, [survives]) + # tuples. If extension is set or 'survives' is True, the filename + # should still be in the dir after cleanup. 
+ much_older = Timestamp(time() - 2000).internal + older = Timestamp(time() - 1001).internal + newer = Timestamp(time() - 900).internal + scenarios = [ + # isolated legacy .durable is cleaned up immediately + [('%s.durable' % newer, False, False)], + + # ...even when other older files are in dir + [('%s.durable' % older, False, False), + ('%s.ts' % much_older, False, False)], + + # isolated .data files are cleaned up when stale + # ...even when there is an older legacy durable + [('%s#2.data' % older, False, False), + ('%s#4.data' % older, False, False), + ('%s#2.data' % much_older, '.data', True), + ('%s#4.data' % much_older, False, True), + ('%s.durable' % much_older, '.durable', True)], + + # tombstone reclaimed despite much older legacy durable + [('%s.ts' % older, '.ts', False), + ('%s.durable' % much_older, False, False)], + + # .meta not reclaimed if there is legacy durable data + [('%s.meta' % older, '.meta', True), + ('%s#4.data' % much_older, False, True), + ('%s.durable' % much_older, '.durable', True)], + + # stale .meta reclaimed along with stale legacy .durable + [('%s.meta' % older, False, False), + ('%s.durable' % much_older, False, False)]] + + self._test_cleanup_ondisk_files(scenarios, POLICIES.default, + reclaim_age=1000) + def test_cleanup_ondisk_files_reclaim_with_data_files(self): # Each scenario specifies a list of (filename, extension, [survives]) # tuples. If extension is set or 'survives' is True, the filename @@ -1634,13 +1919,6 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): older = Timestamp(time() - 1001).internal newer = Timestamp(time() - 900).internal scenarios = [ - # isolated .durable is cleaned up immediately - [('%s.durable' % newer, False, False)], - - # ...even when other older files are in dir - [('%s.durable' % older, False, False), - ('%s.ts' % much_older, False, False)], - # isolated .data files are cleaned up when stale [('%s#2.data' % older, False, False), ('%s#4.data' % older, False, False)], @@ -1648,9 +1926,8 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): # ...even when there is an older durable fileset [('%s#2.data' % older, False, False), ('%s#4.data' % older, False, False), - ('%s#2.data' % much_older, '.data', True), - ('%s#4.data' % much_older, False, True), - ('%s.durable' % much_older, '.durable', True)], + ('%s#2#d.data' % much_older, '.data', True), + ('%s#4#d.data' % much_older, False, True)], # ... 
but preserved if still fresh [('%s#2.data' % newer, False, True), @@ -1667,20 +1944,15 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): # tombstone reclaimed despite much older durable [('%s.ts' % older, '.ts', False), - ('%s.durable' % much_older, False, False)], + ('%s#4#d.data' % much_older, False, False)], # .meta not reclaimed if there is durable data - [('%s.meta' % older, '.meta'), - ('%s#4.data' % much_older, False, True), - ('%s.durable' % much_older, '.durable', True)], + [('%s.meta' % older, '.meta', True), + ('%s#4#d.data' % much_older, False, True)], # stale .meta reclaimed along with stale non-durable .data [('%s.meta' % older, False, False), - ('%s#4.data' % much_older, False, False)], - - # stale .meta reclaimed along with stale .durable - [('%s.meta' % older, False, False), - ('%s.durable' % much_older, False, False)]] + ('%s#4.data' % much_older, False, False)]] self._test_cleanup_ondisk_files(scenarios, POLICIES.default, reclaim_age=1000) @@ -1702,8 +1974,8 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): # sanity files = [ - '0000000006.00000#1.data', - '0000000006.00000.durable', + '0000000006.00000.meta', + '0000000006.00000#1#d.data' ] with create_files(class_under_test, files): class_under_test.open() @@ -1711,13 +1983,13 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): scenarios = [['0000000007.00000.meta'], ['0000000007.00000.meta', - '0000000006.00000.durable'], + '0000000006.00000.durable'], # legacy durable file ['0000000007.00000.meta', '0000000006.00000#1.data'], ['0000000007.00000.meta', - '0000000006.00000.durable', + '0000000006.00000.durable', # legacy durable file '0000000005.00000#1.data'] ] for files in scenarios: @@ -1768,14 +2040,38 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): mgr = self.df_router[POLICIES.default] for ts in (Timestamp('1234567890.00001'), Timestamp('1234567890.00001', offset=17)): + # non-durable data file for frag in (0, 2, 14): fname = '%s#%s.data' % (ts.internal, frag) info = mgr.parse_on_disk_filename(fname) self.assertEqual(ts, info['timestamp']) self.assertEqual('.data', info['ext']) self.assertEqual(frag, info['frag_index']) + self.assertIs(False, info['durable']) self.assertEqual(mgr.make_on_disk_filename(**info), fname) + # durable data file + for frag in (0, 2, 14): + fname = '%s#%s#d.data' % (ts.internal, frag) + info = mgr.parse_on_disk_filename(fname) + self.assertEqual(ts, info['timestamp']) + self.assertEqual('.data', info['ext']) + self.assertEqual(frag, info['frag_index']) + self.assertIs(True, info['durable']) + self.assertEqual(mgr.make_on_disk_filename(**info), fname) + + # data file with unexpected suffix marker, not an error in case + # alternative marker suffixes added in future + for frag in (0, 2, 14): + fname = '%s#%s#junk.data' % (ts.internal, frag) + info = mgr.parse_on_disk_filename(fname) + self.assertEqual(ts, info['timestamp']) + self.assertEqual('.data', info['ext']) + self.assertEqual(frag, info['frag_index']) + self.assertIs(False, info['durable']) + expected = '%s#%s.data' % (ts.internal, frag) + self.assertEqual(mgr.make_on_disk_filename(**info), expected) + for ext in ('.meta', '.durable', '.ts'): fname = '%s%s' % (ts.internal, ext) info = mgr.parse_on_disk_filename(fname) @@ -1804,12 +2100,20 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): 'None': 'bad', } + # non-durable data file for frag, msg in expected.items(): fname = '%s#%s.data' % (ts.internal, frag) with 
self.assertRaises(DiskFileError) as cm: mgr.parse_on_disk_filename(fname) self.assertIn(msg, str(cm.exception).lower()) + # durable data file + for frag, msg in expected.items(): + fname = '%s#%s#d.data' % (ts.internal, frag) + with self.assertRaises(DiskFileError) as cm: + mgr.parse_on_disk_filename(fname) + self.assertIn(msg, str(cm.exception).lower()) + with self.assertRaises(DiskFileError) as cm: mgr.parse_on_disk_filename('junk') self.assertEqual("Invalid Timestamp value in filename 'junk'", @@ -1820,43 +2124,46 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): for ts in (Timestamp('1234567890.00001'), Timestamp('1234567890.00001', offset=17)): for frag in (0, '0', 2, '2', 14, '14'): - expected = '%s#%s.data' % (ts.internal, frag) - actual = mgr.make_on_disk_filename( - ts, '.data', frag_index=frag) - self.assertEqual(expected, actual) - parsed = mgr.parse_on_disk_filename(actual) - self.assertEqual(parsed, { - 'timestamp': ts, - 'frag_index': int(frag), - 'ext': '.data', - 'ctype_timestamp': None - }) - # these functions are inverse - self.assertEqual( - mgr.make_on_disk_filename(**parsed), - expected) - - for ext in ('.meta', '.durable', '.ts'): - expected = '%s%s' % (ts.internal, ext) - # frag index should not be required - actual = mgr.make_on_disk_filename(ts, ext) - self.assertEqual(expected, actual) - # frag index should be ignored + for durable in (True, False): + expected = _make_datafilename( + ts, POLICIES.default, frag_index=frag, durable=durable) actual = mgr.make_on_disk_filename( - ts, ext, frag_index=frag) + ts, '.data', frag_index=frag, durable=durable) self.assertEqual(expected, actual) parsed = mgr.parse_on_disk_filename(actual) self.assertEqual(parsed, { 'timestamp': ts, - 'frag_index': None, - 'ext': ext, - 'ctype_timestamp': None + 'frag_index': int(frag), + 'ext': '.data', + 'ctype_timestamp': None, + 'durable': durable }) # these functions are inverse self.assertEqual( mgr.make_on_disk_filename(**parsed), expected) + for ext in ('.meta', '.durable', '.ts'): + expected = '%s%s' % (ts.internal, ext) + # frag index should not be required + actual = mgr.make_on_disk_filename(ts, ext) + self.assertEqual(expected, actual) + # frag index should be ignored + actual = mgr.make_on_disk_filename( + ts, ext, frag_index=frag) + self.assertEqual(expected, actual) + parsed = mgr.parse_on_disk_filename(actual) + self.assertEqual(parsed, { + 'timestamp': ts, + 'frag_index': None, + 'ext': ext, + 'ctype_timestamp': None + }) + # these functions are inverse + self.assertEqual( + mgr.make_on_disk_filename(**parsed), + expected) + actual = mgr.make_on_disk_filename(ts) self.assertEqual(ts, actual) @@ -1906,7 +2213,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): mgr.make_on_disk_filename(**parsed), expected) - def test_yield_hashes(self): + def test_yield_hashes_legacy_durable(self): old_ts = '1383180000.12345' fresh_ts = Timestamp(time() - 10).internal fresher_ts = Timestamp(time() - 1).internal @@ -1934,7 +2241,33 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): self._check_yield_hashes(POLICIES.default, suffix_map, expected, frag_index=2) - def test_yield_hashes_yields_meta_timestamp(self): + def test_yield_hashes(self): + old_ts = '1383180000.12345' + fresh_ts = Timestamp(time() - 10).internal + fresher_ts = Timestamp(time() - 1).internal + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + fresh_ts + '.ts'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + old_ts + '#2#d.data'], 
+ '9373a92d072897b136b3fc06595b7456': [ + fresh_ts + '.ts', + fresher_ts + '#2#d.data'], + }, + 'def': {}, + } + expected = { + '9373a92d072897b136b3fc06595b4abc': {'ts_data': fresh_ts}, + '9373a92d072897b136b3fc06595b0456': {'ts_data': old_ts}, + '9373a92d072897b136b3fc06595b7456': {'ts_data': fresher_ts}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_yield_hashes_yields_meta_timestamp_legacy_durable(self): ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) ts1 = next(ts_iter) ts2 = next(ts_iter) @@ -1974,7 +2307,45 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): self._check_yield_hashes(POLICIES.default, suffix_map, expected, frag_index=3) - def test_yield_hashes_suffix_filter(self): + def test_yield_hashes_yields_meta_timestamp(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + ts1.internal + '.ts', + ts2.internal + '.meta'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2#d.data', + ts2.internal + '.meta', + ts3.internal + '.meta'], + '9373a92d072897b136b3fc06595b7456': [ + ts1.internal + '#2#d.data', + ts2.internal + '.meta'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b4abc': {'ts_data': ts1}, + '9373a92d072897b136b3fc06595b0456': {'ts_data': ts1, + 'ts_meta': ts3}, + '9373a92d072897b136b3fc06595b7456': {'ts_data': ts1, + 'ts_meta': ts2}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected) + + # but meta timestamp is *not* returned if specified frag index + # is not found + expected = { + '9373a92d072897b136b3fc06595b4abc': {'ts_data': ts1}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=3) + + def test_yield_hashes_suffix_filter_legacy_durable(self): # test again with limited suffixes old_ts = '1383180000.12345' fresh_ts = Timestamp(time() - 10).internal @@ -2002,7 +2373,60 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): self._check_yield_hashes(POLICIES.default, suffix_map, expected, suffixes=['456'], frag_index=2) - def test_yield_hashes_skips_missing_durable(self): + def test_yield_hashes_suffix_filter(self): + # test again with limited suffixes + old_ts = '1383180000.12345' + fresh_ts = Timestamp(time() - 10).internal + fresher_ts = Timestamp(time() - 1).internal + suffix_map = { + 'abc': { + '9373a92d072897b136b3fc06595b4abc': [ + fresh_ts + '.ts'], + }, + '456': { + '9373a92d072897b136b3fc06595b0456': [ + old_ts + '#2#d.data'], + '9373a92d072897b136b3fc06595b7456': [ + fresh_ts + '.ts', + fresher_ts + '#2#d.data'], + }, + 'def': {}, + } + expected = { + '9373a92d072897b136b3fc06595b0456': {'ts_data': old_ts}, + '9373a92d072897b136b3fc06595b7456': {'ts_data': fresher_ts}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + suffixes=['456'], frag_index=2) + + def test_yield_hashes_skips_non_durable_data(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + suffix_map = { + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2#d.data'], + '9373a92d072897b136b3fc06595b7456': [ + ts1.internal + '#2.data'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b0456': {'ts_data': ts1}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + # if we add a durable it shows up + 
suffix_map['456']['9373a92d072897b136b3fc06595b7456'] = [ + ts1.internal + '#2#d.data'] + expected = { + '9373a92d072897b136b3fc06595b0456': {'ts_data': ts1}, + '9373a92d072897b136b3fc06595b7456': {'ts_data': ts1}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_yield_hashes_skips_missing_legacy_durable(self): ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) ts1 = next(ts_iter) suffix_map = { @@ -2030,7 +2454,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): self._check_yield_hashes(POLICIES.default, suffix_map, expected, frag_index=2) - def test_yield_hashes_skips_data_without_durable(self): + def test_yield_hashes_skips_newer_data_without_legacy_durable(self): ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) ts1 = next(ts_iter) ts2 = next(ts_iter) @@ -2063,6 +2487,44 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): self._check_yield_hashes(POLICIES.default, suffix_map, expected, frag_index=2) + def test_yield_hashes_skips_newer_non_durable_data(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2#d.data', + ts2.internal + '#2.data', + ts3.internal + '#2.data'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b0456': {'ts_data': ts1}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=None) + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + # if we make it durable then newer data shows up + suffix_map = { + '456': { + '9373a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2#d.data', + ts2.internal + '#2#d.data', + ts3.internal + '#2.data'], + }, + } + expected = { + '9373a92d072897b136b3fc06595b0456': {'ts_data': ts2}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=None) + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + def test_yield_hashes_ignores_bad_ondisk_filesets(self): # this differs from DiskFileManager.yield_hashes which will fail # when encountering a bad on-disk file set @@ -2073,6 +2535,9 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): '456': { # this one is fine '9333a92d072897b136b3fc06595b0456': [ + ts1.internal + '#2#d.data'], + # this one is fine, legacy durable + '9333a92d072897b136b3fc06595b1456': [ ts1.internal + '#2.data', ts1.internal + '.durable'], # missing frag index @@ -2081,7 +2546,7 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): # junk '9555a92d072897b136b3fc06595b8456': [ 'junk_file'], - # missing .durable + # not durable '9666a92d072897b136b3fc06595b9456': [ ts1.internal + '#2.data', ts2.internal + '.meta'], @@ -2094,11 +2559,18 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): ts2.internal + '.meta'], # this is good with meta '9999a92d072897b136b3fc06595bb456': [ + ts1.internal + '#2#d.data', + ts2.internal + '.meta'], + # this is good with meta, legacy durable + '9999a92d072897b136b3fc06595bc456': [ ts1.internal + '#2.data', ts1.internal + '.durable', ts2.internal + '.meta'], # this one is wrong frag index '9aaaa92d072897b136b3fc06595b0456': [ + ts1.internal + '#7#d.data'], + # this one is wrong frag index, legacy durable + '9aaaa92d072897b136b3fc06595b1456': [ ts1.internal + '#7.data', ts1.internal + '.durable'], }, @@ -2107,11 +2579,51 @@ 
class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): '9333a92d072897b136b3fc06595b0456': {'ts_data': ts1}, '9999a92d072897b136b3fc06595bb456': {'ts_data': ts1, 'ts_meta': ts2}, + '9333a92d072897b136b3fc06595b1456': {'ts_data': ts1}, + '9999a92d072897b136b3fc06595bc456': {'ts_data': ts1, + 'ts_meta': ts2}, } self._check_yield_hashes(POLICIES.default, suffix_map, expected, frag_index=2) def test_yield_hashes_filters_frag_index(self): + ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) + ts1 = next(ts_iter) + ts2 = next(ts_iter) + ts3 = next(ts_iter) + suffix_map = { + '27e': { + '1111111111111111111111111111127e': [ + ts1.internal + '#2#d.data', + ts1.internal + '#3#d.data', + ], + '2222222222222222222222222222227e': [ + ts1.internal + '#2#d.data', + ts2.internal + '#2#d.data', + ], + }, + 'd41': { + 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaad41': [ + ts1.internal + '#3#d.data', + ], + }, + '00b': { + '3333333333333333333333333333300b': [ + ts1.internal + '#2.data', + ts2.internal + '#2.data', + ts3.internal + '#2#d.data', + ], + }, + } + expected = { + '1111111111111111111111111111127e': {'ts_data': ts1}, + '2222222222222222222222222222227e': {'ts_data': ts2}, + '3333333333333333333333333333300b': {'ts_data': ts3}, + } + self._check_yield_hashes(POLICIES.default, suffix_map, expected, + frag_index=2) + + def test_yield_hashes_filters_frag_index_legacy_durable(self): ts_iter = (Timestamp(t) for t in itertools.count(int(time()))) ts1 = next(ts_iter) ts2 = next(ts_iter) @@ -2153,27 +2665,17 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): self._check_yield_hashes(POLICIES.default, suffix_map, expected, frag_index=2) - def test_get_diskfile_from_hash_frag_index_filter(self): + def _test_get_diskfile_from_hash_frag_index_filter(self, legacy_durable): df = self._get_diskfile(POLICIES.default) hash_ = os.path.basename(df._datadir) self.assertRaises(DiskFileNotExist, self.df_mgr.get_diskfile_from_hash, self.existing_device1, '0', hash_, POLICIES.default) # sanity - frag_index = 7 timestamp = Timestamp(time()) for frag_index in (4, 7): - with df.create() as writer: - data = 'test_data' - writer.write(data) - metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': timestamp.internal, - 'Content-Length': len(data), - 'X-Object-Sysmeta-Ec-Frag-Index': str(frag_index), - } - writer.put(metadata) - writer.commit(timestamp) + write_diskfile(df, timestamp, frag_index=frag_index, + legacy_durable=legacy_durable) df4 = self.df_mgr.get_diskfile_from_hash( self.existing_device1, '0', hash_, POLICIES.default, frag_index=4) @@ -2186,6 +2688,12 @@ class TestECDiskFileManager(DiskFileManagerMixin, unittest.TestCase): self.assertEqual( df7.read_metadata()['X-Object-Sysmeta-Ec-Frag-Index'], '7') + def test_get_diskfile_from_hash_frag_index_filter(self): + self._test_get_diskfile_from_hash_frag_index_filter(False) + + def test_get_diskfile_from_hash_frag_index_filter_legacy_durable(self): + self._test_get_diskfile_from_hash_frag_index_filter(True) + class DiskFileMixin(BaseDiskFileTestMixin): @@ -2224,7 +2732,7 @@ class DiskFileMixin(BaseDiskFileTestMixin): def _create_ondisk_file(self, df, data, timestamp, metadata=None, ctype_timestamp=None, - ext='.data'): + ext='.data', legacy_durable=False): mkdirs(df._datadir) if timestamp is None: timestamp = time() @@ -2243,7 +2751,14 @@ class DiskFileMixin(BaseDiskFileTestMixin): metadata['Content-Length'] = str(len(data)) filename = timestamp.internal if ext == '.data' and df.policy.policy_type == EC_POLICY: - filename = 
'%s#%s' % (timestamp.internal, df._frag_index) + if legacy_durable: + filename = '%s#%s' % (timestamp.internal, df._frag_index) + durable_file = os.path.join(df._datadir, + '%s.durable' % timestamp.internal) + with open(durable_file, 'wb') as f: + pass + else: + filename = '%s#%s#d' % (timestamp.internal, df._frag_index) if ctype_timestamp: metadata.update( {'Content-Type-Timestamp': @@ -3165,61 +3680,61 @@ class DiskFileMixin(BaseDiskFileTestMixin): DiskFileNoSpace, diskfile.write_metadata, 'n/a', metadata) - def _create_diskfile_dir(self, timestamp, policy): + def _create_diskfile_dir(self, timestamp, policy, legacy_durable=False): timestamp = Timestamp(timestamp) df = self._simple_get_diskfile(account='a', container='c', obj='o_%s' % policy, policy=policy) - - with df.create() as writer: - metadata = { - 'ETag': 'bogus_etag', - 'X-Timestamp': timestamp.internal, - 'Content-Length': '0', - } - if policy.policy_type == EC_POLICY: - metadata['X-Object-Sysmeta-Ec-Frag-Index'] = \ - df._frag_index or 7 - writer.put(metadata) - writer.commit(timestamp) - return writer._datadir + frag_index = None + if policy.policy_type == EC_POLICY: + frag_index = df._frag_index or 7 + write_diskfile(df, timestamp, frag_index=frag_index, + legacy_durable=legacy_durable) + return df._datadir def test_commit(self): for policy in POLICIES: - # create first fileset as starting state - timestamp = Timestamp(time()).internal - datadir = self._create_diskfile_dir(timestamp, policy) - dl = os.listdir(datadir) - expected = ['%s.data' % timestamp] - if policy.policy_type == EC_POLICY: - expected = ['%s#2.data' % timestamp, - '%s.durable' % timestamp] + timestamp = Timestamp(time()) + df = self._simple_get_diskfile(account='a', container='c', + obj='o_%s' % policy, + policy=policy) + write_diskfile(df, timestamp, frag_index=2) + dl = os.listdir(df._datadir) + expected = [_make_datafilename( + timestamp, policy, frag_index=2, durable=True)] self.assertEqual(len(dl), len(expected), 'Unexpected dir listing %s' % dl) - self.assertEqual(sorted(expected), sorted(dl)) + self.assertEqual(expected, dl) + if policy.policy_type == EC_POLICY: + self.assertEqual(2, df._frag_index) + + def _do_test_write_cleanup(self, policy, legacy_durable=False): + # create first fileset as starting state + timestamp_1 = Timestamp(time()) + datadir_1 = self._create_diskfile_dir( + timestamp_1, policy, legacy_durable) + # second write should clean up first fileset + timestamp_2 = Timestamp(time() + 1) + datadir_2 = self._create_diskfile_dir(timestamp_2, policy) + # sanity check + self.assertEqual(datadir_1, datadir_2) + dl = os.listdir(datadir_2) + expected = [_make_datafilename( + timestamp_2, policy, frag_index=2, durable=True)] + self.assertEqual(len(dl), len(expected), + 'Unexpected dir listing %s' % dl) + self.assertEqual(expected, dl) def test_write_cleanup(self): for policy in POLICIES: - # create first fileset as starting state - timestamp_1 = Timestamp(time()).internal - datadir_1 = self._create_diskfile_dir(timestamp_1, policy) - # second write should clean up first fileset - timestamp_2 = Timestamp(time() + 1).internal - datadir_2 = self._create_diskfile_dir(timestamp_2, policy) - # sanity check - self.assertEqual(datadir_1, datadir_2) - dl = os.listdir(datadir_2) - expected = ['%s.data' % timestamp_2] - if policy.policy_type == EC_POLICY: - expected = ['%s#2.data' % timestamp_2, - '%s.durable' % timestamp_2] - self.assertEqual(len(dl), len(expected), - 'Unexpected dir listing %s' % dl) - self.assertEqual(sorted(expected), sorted(dl)) + 
self._do_test_write_cleanup(policy) - def test_commit_fsync(self): + def test_write_cleanup_legacy_durable(self): + for policy in POLICIES: + self._do_test_write_cleanup(policy, legacy_durable=True) + + def test_commit_no_extra_fsync(self): for policy in POLICIES: - mock_fsync = mock.MagicMock() df = self._simple_get_diskfile(account='a', container='c', obj='o', policy=policy) @@ -3230,16 +3745,13 @@ class DiskFileMixin(BaseDiskFileTestMixin): 'X-Timestamp': timestamp.internal, 'Content-Length': '0', } - writer.put(metadata) - with mock.patch('swift.obj.diskfile.fsync', mock_fsync): + with mock.patch('swift.obj.diskfile.fsync') as mock_fsync: + writer.put(metadata) + self.assertEqual(1, mock_fsync.call_count) writer.commit(timestamp) - expected = { - EC_POLICY: 1, - REPL_POLICY: 0, - }[policy.policy_type] - self.assertEqual(expected, mock_fsync.call_count) + self.assertEqual(1, mock_fsync.call_count) if policy.policy_type == EC_POLICY: - self.assertTrue(isinstance(mock_fsync.call_args[0][0], int)) + self.assertIsInstance(mock_fsync.call_args[0][0], int) def test_commit_ignores_cleanup_ondisk_files_error(self): for policy in POLICIES: @@ -3264,14 +3776,17 @@ class DiskFileMixin(BaseDiskFileTestMixin): REPL_POLICY: 0, }[policy.policy_type] self.assertEqual(expected, mock_cleanup.call_count) - expected = ['%s.data' % timestamp.internal] - if policy.policy_type == EC_POLICY: - expected = ['%s#2.data' % timestamp.internal, - '%s.durable' % timestamp.internal] + if expected: + self.assertIn( + 'Problem cleaning up', + df.manager.logger.get_lines_for_level('error')[0]) + + expected = [_make_datafilename( + timestamp, policy, frag_index=2, durable=True)] dl = os.listdir(df._datadir) self.assertEqual(len(dl), len(expected), 'Unexpected dir listing %s' % dl) - self.assertEqual(sorted(expected), sorted(dl)) + self.assertEqual(expected, dl) def test_number_calls_to_cleanup_ondisk_files_during_create(self): # Check how many calls are made to cleanup_ondisk_files, and when, @@ -3466,15 +3981,14 @@ class DiskFileMixin(BaseDiskFileTestMixin): else: self.fail("Expected DiskFileDeleted exception") - def test_ondisk_search_loop_meta_data_ts(self): + def _test_ondisk_search_loop_meta_data_ts(self, legacy_durable=False): df = self._simple_get_diskfile() self._create_ondisk_file(df, '', ext='.meta', timestamp=10) self._create_ondisk_file(df, '', ext='.meta', timestamp=9) - self._create_ondisk_file(df, 'B', ext='.data', timestamp=8) - self._create_ondisk_file(df, 'A', ext='.data', timestamp=7) - if df.policy.policy_type == EC_POLICY: - self._create_ondisk_file(df, '', ext='.durable', timestamp=8) - self._create_ondisk_file(df, '', ext='.durable', timestamp=7) + self._create_ondisk_file( + df, 'B', ext='.data', legacy_durable=legacy_durable, timestamp=8) + self._create_ondisk_file( + df, 'A', ext='.data', legacy_durable=legacy_durable, timestamp=7) self._create_ondisk_file(df, '', ext='.ts', timestamp=6) self._create_ondisk_file(df, '', ext='.ts', timestamp=5) df = self._simple_get_diskfile() @@ -3484,7 +3998,14 @@ class DiskFileMixin(BaseDiskFileTestMixin): Timestamp(10).internal) self.assertNotIn('deleted', df._metadata) - def test_ondisk_search_loop_multiple_meta_data(self): + def test_ondisk_search_loop_meta_data_ts(self): + self._test_ondisk_search_loop_meta_data_ts() + + def test_ondisk_search_loop_meta_data_ts_legacy_durable(self): + self._test_ondisk_search_loop_meta_data_ts(legacy_durable=True) + + def _test_ondisk_search_loop_multiple_meta_data(self, + legacy_durable=False): df = 
self._simple_get_diskfile() self._create_ondisk_file(df, '', ext='.meta', timestamp=10, metadata={'X-Object-Meta-User': 'user-meta'}) @@ -3492,13 +4013,12 @@ class DiskFileMixin(BaseDiskFileTestMixin): ctype_timestamp=9, metadata={'Content-Type': 'newest', 'X-Object-Meta-User': 'blah'}) - self._create_ondisk_file(df, 'B', ext='.data', timestamp=8, - metadata={'Content-Type': 'newer'}) - self._create_ondisk_file(df, 'A', ext='.data', timestamp=7, - metadata={'Content-Type': 'oldest'}) - if df.policy.policy_type == EC_POLICY: - self._create_ondisk_file(df, '', ext='.durable', timestamp=8) - self._create_ondisk_file(df, '', ext='.durable', timestamp=7) + self._create_ondisk_file( + df, 'B', ext='.data', legacy_durable=legacy_durable, timestamp=8, + metadata={'Content-Type': 'newer'}) + self._create_ondisk_file( + df, 'A', ext='.data', legacy_durable=legacy_durable, timestamp=7, + metadata={'Content-Type': 'oldest'}) df = self._simple_get_diskfile() with df.open(): self.assertTrue('X-Timestamp' in df._metadata) @@ -3509,7 +4029,13 @@ class DiskFileMixin(BaseDiskFileTestMixin): self.assertTrue('X-Object-Meta-User' in df._metadata) self.assertEqual(df._metadata['X-Object-Meta-User'], 'user-meta') - def test_ondisk_search_loop_stale_meta_data(self): + def test_ondisk_search_loop_multiple_meta_data(self): + self._test_ondisk_search_loop_multiple_meta_data() + + def test_ondisk_search_loop_multiple_meta_data_legacy_durable(self): + self._test_ondisk_search_loop_multiple_meta_data(legacy_durable=True) + + def _test_ondisk_search_loop_stale_meta_data(self, legacy_durable=False): df = self._simple_get_diskfile() self._create_ondisk_file(df, '', ext='.meta', timestamp=10, metadata={'X-Object-Meta-User': 'user-meta'}) @@ -3517,10 +4043,9 @@ class DiskFileMixin(BaseDiskFileTestMixin): ctype_timestamp=7, metadata={'Content-Type': 'older', 'X-Object-Meta-User': 'blah'}) - self._create_ondisk_file(df, 'B', ext='.data', timestamp=8, - metadata={'Content-Type': 'newer'}) - if df.policy.policy_type == EC_POLICY: - self._create_ondisk_file(df, '', ext='.durable', timestamp=8) + self._create_ondisk_file( + df, 'B', ext='.data', legacy_durable=legacy_durable, timestamp=8, + metadata={'Content-Type': 'newer'}) df = self._simple_get_diskfile() with df.open(): self.assertTrue('X-Timestamp' in df._metadata) @@ -3531,13 +4056,43 @@ class DiskFileMixin(BaseDiskFileTestMixin): self.assertTrue('X-Object-Meta-User' in df._metadata) self.assertEqual(df._metadata['X-Object-Meta-User'], 'user-meta') - def test_ondisk_search_loop_data_ts_meta(self): + def test_ondisk_search_loop_stale_meta_data(self): + self._test_ondisk_search_loop_stale_meta_data() + + def test_ondisk_search_loop_stale_meta_data_legacy_durable(self): + self._test_ondisk_search_loop_stale_meta_data(legacy_durable=True) + + def _test_ondisk_search_loop_data_ts_meta(self, legacy_durable=False): df = self._simple_get_diskfile() - self._create_ondisk_file(df, 'B', ext='.data', timestamp=10) - self._create_ondisk_file(df, 'A', ext='.data', timestamp=9) - if df.policy.policy_type == EC_POLICY: - self._create_ondisk_file(df, '', ext='.durable', timestamp=10) - self._create_ondisk_file(df, '', ext='.durable', timestamp=9) + self._create_ondisk_file( + df, 'B', ext='.data', legacy_durable=legacy_durable, timestamp=10) + self._create_ondisk_file( + df, 'A', ext='.data', legacy_durable=legacy_durable, timestamp=9) + self._create_ondisk_file(df, '', ext='.ts', timestamp=8) + self._create_ondisk_file(df, '', ext='.ts', timestamp=7) + self._create_ondisk_file(df, '', 
ext='.meta', timestamp=6) + self._create_ondisk_file(df, '', ext='.meta', timestamp=5) + df = self._simple_get_diskfile() + with df.open(): + self.assertIn('X-Timestamp', df._metadata) + self.assertEqual(df._metadata['X-Timestamp'], + Timestamp(10).internal) + self.assertNotIn('deleted', df._metadata) + + def test_ondisk_search_loop_data_ts_meta(self): + self._test_ondisk_search_loop_data_ts_meta() + + def test_ondisk_search_loop_data_ts_meta_legacy_durable(self): + self._test_ondisk_search_loop_data_ts_meta(legacy_durable=True) + + def _test_ondisk_search_loop_wayward_files_ignored(self, + legacy_durable=False): + df = self._simple_get_diskfile() + self._create_ondisk_file(df, 'X', ext='.bar', timestamp=11) + self._create_ondisk_file( + df, 'B', ext='.data', legacy_durable=legacy_durable, timestamp=10) + self._create_ondisk_file( + df, 'A', ext='.data', legacy_durable=legacy_durable, timestamp=9) self._create_ondisk_file(df, '', ext='.ts', timestamp=8) self._create_ondisk_file(df, '', ext='.ts', timestamp=7) self._create_ondisk_file(df, '', ext='.meta', timestamp=6) @@ -3550,25 +4105,13 @@ class DiskFileMixin(BaseDiskFileTestMixin): self.assertNotIn('deleted', df._metadata) def test_ondisk_search_loop_wayward_files_ignored(self): - df = self._simple_get_diskfile() - self._create_ondisk_file(df, 'X', ext='.bar', timestamp=11) - self._create_ondisk_file(df, 'B', ext='.data', timestamp=10) - self._create_ondisk_file(df, 'A', ext='.data', timestamp=9) - if df.policy.policy_type == EC_POLICY: - self._create_ondisk_file(df, '', ext='.durable', timestamp=10) - self._create_ondisk_file(df, '', ext='.durable', timestamp=9) - self._create_ondisk_file(df, '', ext='.ts', timestamp=8) - self._create_ondisk_file(df, '', ext='.ts', timestamp=7) - self._create_ondisk_file(df, '', ext='.meta', timestamp=6) - self._create_ondisk_file(df, '', ext='.meta', timestamp=5) - df = self._simple_get_diskfile() - with df.open(): - self.assertIn('X-Timestamp', df._metadata) - self.assertEqual(df._metadata['X-Timestamp'], - Timestamp(10).internal) - self.assertNotIn('deleted', df._metadata) + self._test_ondisk_search_loop_wayward_files_ignored() - def test_ondisk_search_loop_listdir_error(self): + def test_ondisk_search_loop_wayward_files_ignored_legacy_durable(self): + self._test_ondisk_search_loop_wayward_files_ignored( + legacy_durable=True) + + def _test_ondisk_search_loop_listdir_error(self, legacy_durable=False): df = self._simple_get_diskfile() def mock_listdir_exp(*args, **kwargs): @@ -3576,11 +4119,10 @@ class DiskFileMixin(BaseDiskFileTestMixin): with mock.patch("os.listdir", mock_listdir_exp): self._create_ondisk_file(df, 'X', ext='.bar', timestamp=11) - self._create_ondisk_file(df, 'B', ext='.data', timestamp=10) - self._create_ondisk_file(df, 'A', ext='.data', timestamp=9) - if df.policy.policy_type == EC_POLICY: - self._create_ondisk_file(df, '', ext='.durable', timestamp=10) - self._create_ondisk_file(df, '', ext='.durable', timestamp=9) + self._create_ondisk_file(df, 'B', ext='.data', timestamp=10, + legacy_durable=legacy_durable) + self._create_ondisk_file(df, 'A', ext='.data', timestamp=9, + legacy_durable=legacy_durable) self._create_ondisk_file(df, '', ext='.ts', timestamp=8) self._create_ondisk_file(df, '', ext='.ts', timestamp=7) self._create_ondisk_file(df, '', ext='.meta', timestamp=6) @@ -3588,6 +4130,12 @@ class DiskFileMixin(BaseDiskFileTestMixin): df = self._simple_get_diskfile() self.assertRaises(DiskFileError, df.open) + def test_ondisk_search_loop_listdir_error(self): + 
self._test_ondisk_search_loop_listdir_error() + + def test_ondisk_search_loop_listdir_error_legacy_durable(self): + self._test_ondisk_search_loop_listdir_error(legacy_durable=True) + def test_exception_in_handle_close_quarantine(self): df = self._get_open_disk_file() @@ -3724,9 +4272,13 @@ class DiskFileMixin(BaseDiskFileTestMixin): with self.assertRaises(DiskFileNotOpen): df.content_type - def test_durable_timestamp(self): + def _do_test_durable_timestamp(self, legacy_durable): ts_1 = self.ts() - df = self._get_open_disk_file(ts=ts_1.internal) + df = self._simple_get_diskfile(frag_index=2) + write_diskfile(df, ts_1, legacy_durable=legacy_durable) + # get a new instance of the diskfile to ensure timestamp variable is + # set by the open() and not just the write operations + df = self._simple_get_diskfile(frag_index=2) with df.open(): self.assertEqual(df.durable_timestamp, ts_1.internal) # verify durable timestamp does not change when metadata is written @@ -3735,6 +4287,9 @@ class DiskFileMixin(BaseDiskFileTestMixin): with df.open(): self.assertEqual(df.durable_timestamp, ts_1.internal) + def test_durable_timestamp(self): + self._do_test_durable_timestamp(False) + def test_durable_timestamp_not_open(self): df = self._simple_get_diskfile() with self.assertRaises(DiskFileNotOpen): @@ -4069,41 +4624,10 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): mgr_cls = diskfile.ECDiskFileManager - def test_commit_raises_DiskFileErrors(self): - scenarios = ((errno.ENOSPC, DiskFileNoSpace), - (errno.EDQUOT, DiskFileNoSpace), - (errno.ENOTDIR, DiskFileError), - (errno.EPERM, DiskFileError)) - - # Check IOErrors from open() is handled - for err_number, expected_exception in scenarios: - io_error = IOError() - io_error.errno = err_number - mock_open = mock.MagicMock(side_effect=io_error) - df = self._simple_get_diskfile(account='a', container='c', - obj='o_%s' % err_number, - policy=POLICIES.default) - timestamp = Timestamp(time()) - with df.create() as writer: - metadata = { - 'ETag': 'bogus_etag', - 'X-Timestamp': timestamp.internal, - 'Content-Length': '0', - } - writer.put(metadata) - with mock.patch('six.moves.builtins.open', mock_open): - self.assertRaises(expected_exception, - writer.commit, - timestamp) - dl = os.listdir(df._datadir) - self.assertEqual(1, len(dl), dl) - rmtree(df._datadir) - - # Check OSError from fsync() is handled - mock_fsync = mock.MagicMock(side_effect=OSError) + def _test_commit_raises_DiskFileError_for_rename_error(self, fake_err): df = self._simple_get_diskfile(account='a', container='c', - obj='o_fsync_error') - + obj='o_rename_err', + policy=POLICIES.default) timestamp = Timestamp(time()) with df.create() as writer: metadata = { @@ -4112,50 +4636,54 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): 'Content-Length': '0', } writer.put(metadata) - with mock.patch('swift.obj.diskfile.fsync', mock_fsync): - self.assertRaises(DiskFileError, - writer.commit, timestamp) + with mock.patch('swift.obj.diskfile.os.rename', + side_effect=fake_err): + with self.assertRaises(DiskFileError) as cm: + writer.commit(timestamp) + dl = os.listdir(df._datadir) + datafile = _make_datafilename( + timestamp, POLICIES.default, frag_index=2, durable=False) + self.assertEqual([datafile], dl) + return df, cm.exception - def test_commit_fsync_dir_raises_DiskFileErrors(self): - scenarios = ((errno.ENOSPC, DiskFileNoSpace), - (errno.EDQUOT, DiskFileNoSpace), - (errno.ENOTDIR, DiskFileError), - (errno.EPERM, DiskFileError)) + def 
test_commit_raises_DiskFileError_for_rename_ENOSPC_IOError(self): + df, exc = self._test_commit_raises_DiskFileError_for_rename_error( + IOError(errno.ENOSPC, 'ENOSPC')) + self.assertIsInstance(exc, DiskFileNoSpace) + self.assertIn('No space left on device', str(exc)) + self.assertIn('No space left on device', + df.manager.logger.get_lines_for_level('error')[0]) + self.assertFalse(df.manager.logger.get_lines_for_level('error')[1:]) - # Check IOErrors from fsync_dir() is handled - for err_number, expected_exception in scenarios: - io_error = IOError(err_number, os.strerror(err_number)) - mock_open = mock.MagicMock(side_effect=io_error) - mock_io_error = mock.MagicMock(side_effect=io_error) - df = self._simple_get_diskfile(account='a', container='c', - obj='o_%s' % err_number, - policy=POLICIES.default) - timestamp = Timestamp(time()) - with df.create() as writer: - metadata = { - 'ETag': 'bogus_etag', - 'X-Timestamp': timestamp.internal, - 'Content-Length': '0', - } - writer.put(metadata) - with mock.patch('six.moves.builtins.open', mock_open): - self.assertRaises(expected_exception, - writer.commit, - timestamp) - with mock.patch('swift.obj.diskfile.fsync_dir', mock_io_error): - self.assertRaises(expected_exception, - writer.commit, - timestamp) - dl = os.listdir(df._datadir) - self.assertEqual(2, len(dl), dl) - rmtree(df._datadir) + def test_commit_raises_DiskFileError_for_rename_EDQUOT_IOError(self): + df, exc = self._test_commit_raises_DiskFileError_for_rename_error( + IOError(errno.EDQUOT, 'EDQUOT')) + self.assertIsInstance(exc, DiskFileNoSpace) + self.assertIn('No space left on device', str(exc)) + self.assertIn('No space left on device', + df.manager.logger.get_lines_for_level('error')[0]) + self.assertFalse(df.manager.logger.get_lines_for_level('error')[1:]) - # Check OSError from fsync_dir() is handled - mock_os_error = mock.MagicMock( - side_effect=OSError(100, 'Some Error')) + def test_commit_raises_DiskFileError_for_rename_other_IOError(self): + df, exc = self._test_commit_raises_DiskFileError_for_rename_error( + IOError(21, 'Some other IO Error')) + self.assertIn('Problem making data file durable', str(exc)) + self.assertIn('Problem making data file durable', + df.manager.logger.get_lines_for_level('error')[0]) + self.assertFalse(df.manager.logger.get_lines_for_level('error')[1:]) + + def test_commit_raises_DiskFileError_for_rename_OSError(self): + df, exc = self._test_commit_raises_DiskFileError_for_rename_error( + OSError(100, 'Some Error')) + self.assertIn('Problem making data file durable', str(exc)) + self.assertIn('Problem making data file durable', + df.manager.logger.get_lines_for_level('error')[0]) + self.assertFalse(df.manager.logger.get_lines_for_level('error')[1:]) + + def _test_commit_raises_DiskFileError_for_fsync_dir_errors(self, fake_err): df = self._simple_get_diskfile(account='a', container='c', - obj='o_fsync_dir_error') - + obj='o_fsync_dir_err', + policy=POLICIES.default) timestamp = Timestamp(time()) with df.create() as writer: metadata = { @@ -4164,16 +4692,34 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): 'Content-Length': '0', } writer.put(metadata) - with mock.patch('swift.obj.diskfile.fsync_dir', mock_os_error): - self.assertRaises(DiskFileError, - writer.commit, timestamp) + with mock.patch('swift.obj.diskfile.fsync_dir', + side_effect=fake_err): + with self.assertRaises(DiskFileError) as cm: + writer.commit(timestamp) + dl = os.listdir(df._datadir) + datafile = _make_datafilename( + timestamp, POLICIES.default, frag_index=2, 
durable=True) + self.assertEqual([datafile], dl) + self.assertIn('Problem making data file durable', str(cm.exception)) + self.assertIn('Problem making data file durable', + df.manager.logger.get_lines_for_level('error')[0]) + self.assertFalse(df.manager.logger.get_lines_for_level('error')[1:]) + + def test_commit_raises_DiskFileError_for_fsync_dir_IOError(self): + self._test_commit_raises_DiskFileError_for_fsync_dir_errors( + IOError(21, 'Some IO Error')) + + def test_commit_raises_DiskFileError_for_fsync_dir_OSError(self): + self._test_commit_raises_DiskFileError_for_fsync_dir_errors( + OSError(100, 'Some Error')) def test_data_file_has_frag_index(self): policy = POLICIES.default for good_value in (0, '0', 2, '2', 14, '14'): # frag_index set by constructor arg - ts = self.ts().internal - expected = ['%s#%s.data' % (ts, good_value), '%s.durable' % ts] + ts = self.ts() + expected = [_make_datafilename( + ts, policy, good_value, durable=True)] df = self._get_open_disk_file(ts=ts, policy=policy, frag_index=good_value) self.assertEqual(expected, sorted(os.listdir(df._datadir))) @@ -4182,8 +4728,9 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): self.assertEqual(int(good_value), int(actual)) # metadata value overrides the constructor arg - ts = self.ts().internal - expected = ['%s#%s.data' % (ts, good_value), '%s.durable' % ts] + ts = self.ts() + expected = [_make_datafilename( + ts, policy, good_value, durable=True)] meta = {'X-Object-Sysmeta-Ec-Frag-Index': good_value} df = self._get_open_disk_file(ts=ts, policy=policy, frag_index='99', @@ -4193,8 +4740,9 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): self.assertEqual(int(good_value), int(actual)) # metadata value alone is sufficient - ts = self.ts().internal - expected = ['%s#%s.data' % (ts, good_value), '%s.durable' % ts] + ts = self.ts() + expected = [_make_datafilename( + ts, policy, good_value, durable=True)] meta = {'X-Object-Sysmeta-Ec-Frag-Index': good_value} df = self._get_open_disk_file(ts=ts, policy=policy, frag_index=None, @@ -4209,8 +4757,9 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): policy = POLICIES.default orig_frag_index = 14 # frag_index set by constructor arg - ts = self.ts().internal - expected = ['%s#%s.data' % (ts, orig_frag_index), '%s.durable' % ts] + ts = self.ts() + expected = [_make_datafilename( + ts, policy, frag_index=orig_frag_index, durable=True)] df = self._get_open_disk_file(ts=ts, policy=policy, obj_name='my_obj', frag_index=orig_frag_index) self.assertEqual(expected, sorted(os.listdir(df._datadir))) @@ -4227,12 +4776,12 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): self.assertEqual(int(orig_frag_index), int(actual)) # write metadata to a meta file - ts = self.ts().internal - metadata = {'X-Timestamp': ts, + ts = self.ts() + metadata = {'X-Timestamp': ts.internal, 'X-Object-Meta-Fruit': 'kiwi'} df.write_metadata(metadata) # sanity check we did write a meta file - expected.append('%s.meta' % ts) + expected.append('%s.meta' % ts.internal) actual_files = sorted(os.listdir(df._datadir)) self.assertEqual(expected, actual_files) @@ -4249,8 +4798,8 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): self.assertEqual(int(orig_frag_index), int(actual)) # attempt to overwrite frag index sysmeta - ts = self.ts().internal - metadata = {'X-Timestamp': ts, + ts = self.ts() + metadata = {'X-Timestamp': ts.internal, 'X-Object-Sysmeta-Ec-Frag-Index': 99, 'X-Object-Meta-Fruit': 'apple'} df.write_metadata(metadata) @@ -4314,50 +4863,43 @@ class 
TestECDiskFile(DiskFileMixin, unittest.TestCase): ts = self.ts() for frag_index in (1, 2): df = self._simple_get_diskfile(frag_index=frag_index) - with df.create() as writer: - data = 'test data' - writer.write(data) - metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts.internal, - 'Content-Length': len(data), - } - writer.put(metadata) - writer.commit(ts) + write_diskfile(df, ts) # sanity self.assertEqual(sorted(os.listdir(df._datadir)), [ - ts.internal + '#1.data', - ts.internal + '#2.data', - ts.internal + '.durable', + ts.internal + '#1#d.data', + ts.internal + '#2#d.data', ]) df.purge(ts, 2) - self.assertEqual(sorted(os.listdir(df._datadir)), [ - ts.internal + '#1.data', - ts.internal + '.durable', + self.assertEqual(os.listdir(df._datadir), [ + ts.internal + '#1#d.data', ]) def test_purge_last_fragment_index(self): ts = self.ts() frag_index = 0 df = self._simple_get_diskfile(frag_index=frag_index) - with df.create() as writer: - data = 'test data' - writer.write(data) - metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts.internal, - 'Content-Length': len(data), - } - writer.put(metadata) - writer.commit(ts) + write_diskfile(df, ts) + # sanity + self.assertEqual(os.listdir(df._datadir), [ + ts.internal + '#0#d.data', + ]) + df.purge(ts, frag_index) + self.assertFalse(os.listdir(df._datadir)) + def test_purge_last_fragment_index_legacy_durable(self): + # a legacy durable file doesn't get purged in case another fragment is + # relying on it for durability + ts = self.ts() + frag_index = 0 + df = self._simple_get_diskfile(frag_index=frag_index) + write_diskfile(df, ts, legacy_durable=True) # sanity self.assertEqual(sorted(os.listdir(df._datadir)), [ ts.internal + '#0.data', ts.internal + '.durable', ]) - df.purge(ts, 0) + df.purge(ts, frag_index) self.assertEqual(sorted(os.listdir(df._datadir)), [ ts.internal + '.durable', ]) @@ -4366,27 +4908,16 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): ts = self.ts() frag_index = 7 df = self._simple_get_diskfile(frag_index=frag_index) - with df.create() as writer: - data = 'test data' - writer.write(data) - metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts.internal, - 'Content-Length': len(data), - } - writer.put(metadata) - writer.commit(ts) + write_diskfile(df, ts) # sanity - self.assertEqual(sorted(os.listdir(df._datadir)), [ - ts.internal + '#7.data', - ts.internal + '.durable', + self.assertEqual(os.listdir(df._datadir), [ + ts.internal + '#7#d.data', ]) df.purge(ts, 3) # no effect - self.assertEqual(sorted(os.listdir(df._datadir)), [ - ts.internal + '#7.data', - ts.internal + '.durable', + self.assertEqual(os.listdir(df._datadir), [ + ts.internal + '#7#d.data', ]) def test_purge_old_timestamp_frag_index(self): @@ -4394,27 +4925,16 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): ts = self.ts() frag_index = 1 df = self._simple_get_diskfile(frag_index=frag_index) - with df.create() as writer: - data = 'test data' - writer.write(data) - metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts.internal, - 'Content-Length': len(data), - } - writer.put(metadata) - writer.commit(ts) + write_diskfile(df, ts) # sanity - self.assertEqual(sorted(os.listdir(df._datadir)), [ - ts.internal + '#1.data', - ts.internal + '.durable', + self.assertEqual(os.listdir(df._datadir), [ + ts.internal + '#1#d.data', ]) df.purge(old_ts, 1) # no effect - self.assertEqual(sorted(os.listdir(df._datadir)), [ - ts.internal + '#1.data', - ts.internal + '.durable', + 
self.assertEqual(os.listdir(df._datadir), [ + ts.internal + '#1#d.data', ]) def test_purge_tombstone(self): @@ -4469,7 +4989,7 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): # no effect self.assertEqual(sorted(os.listdir(df._datadir)), []) - def test_open_most_recent_durable(self): + def _do_test_open_most_recent_durable(self, legacy_durable): policy = POLICIES.default df_mgr = self.df_router[policy] @@ -4477,17 +4997,14 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): 'a', 'c', 'o', policy=policy) ts = self.ts() - with df.create() as writer: - data = 'test data' - writer.write(data) - metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts.internal, - 'Content-Length': len(data), - 'X-Object-Sysmeta-Ec-Frag-Index': 3, - } - writer.put(metadata) - writer.commit(ts) + write_diskfile(df, ts, frag_index=3, + legacy_durable=legacy_durable) + metadata = { + 'ETag': md5('test data').hexdigest(), + 'X-Timestamp': ts.internal, + 'Content-Length': str(len('test data')), + 'X-Object-Sysmeta-Ec-Frag-Index': '3', + } # add some .meta stuff extra_meta = { @@ -4508,23 +5025,21 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', policy=policy) ts = self.ts() - with df.create() as writer: - data = 'test data' - writer.write(data) - new_metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts.internal, - 'Content-Length': len(data), - 'X-Object-Sysmeta-Ec-Frag-Index': 3, - } - writer.put(new_metadata) - # N.B. don't make it durable + write_diskfile(df, ts, frag_index=3, commit=False, + legacy_durable=legacy_durable) + # N.B. don't make it durable # and we still get the old metadata (same as if no .data!) df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', policy=policy) self.assertEqual(metadata, df.read_metadata()) + def test_open_most_recent_durable(self): + self._do_test_open_most_recent_durable(False) + + def test_open_most_recent_durable_legacy(self): + self._do_test_open_most_recent_durable(True) + def test_open_most_recent_missing_durable(self): policy = POLICIES.default df_mgr = self.df_router[policy] @@ -4536,18 +5051,7 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): # now create a datafile missing durable ts = self.ts() - with df.create() as writer: - data = 'test data' - writer.write(data) - new_metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts.internal, - 'Content-Length': len(data), - 'X-Object-Sysmeta-Ec-Frag-Index': 3, - } - writer.put(new_metadata) - # N.B. don't make it durable - + write_diskfile(df, ts, frag_index=3, commit=False) # add some .meta stuff extra_meta = { 'X-Object-Meta-Foo': 'Bar', @@ -4563,7 +5067,7 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): frag_index=3) self.assertRaises(DiskFileNotExist, df.read_metadata) - # sanity, withtout the frag_index kwarg + # sanity, without the frag_index kwarg df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', policy=policy) self.assertRaises(DiskFileNotExist, df.read_metadata) @@ -4583,16 +5087,27 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): except DiskFileNotExist: pass - # sanity check: should have 2* .data, .durable, .data + # sanity check: should have 3* .data files = os.listdir(df._datadir) - self.assertEqual(4, len(files)) + self.assertEqual(3, len(files)) with df.open(): self.assertEqual(df.fragments, {ts_1: [0, 2], ts_2: [3]}) - # verify frags available even if open fails e.g. 
if .durable missing - for f in filter(lambda f: f.endswith('.durable'), files): - os.remove(os.path.join(df._datadir, f)) + def test_fragments_available_when_not_durable(self): + # verify frags available even if open fails e.g. if none are durable + ts_1 = self.ts() + ts_2 = self.ts() + for ts, fi in ((ts_1, 0), (ts_1, 2), (ts_2, 3)): + try: + df = self._get_open_disk_file( + ts=ts, frag_index=fi, commit=False) + except DiskFileNotExist: + pass + df = self._simple_get_diskfile() + # sanity check: should have 3* .data + files = os.listdir(df._datadir) + self.assertEqual(3, len(files)) self.assertRaises(DiskFileNotExist, df.open) self.assertEqual(df.fragments, {ts_1: [0, 2], ts_2: [3]}) @@ -4600,7 +5115,7 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): df = self._simple_get_diskfile() self.assertIsNone(df.fragments) - def test_durable_timestamp_no_durable_file(self): + def test_durable_timestamp_when_not_durable(self): try: self._get_open_disk_file(self.ts().internal, commit=False) except DiskFileNotExist: @@ -4629,12 +5144,15 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): except DiskFileNotExist: pass df = self._simple_get_diskfile() - # sanity check - one .durable file, two .data files - self.assertEqual(3, len(os.listdir(df._datadir))) + # sanity check - two .data files + self.assertEqual(2, len(os.listdir(df._datadir))) df.open() self.assertEqual(ts1, df.durable_timestamp) - def test_open_with_fragment_preferences(self): + def test_durable_timestamp_legacy_durable(self): + self._do_test_durable_timestamp(True) + + def _test_open_with_fragment_preferences(self, legacy_durable=False): policy = POLICIES.default df_mgr = self.df_router[policy] @@ -4644,33 +5162,16 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): ts_1, ts_2, ts_3, ts_4 = (self.ts() for _ in range(4)) # create two durable frags, first with index 0 - with df.create() as writer: - data = 'test data' - writer.write(data) - frag_0_metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts_1.internal, - 'Content-Length': len(data), - 'X-Object-Sysmeta-Ec-Frag-Index': 0, - } - writer.put(frag_0_metadata) - writer.commit(ts_1) + frag_0_metadata = write_diskfile(df, ts_1, frag_index=0, + legacy_durable=legacy_durable) # second with index 3 - with df.create() as writer: - data = 'test data' - writer.write(data) - frag_3_metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts_1.internal, - 'Content-Length': len(data), - 'X-Object-Sysmeta-Ec-Frag-Index': 3, - } - writer.put(frag_3_metadata) - writer.commit(ts_1) + frag_3_metadata = write_diskfile(df, ts_1, frag_index=3, + legacy_durable=legacy_durable) - # sanity check: should have 2 * .data plus a .durable - self.assertEqual(3, len(os.listdir(df._datadir))) + # sanity check: should have 2 * .data plus possibly a .durable + self.assertEqual(3 if legacy_durable else 2, + len(os.listdir(df._datadir))) # add some .meta stuff meta_1_metadata = { @@ -4680,8 +5181,9 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', policy=policy) df.write_metadata(meta_1_metadata) - # sanity check: should have 2 * .data, .durable, .meta - self.assertEqual(4, len(os.listdir(df._datadir))) + # sanity check: should have 2 * .data, possibly .durable, .meta + self.assertEqual(4 if legacy_durable else 3, + len(os.listdir(df._datadir))) # sanity: should get frag index 3 df = df_mgr.get_diskfile(self.existing_device, '0', @@ -4691,21 +5193,15 @@ class TestECDiskFile(DiskFileMixin, 
unittest.TestCase): self.assertEqual(expected, df.read_metadata()) # add a newer datafile for frag index 2 + # N.B. don't make it durable - skip call to commit() df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', policy=policy) - with df.create() as writer: - data = 'new test data' - writer.write(data) - frag_2_metadata = { - 'ETag': md5(data).hexdigest(), - 'X-Timestamp': ts_3.internal, - 'Content-Length': len(data), - 'X-Object-Sysmeta-Ec-Frag-Index': 2, - } - writer.put(frag_2_metadata) - # N.B. don't make it durable - skip call to commit() - # sanity check: should have 2* .data, .durable, .meta, .data - self.assertEqual(5, len(os.listdir(df._datadir))) + frag_2_metadata = write_diskfile(df, ts_3, frag_index=2, commit=False, + data='new test data', + legacy_durable=legacy_durable) + # sanity check: should have 2* .data, possibly .durable, .meta, .data + self.assertEqual(5 if legacy_durable else 4, + len(os.listdir(df._datadir))) # sanity check: with no frag preferences we get old metadata df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', @@ -4740,8 +5236,9 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): df = df_mgr.get_diskfile(self.existing_device, '0', 'a', 'c', 'o', policy=policy) df.write_metadata(meta_2_metadata) - # sanity check: should have 2 * .data, .durable, .data, .meta - self.assertEqual(5, len(os.listdir(df._datadir))) + # sanity check: should have 2 * .data, possibly .durable, .data, .meta + self.assertEqual(5 if legacy_durable else 4, + len(os.listdir(df._datadir))) # sanity check: with no frag preferences we get newer metadata applied # to durable data file @@ -4838,6 +5335,12 @@ class TestECDiskFile(DiskFileMixin, unittest.TestCase): self.assertEqual(ts_1.internal, df.durable_timestamp) self.assertEqual({ts_1: [0, 3], ts_3: [2]}, df.fragments) + def test_open_with_fragment_preferences_legacy_durable(self): + self._test_open_with_fragment_preferences(legacy_durable=True) + + def test_open_with_fragment_preferences(self): + self._test_open_with_fragment_preferences(legacy_durable=False) + def test_open_with_bad_fragment_preferences(self): policy = POLICIES.default df_mgr = self.df_router[policy] @@ -4933,31 +5436,19 @@ class TestSuffixHashes(unittest.TestCase): self.policy = policy yield policy - def assertEqual(self, *args): + @contextmanager + def policy_in_message(self): try: - unittest.TestCase.assertEqual(self, *args) + yield except AssertionError as err: if not self.policy: raise policy_trailer = '\n\n... 
for policy %r' % self.policy raise AssertionError(str(err) + policy_trailer) - def _datafilename(self, timestamp, policy, frag_index=None): - if frag_index is None: - frag_index = randint(0, 9) - filename = timestamp.internal - if policy.policy_type == EC_POLICY: - filename += '#%d' % frag_index - filename += '.data' - return filename - - def _metafilename(self, meta_timestamp, ctype_timestamp=None): - filename = meta_timestamp.internal - if ctype_timestamp is not None: - delta = meta_timestamp.raw - ctype_timestamp.raw - filename = '%s-%x' % (filename, delta) - filename += '.meta' - return filename + def assertEqual(self, *args): + with self.policy_in_message(): + unittest.TestCase.assertEqual(self, *args) def check_cleanup_ondisk_files(self, policy, input_files, output_files): orig_unlink = os.unlink @@ -4992,7 +5483,7 @@ class TestSuffixHashes(unittest.TestCase): def test_cleanup_ondisk_files_purge_data_newer_ts(self): for policy in self.iter_policies(): # purge .data if there's a newer .ts - file1 = self._datafilename(self.ts(), policy) + file1 = _make_datafilename(self.ts(), policy) file2 = self.ts().internal + '.ts' file_list = [file1, file2] self.check_cleanup_ondisk_files(policy, file_list, [file2]) @@ -5003,7 +5494,7 @@ class TestSuffixHashes(unittest.TestCase): file1 = self.ts().internal + '.ts' file2 = self.ts().internal + '.ts' timestamp = self.ts() - file3 = self._datafilename(timestamp, policy) + file3 = _make_datafilename(timestamp, policy, durable=False) file_list = [file1, file2, file3] expected = { # no durable datafile means you can't get rid of the @@ -5013,27 +5504,36 @@ class TestSuffixHashes(unittest.TestCase): }[policy.policy_type] self.check_cleanup_ondisk_files(policy, file_list, expected) + def _do_test_cleanup_ondisk_files_purge_ts_newer_data( + self, policy, legacy_durable=False): + # purge .ts if there's a newer .data + file1 = self.ts().internal + '.ts' + timestamp = self.ts() + file2 = _make_datafilename( + timestamp, policy, durable=not legacy_durable) + file_list = [file1, file2] + expected = [file2] + if policy.policy_type == EC_POLICY and legacy_durable: + durable_file = timestamp.internal + '.durable' + file_list.append(durable_file) + expected.insert(0, durable_file) + self.check_cleanup_ondisk_files(policy, file_list, expected) + def test_cleanup_ondisk_files_purge_ts_newer_data(self): for policy in self.iter_policies(): - # purge .ts if there's a newer .data - file1 = self.ts().internal + '.ts' - timestamp = self.ts() - file2 = self._datafilename(timestamp, policy) - file_list = [file1, file2] + self._do_test_cleanup_ondisk_files_purge_ts_newer_data(policy) + + def test_cleanup_ondisk_files_purge_ts_newer_data_and_legacy_durable(self): + for policy in self.iter_policies(): if policy.policy_type == EC_POLICY: - durable_file = timestamp.internal + '.durable' - file_list.append(durable_file) - expected = { - EC_POLICY: [durable_file, file2], - REPL_POLICY: [file2], - }[policy.policy_type] - self.check_cleanup_ondisk_files(policy, file_list, expected) + self._do_test_cleanup_ondisk_files_purge_ts_newer_data( + policy, legacy_durable=True) def test_cleanup_ondisk_files_purge_older_ts(self): for policy in self.iter_policies(): file1 = self.ts().internal + '.ts' file2 = self.ts().internal + '.ts' - file3 = self._datafilename(self.ts(), policy) + file3 = _make_datafilename(self.ts(), policy, durable=False) file4 = self.ts().internal + '.meta' expected = { # no durable means we can only throw out things before @@ -5045,24 +5545,34 @@ class 
TestSuffixHashes(unittest.TestCase): file_list = [file1, file2, file3, file4] self.check_cleanup_ondisk_files(policy, file_list, expected) + def _do_test_cleanup_ondisk_files_keep_meta_data_purge_ts( + self, policy, legacy_durable=False): + file1 = self.ts().internal + '.ts' + file2 = self.ts().internal + '.ts' + timestamp = self.ts() + file3 = _make_datafilename( + timestamp, policy, durable=not legacy_durable) + file_list = [file1, file2, file3] + expected = [file3] + if policy.policy_type == EC_POLICY and legacy_durable: + durable_filename = timestamp.internal + '.durable' + file_list.append(durable_filename) + expected.insert(0, durable_filename) + file4 = self.ts().internal + '.meta' + file_list.append(file4) + expected.insert(0, file4) + # keep .meta and .data if meta newer than data and purge .ts + self.check_cleanup_ondisk_files(policy, file_list, expected) + def test_cleanup_ondisk_files_keep_meta_data_purge_ts(self): for policy in self.iter_policies(): - file1 = self.ts().internal + '.ts' - file2 = self.ts().internal + '.ts' - timestamp = self.ts() - file3 = self._datafilename(timestamp, policy) - file_list = [file1, file2, file3] + self._do_test_cleanup_ondisk_files_keep_meta_data_purge_ts(policy) + + def test_cleanup_ondisk_files_keep_meta_data_purge_ts_legacy_durable(self): + for policy in self.iter_policies(): if policy.policy_type == EC_POLICY: - durable_filename = timestamp.internal + '.durable' - file_list.append(durable_filename) - file4 = self.ts().internal + '.meta' - file_list.append(file4) - # keep .meta and .data if meta newer than data and purge .ts - expected = { - EC_POLICY: [file4, durable_filename, file3], - REPL_POLICY: [file4, file3], - }[policy.policy_type] - self.check_cleanup_ondisk_files(policy, file_list, expected) + self._do_test_cleanup_ondisk_files_keep_meta_data_purge_ts( + policy, legacy_durable=True) def test_cleanup_ondisk_files_keep_one_ts(self): for policy in self.iter_policies(): @@ -5074,9 +5584,9 @@ class TestSuffixHashes(unittest.TestCase): def test_cleanup_ondisk_files_multi_data_file(self): for policy in self.iter_policies(): - file1 = self._datafilename(self.ts(), policy, 1) - file2 = self._datafilename(self.ts(), policy, 2) - file3 = self._datafilename(self.ts(), policy, 3) + file1 = _make_datafilename(self.ts(), policy, 1, durable=False) + file2 = _make_datafilename(self.ts(), policy, 2, durable=False) + file3 = _make_datafilename(self.ts(), policy, 3, durable=False) expected = { # keep all non-durable datafiles EC_POLICY: [file3, file2, file1], @@ -5086,40 +5596,56 @@ class TestSuffixHashes(unittest.TestCase): file_list = [file1, file2, file3] self.check_cleanup_ondisk_files(policy, file_list, expected) + def _do_test_cleanup_ondisk_files_keeps_one_datafile(self, policy, + legacy_durable=False): + timestamps = [self.ts() for i in range(3)] + file1 = _make_datafilename(timestamps[0], policy, 1, + durable=not legacy_durable) + file2 = _make_datafilename(timestamps[1], policy, 2, + durable=not legacy_durable) + file3 = _make_datafilename(timestamps[2], policy, 3, + durable=not legacy_durable) + file_list = [file1, file2, file3] + expected = [file3] + if policy.policy_type == EC_POLICY and legacy_durable: + for t in timestamps: + file_list.append(t.internal + '.durable') + expected.insert(0, file_list[-1]) + self.check_cleanup_ondisk_files(policy, file_list, expected) + def test_cleanup_ondisk_files_keeps_one_datafile(self): for policy in self.iter_policies(): - timestamps = [self.ts() for i in range(3)] - file1 = 
self._datafilename(timestamps[0], policy, 1) - file2 = self._datafilename(timestamps[1], policy, 2) - file3 = self._datafilename(timestamps[2], policy, 3) - file_list = [file1, file2, file3] + self._do_test_cleanup_ondisk_files_keeps_one_datafile(policy) + + def test_cleanup_ondisk_files_keeps_one_datafile_and_legacy_durable(self): + for policy in self.iter_policies(): if policy.policy_type == EC_POLICY: - for t in timestamps: - file_list.append(t.internal + '.durable') - latest_durable = file_list[-1] - expected = { - # keep latest durable and datafile - EC_POLICY: [latest_durable, file3], - # keep only latest of multiple .data files - REPL_POLICY: [file3] - }[policy.policy_type] - self.check_cleanup_ondisk_files(policy, file_list, expected) + self._do_test_cleanup_ondisk_files_keeps_one_datafile( + policy, legacy_durable=True) + + def _do_test_cleanup_ondisk_files_keep_one_meta(self, policy, + legacy_durable=False): + # keep only latest of multiple .meta files + t_data = self.ts() + file1 = _make_datafilename(t_data, policy, durable=not legacy_durable) + file2, file3 = [self.ts().internal + '.meta' for i in range(2)] + file_list = [file1, file2, file3] + expected = [file3, file1] + if policy.policy_type == EC_POLICY and legacy_durable: + durable_file = t_data.internal + '.durable' + file_list.append(durable_file) + expected.insert(1, durable_file) + self.check_cleanup_ondisk_files(policy, file_list, expected) def test_cleanup_ondisk_files_keep_one_meta(self): for policy in self.iter_policies(): - # keep only latest of multiple .meta files - t_data = self.ts() - file1 = self._datafilename(t_data, policy) - file2, file3 = [self.ts().internal + '.meta' for i in range(2)] - file_list = [file1, file2, file3] + self._do_test_cleanup_ondisk_files_keep_one_meta(policy) + + def test_cleanup_ondisk_files_keep_one_meta_legacy_durable(self): + for policy in self.iter_policies(): if policy.policy_type == EC_POLICY: - durable_file = t_data.internal + '.durable' - file_list.append(durable_file) - expected = { - EC_POLICY: [file3, durable_file, file1], - REPL_POLICY: [file3, file1] - }[policy.policy_type] - self.check_cleanup_ondisk_files(policy, file_list, expected) + self._do_test_cleanup_ondisk_files_keep_one_meta( + policy, legacy_durable=True) def test_cleanup_ondisk_files_only_meta(self): for policy in self.iter_policies(): @@ -5139,7 +5665,7 @@ class TestSuffixHashes(unittest.TestCase): def test_cleanup_ondisk_files_purge_old_data_only(self): for policy in self.iter_policies(): # Oldest .data will be purge, .meta and .ts won't be touched - file1 = self._datafilename(self.ts(), policy) + file1 = _make_datafilename(self.ts(), policy) file2 = self.ts().internal + '.ts' file3 = self.ts().internal + '.meta' file_list = [file1, file2, file3] @@ -5165,18 +5691,24 @@ class TestSuffixHashes(unittest.TestCase): def test_cleanup_ondisk_files_keep_single_old_data(self): for policy in self.iter_policies(): old_float = time() - (diskfile.ONE_WEEK + 1) - file1 = self._datafilename(Timestamp(old_float), policy) + file1 = _make_datafilename( + Timestamp(old_float), policy, durable=True) file_list = [file1] + self.check_cleanup_ondisk_files(policy, file_list, file_list) + + def test_cleanup_ondisk_drops_old_non_durable_data(self): + for policy in self.iter_policies(): if policy.policy_type == EC_POLICY: - # for EC an isolated old .data file is removed, its useless - # without a .durable + old_float = time() - (diskfile.ONE_WEEK + 1) + file1 = _make_datafilename( + Timestamp(old_float), policy, durable=False) + 
file_list = [file1] + # for EC an isolated old non-durable .data file is removed expected = [] - else: - # A single old .data file will not be removed - expected = file_list - self.check_cleanup_ondisk_files(policy, file_list, expected) + self.check_cleanup_ondisk_files(policy, file_list, expected) def test_cleanup_ondisk_files_drops_isolated_durable(self): + # check behaviour for legacy durable files for policy in self.iter_policies(): if policy.policy_type == EC_POLICY: file1 = Timestamp(time()).internal + '.durable' @@ -5222,7 +5754,7 @@ class TestSuffixHashes(unittest.TestCase): for policy in self.iter_policies(): # Timestamp 1 makes the check routine pretend the file # disappeared after listdir before unlink. - file1 = self._datafilename(Timestamp(1), policy) + file1 = _make_datafilename(Timestamp(1), policy) file2 = '0000000002.00000.ts' file_list = [file1, file2] self.check_cleanup_ondisk_files(policy, file_list, []) @@ -5532,6 +6064,7 @@ class TestSuffixHashes(unittest.TestCase): 'Content-Length': len(test_data), } writer.put(metadata) + # note - no commit so data is non-durable hashes = df_mgr.get_hashes('sda1', '0', [], policy) datafile_hash = md5({ EC_POLICY: timestamp.internal, @@ -5540,7 +6073,7 @@ class TestSuffixHashes(unittest.TestCase): expected = { REPL_POLICY: {suffix: datafile_hash}, EC_POLICY: {suffix: { - # because there's no .durable file, we have no hash for + # because there's no durable state, we have no hash for # the None key - only the frag index for the data file 7: datafile_hash}}, }[policy.policy_type] @@ -5579,68 +6112,81 @@ class TestSuffixHashes(unittest.TestCase): found_files = os.listdir(df._datadir) self.assertEqual(found_files, [filename]) - def test_hash_suffix_multi_file_ends_in_datafile(self): - for policy in self.iter_policies(): - df_mgr = self.df_router[policy] - df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', policy=policy, - frag_index=4) - suffix = os.path.basename(os.path.dirname(df._datadir)) - mkdirs(df._datadir) - now = time() - timestamp = None - # go behind the scenes and setup a bunch of weird file names - for tdiff in [500, 100, 10, 1]: - suffs = ['.meta', '.data'] - if tdiff > 50: - suffs.append('.ts') - if policy.policy_type == EC_POLICY: - suffs.append('.durable') - for suff in suffs: - timestamp = Timestamp(now - tdiff) - filename = timestamp.internal - if policy.policy_type == EC_POLICY and suff == '.data': - filename += '#%s' % df._frag_index - filename += suff - open(os.path.join(df._datadir, filename), 'w').close() - meta_timestamp = Timestamp(now) - metadata_filename = meta_timestamp.internal + '.meta' - open(os.path.join(df._datadir, metadata_filename), 'w').close() + def _do_hash_suffix_multi_file_ends_in_datafile(self, policy, + legacy_durable): + # if legacy_durable is True then synthesize legacy durable files + # instead of having a durable marker in the data file name + frag_index = 4 + df_mgr = self.df_router[policy] + df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', policy=policy, + frag_index=frag_index) + suffix = os.path.basename(os.path.dirname(df._datadir)) + mkdirs(df._datadir) + now = time() + timestamp = None + # go behind the scenes and setup a bunch of weird file names + for tdiff in [500, 100, 10, 1]: + suffs = ['.meta', '.data'] + if tdiff > 50: + suffs.append('.ts') + if policy.policy_type == EC_POLICY and legacy_durable: + suffs.append('.durable') + for suff in suffs: + timestamp = Timestamp(now - tdiff) + if suff == '.data': + filename = _make_datafilename( + timestamp, policy, frag_index, + 
durable=not legacy_durable) + else: + filename = timestamp.internal + suff + open(os.path.join(df._datadir, filename), 'w').close() + meta_timestamp = Timestamp(now) + metadata_filename = meta_timestamp.internal + '.meta' + open(os.path.join(df._datadir, metadata_filename), 'w').close() - # call get_hashes and it should clean things up - hashes = df_mgr.get_hashes('sda1', '0', [], policy) + # call get_hashes and it should clean up all but the most recent files + hashes = df_mgr.get_hashes('sda1', '0', [], policy) - data_filename = timestamp.internal - if policy.policy_type == EC_POLICY: - data_filename += '#%s' % df._frag_index - data_filename += '.data' - if policy.policy_type == EC_POLICY: - durable_filename = timestamp.internal + '.durable' - hasher = md5() - hasher.update(metadata_filename) - hasher.update(durable_filename) - expected = { - suffix: { - # metadata & durable updates are hashed separately - None: hasher.hexdigest(), - 4: self.fname_to_ts_hash(data_filename), - } + # calculate expected outcome + data_filename = _make_datafilename( + timestamp, policy, frag_index, durable=not legacy_durable) + expected_files = [data_filename, metadata_filename] + if policy.policy_type == EC_POLICY: + # note: expected hashes is same with or without legacy durable file + hasher = md5() + hasher.update(metadata_filename) + hasher.update(timestamp.internal + '.durable') + expected = { + suffix: { + # metadata & durable updates are hashed separately + None: hasher.hexdigest(), + 4: self.fname_to_ts_hash(data_filename), } - expected_files = [data_filename, durable_filename, - metadata_filename] - elif policy.policy_type == REPL_POLICY: - hasher = md5() - hasher.update(metadata_filename) - hasher.update(data_filename) - expected = {suffix: hasher.hexdigest()} - expected_files = [data_filename, metadata_filename] - else: - self.fail('unknown policy type %r' % policy.policy_type) - msg = 'expected %r != %r for policy %r' % ( - expected, hashes, policy) - self.assertEqual(hashes, expected, msg) - # only the meta and data should be left - self.assertEqual(sorted(os.listdir(df._datadir)), - sorted(expected_files)) + } + if legacy_durable: + expected_files.append(timestamp.internal + '.durable') + elif policy.policy_type == REPL_POLICY: + hasher = md5() + hasher.update(metadata_filename) + hasher.update(data_filename) + expected = {suffix: hasher.hexdigest()} + else: + self.fail('unknown policy type %r' % policy.policy_type) + self.assertEqual(hashes, expected) + # only the meta and data should be left + self.assertEqual(sorted(os.listdir(df._datadir)), + sorted(expected_files)) + + def test_hash_suffix_multifile_ends_in_datafile(self): + for policy in self.iter_policies(): + self._do_hash_suffix_multi_file_ends_in_datafile( + policy, legacy_durable=False) + + def test_hash_suffix_multifile_ends_in_datafile_legacy_durable(self): + for policy in self.iter_policies(): + if policy.policy_type == EC_POLICY: + self._do_hash_suffix_multi_file_ends_in_datafile( + policy, legacy_durable=True) def _verify_get_hashes(self, filenames, ts_data, ts_meta, ts_ctype, policy): @@ -5658,6 +6204,8 @@ class TestSuffixHashes(unittest.TestCase): df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o', policy=policy, frag_index=4) suffix = os.path.basename(os.path.dirname(df._datadir)) + partition_dir = os.path.dirname(os.path.dirname(df._datadir)) + rmtree(partition_dir, ignore_errors=True) # clean dir for each test mkdirs(df._datadir) # calculate expected result @@ -5693,123 +6241,163 @@ class 
TestSuffixHashes(unittest.TestCase): def test_hash_suffix_with_older_content_type_in_meta(self): # single meta file having older content-type - for policy in self.iter_policies(): - ts_data, ts_ctype, ts_meta = ( - self.ts(), self.ts(), self.ts()) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_data, ts_ctype, ts_meta = ( + self.ts(), self.ts(), self.ts()) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_meta, ts_ctype)] - if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_meta, ts_ctype)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_meta, ts_ctype, policy) + self._verify_get_hashes( + filenames, ts_data, ts_meta, ts_ctype, policy) + + do_test(False) + do_test(True) def test_hash_suffix_with_same_age_content_type_in_meta(self): # single meta file having same age content-type - for policy in self.iter_policies(): - ts_data, ts_meta = (self.ts(), self.ts()) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_data, ts_meta = (self.ts(), self.ts()) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_meta, ts_meta)] - if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_meta, ts_meta)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_meta, ts_meta, policy) + self._verify_get_hashes( + filenames, ts_data, ts_meta, ts_meta, policy) + + do_test(False) + do_test(True) def test_hash_suffix_with_obsolete_content_type_in_meta(self): # After rsync replication we could have a single meta file having # content-type older than a replicated data file - for policy in self.iter_policies(): - ts_ctype, ts_data, ts_meta = (self.ts(), self.ts(), self.ts()) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_ctype, ts_data, ts_meta = (self.ts(), self.ts(), self.ts()) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_meta, ts_ctype)] - if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_meta, ts_ctype)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_meta, None, policy) + self._verify_get_hashes( + filenames, ts_data, ts_meta, None, policy) + + do_test(False) + do_test(True) def test_hash_suffix_with_older_content_type_in_newer_meta(self): # After rsync replication we could have two meta files: newest # content-type is in newer meta file, older than newer meta file - for policy in self.iter_policies(): - ts_data, ts_older_meta, ts_ctype, ts_newer_meta = ( - self.ts() for _ in range(4)) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_data, ts_older_meta, ts_ctype, ts_newer_meta = ( + self.ts() for _ in range(4)) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_older_meta), - 
self._metafilename(ts_newer_meta, ts_ctype)] - if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_older_meta), + _make_metafilename(ts_newer_meta, ts_ctype)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_newer_meta, ts_ctype, policy) + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, ts_ctype, policy) + + do_test(False) + do_test(True) def test_hash_suffix_with_same_age_content_type_in_newer_meta(self): # After rsync replication we could have two meta files: newest # content-type is in newer meta file, at same age as newer meta file - for policy in self.iter_policies(): - ts_data, ts_older_meta, ts_newer_meta = ( - self.ts() for _ in range(3)) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_data, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(3)) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_newer_meta, ts_newer_meta)] - if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_newer_meta, ts_newer_meta)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_newer_meta, ts_newer_meta, policy) + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, ts_newer_meta, policy) + + do_test(False) + do_test(True) def test_hash_suffix_with_older_content_type_in_older_meta(self): # After rsync replication we could have two meta files: newest # content-type is in older meta file, older than older meta file - for policy in self.iter_policies(): - ts_data, ts_ctype, ts_older_meta, ts_newer_meta = ( - self.ts() for _ in range(4)) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_data, ts_ctype, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(4)) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_newer_meta), - self._metafilename(ts_older_meta, ts_ctype)] - if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_newer_meta), + _make_metafilename(ts_older_meta, ts_ctype)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_newer_meta, ts_ctype, policy) + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, ts_ctype, policy) + + do_test(False) + do_test(True) def test_hash_suffix_with_same_age_content_type_in_older_meta(self): # After rsync replication we could have two meta files: newest # content-type is in older meta file, at same age as older meta file - for policy in self.iter_policies(): - ts_data, ts_older_meta, ts_newer_meta = ( - self.ts() for _ in range(3)) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_data, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(3)) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_newer_meta), - self._metafilename(ts_older_meta, ts_older_meta)] 
- if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_newer_meta), + _make_metafilename(ts_older_meta, ts_older_meta)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_newer_meta, ts_older_meta, policy) + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, ts_older_meta, policy) + + do_test(False) + do_test(True) def test_hash_suffix_with_obsolete_content_type_in_older_meta(self): # After rsync replication we could have two meta files: newest # content-type is in older meta file, but older than data file - for policy in self.iter_policies(): - ts_ctype, ts_data, ts_older_meta, ts_newer_meta = ( - self.ts() for _ in range(4)) + def do_test(legacy_durable): + for policy in self.iter_policies(): + ts_ctype, ts_data, ts_older_meta, ts_newer_meta = ( + self.ts() for _ in range(4)) - filenames = [self._datafilename(ts_data, policy, frag_index=4), - self._metafilename(ts_newer_meta), - self._metafilename(ts_older_meta, ts_ctype)] - if policy.policy_type == EC_POLICY: - filenames.append(ts_data.internal + '.durable') + filenames = [_make_datafilename(ts_data, policy, frag_index=4, + durable=not legacy_durable), + _make_metafilename(ts_newer_meta), + _make_metafilename(ts_older_meta, ts_ctype)] + if policy.policy_type == EC_POLICY and legacy_durable: + filenames.append(ts_data.internal + '.durable') - self._verify_get_hashes( - filenames, ts_data, ts_newer_meta, None, policy) + self._verify_get_hashes( + filenames, ts_data, ts_newer_meta, None, policy) + + do_test(False) + do_test(True) def test_hash_suffix_removes_empty_hashdir_and_suffix(self): for policy in self.iter_policies(): @@ -6167,10 +6755,8 @@ class TestSuffixHashes(unittest.TestCase): } writer.put(metadata) writer.commit(timestamp) - datafile_name = timestamp.internal - if policy.policy_type == EC_POLICY: - datafile_name += '#%d' % df._frag_index - datafile_name += '.data' + datafile_name = _make_datafilename( + timestamp, policy, frag_index=5) durable_hash = md5(timestamp.internal + '.durable').hexdigest() datafile_suffix = os.path.basename(os.path.dirname(df._datadir)) # in the *third* suffix - two datafiles for different hashes @@ -6190,15 +6776,10 @@ class TestSuffixHashes(unittest.TestCase): writer.put(metadata) writer.commit(timestamp) # we'll keep track of file names for hash calculations - filename = timestamp.internal - if policy.policy_type == EC_POLICY: - filename += '#%d' % df._frag_index - filename += '.data' - filenames = { - 'data': { - 6: filename - }, - 'durable': [timestamp.internal + '.durable'], + filename = _make_datafilename( + timestamp, policy, frag_index=6) + data_filenames = { + 6: filename } df = df_mgr.get_diskfile(self.existing_device, '0', *matching_paths[1], policy=policy, @@ -6216,17 +6797,15 @@ class TestSuffixHashes(unittest.TestCase): } writer.put(metadata) writer.commit(timestamp) - filename = timestamp.internal - if policy.policy_type == EC_POLICY: - filename += '#%d' % df._frag_index - filename += '.data' - filenames['data'][7] = filename - filenames['durable'].append(timestamp.internal + '.durable') + filename = _make_datafilename( + timestamp, policy, frag_index=7) + data_filenames[7] = filename # now make up the expected suffixes! 
if policy.policy_type == EC_POLICY: hasher = md5() - for filename in filenames['durable']: - hasher.update(filename) + for filename in data_filenames.values(): + # each data file updates the hasher with durable timestamp + hasher.update(filename.split('#', 1)[0] + '.durable') expected = { tombstone_suffix: { None: tombstone_hash, @@ -6237,13 +6816,13 @@ class TestSuffixHashes(unittest.TestCase): }, matching_suffix: { None: hasher.hexdigest(), - 6: self.fname_to_ts_hash(filenames['data'][6]), - 7: self.fname_to_ts_hash(filenames['data'][7]), + 6: self.fname_to_ts_hash(data_filenames[6]), + 7: self.fname_to_ts_hash(data_filenames[7]), }, } elif policy.policy_type == REPL_POLICY: hasher = md5() - for filename in filenames['data'].values(): + for filename in data_filenames.values(): hasher.update(filename) expected = { tombstone_suffix: tombstone_hash, diff --git a/test/unit/obj/test_reconstructor.py b/test/unit/obj/test_reconstructor.py index cf8474888b..150627f758 100755 --- a/test/unit/obj/test_reconstructor.py +++ b/test/unit/obj/test_reconstructor.py @@ -41,6 +41,7 @@ from swift.obj.reconstructor import REVERT from test.unit import (patch_policies, debug_logger, mocked_http_conn, FabricatedRing, make_timestamp_iter, DEFAULT_TEST_EC_TYPE) +from test.unit.obj.common import write_diskfile @contextmanager @@ -136,6 +137,8 @@ def get_header_frag_index(self, body): ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=2, ec_nparity=1)]) class TestGlobalSetupObjectReconstructor(unittest.TestCase): + # Tests for reconstructor using real objects in test partition directories. + legacy_durable = False def setUp(self): self.testdir = tempfile.mkdtemp() @@ -174,22 +177,16 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): # most of the reconstructor test methods require that there be # real objects in place, not just part dirs, so we'll create them # all here.... 
- # part 0: 3C1/hash/xxx-1.data <-- job: sync_only - parnters (FI 1) - # /xxx.durable <-- included in earlier job (FI 1) - # 061/hash/xxx-1.data <-- included in earlier job (FI 1) - # /xxx.durable <-- included in earlier job (FI 1) - # /xxx-2.data <-- job: sync_revert to index 2 + # part 0: 3C1/hash/xxx#1#d.data <-- job: sync_only - partners (FI 1) + # 061/hash/xxx#1#d.data <-- included in earlier job (FI 1) + # /xxx#2#d.data <-- job: sync_revert to index 2 - # part 1: 3C1/hash/xxx-0.data <-- job: sync_only - parnters (FI 0) - # /xxx-1.data <-- job: sync_revert to index 1 - # /xxx.durable <-- included in earlier jobs (FI 0, 1) - # 061/hash/xxx-1.data <-- included in earlier job (FI 1) - # /xxx.durable <-- included in earlier job (FI 1) + # part 1: 3C1/hash/xxx#0#d.data <-- job: sync_only - partners (FI 0) + # /xxx#1#d.data <-- job: sync_revert to index 1 + # 061/hash/xxx#1#d.data <-- included in earlier job (FI 1) - # part 2: 3C1/hash/xxx-2.data <-- job: sync_revert to index 2 - # /xxx.durable <-- included in earlier job (FI 2) - # 061/hash/xxx-0.data <-- job: sync_revert to index 0 - # /xxx.durable <-- included in earlier job (FI 0) + # part 2: 3C1/hash/xxx#2#d.data <-- job: sync_revert to index 2 + # 061/hash/xxx#0#d.data <-- job: sync_revert to index 0 def _create_frag_archives(policy, obj_path, local_id, obj_set): # we'll create 2 sets of objects in different suffix dirs @@ -202,7 +199,7 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): # just the local return local_id else: - # onde local and all of another + # one local and all of another if obj_num == 0: return local_id else: @@ -239,7 +236,7 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): timestamp=utils.Timestamp(t)) for part_num in self.part_nums: - # create 3 unique objcets per part, each part + # create 3 unique objects per part, each part # will then have a unique mix of FIs for the # possible scenarios for obj_num in range(0, 3): @@ -285,18 +282,10 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): df_mgr = self.reconstructor._df_router[policy] df = df_mgr.get_diskfile('sda1', part, 'a', 'c', object_name, policy=policy) - with df.create() as writer: - timestamp = timestamp or utils.Timestamp(time.time()) - test_data = test_data or 'test data' - writer.write(test_data) - metadata = { - 'X-Timestamp': timestamp.internal, - 'Content-Length': len(test_data), - 'Etag': md5(test_data).hexdigest(), - 'X-Object-Sysmeta-Ec-Frag-Index': frag_index, - } - writer.put(metadata) - writer.commit(timestamp) + timestamp = timestamp or utils.Timestamp(time.time()) + test_data = test_data or 'test data' + write_diskfile(df, timestamp, data=test_data, frag_index=frag_index, + legacy_durable=self.legacy_durable) return df def assert_expected_jobs(self, part_num, jobs): @@ -1003,7 +992,7 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): hash_gen = self.reconstructor._df_router[policy].yield_hashes( 'sda1', '2', policy) for path, hash_, ts in hash_gen: - self.fail('found %s with %s in %s', (hash_, ts, path)) + self.fail('found %s with %s in %s' % (hash_, ts, path)) # but the partition directory and hashes pkl still exist self.assertTrue(os.access(part_path, os.F_OK)) hashes_path = os.path.join(self.objects_1, '2', diskfile.HASH_FILE) @@ -1117,6 +1106,12 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): self.assertEqual(len(found_jobs), 6) +class TestGlobalSetupObjectReconstructorLegacyDurable( + TestGlobalSetupObjectReconstructor): + # Tests for reconstructor using real objects 
in test partition directories. + legacy_durable = True + + @patch_policies(with_ec_default=True) class TestObjectReconstructor(unittest.TestCase): @@ -2444,10 +2439,9 @@ class TestObjectReconstructor(unittest.TestCase): ], [ (r['ip'], r['path']) for r in request_log.requests ]) - # hashpath is still there, but only the durable remains + # hashpath is still there, but all files have been purged files = os.listdir(df._datadir) - self.assertEqual(1, len(files)) - self.assertTrue(files[0].endswith('.durable')) + self.assertFalse(files) # and more to the point, the next suffix recalc will clean it up df_mgr = self.reconstructor._df_router[self.policy] diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py index 5826376f06..8bcb34c744 100755 --- a/test/unit/obj/test_server.py +++ b/test/unit/obj/test_server.py @@ -2368,7 +2368,7 @@ class TestObjectController(unittest.TestCase): timestamp = utils.Timestamp(time()).internal def put_with_index(expected_rsp, frag_index, node_index=None): - data_file_tail = '#%d.data' % frag_index + data_file_tail = '#%d#d.data' % frag_index headers = {'X-Timestamp': timestamp, 'Content-Length': '6', 'Content-Type': 'application/octet-stream', @@ -2420,7 +2420,7 @@ class TestObjectController(unittest.TestCase): # disk file put_with_index(201, 7, 6) - def test_PUT_durable_files(self): + def test_PUT_commits_data(self): for policy in POLICIES: timestamp = utils.Timestamp(int(time())).internal data_file_tail = '.data' @@ -2429,8 +2429,9 @@ class TestObjectController(unittest.TestCase): 'Content-Type': 'application/octet-stream', 'X-Backend-Storage-Policy-Index': int(policy)} if policy.policy_type == EC_POLICY: + # commit renames data file headers['X-Object-Sysmeta-Ec-Frag-Index'] = '2' - data_file_tail = '#2.data' + data_file_tail = '#2#d.data' req = Request.blank( '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, headers=headers) @@ -2446,12 +2447,6 @@ class TestObjectController(unittest.TestCase): self.assertTrue(os.path.isfile(data_file), 'Expected file %r not found in %r for policy %r' % (data_file, os.listdir(obj_dir), int(policy))) - durable_file = os.path.join(obj_dir, timestamp) + '.durable' - if policy.policy_type == EC_POLICY: - self.assertTrue(os.path.isfile(durable_file)) - self.assertFalse(os.path.getsize(durable_file)) - else: - self.assertFalse(os.path.isfile(durable_file)) rmtree(obj_dir) def test_HEAD(self): @@ -3237,7 +3232,7 @@ class TestObjectController(unittest.TestCase): resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 202) - # PUT again at ts_2 but without a .durable file + # PUT again at ts_2 but without making the data file durable ts_2 = next(ts_iter) headers = {'X-Timestamp': ts_2.internal, 'Content-Length': '5', @@ -3249,8 +3244,7 @@ class TestObjectController(unittest.TestCase): environ={'REQUEST_METHOD': 'PUT'}, headers=headers) req.body = 'NEWER' - # patch the commit method to do nothing so EC object gets - # no .durable file + # patch the commit method to do nothing so EC object is non-durable with mock.patch('swift.obj.diskfile.ECDiskFileWriter.commit'): resp = req.get_response(self.object_controller) self.assertEqual(resp.status_int, 201) @@ -6853,15 +6847,17 @@ class TestObjectServer(unittest.TestCase): self.assertEqual(len(log_lines), 1) self.assertIn(' 499 ', log_lines[0]) - # verify successful object data and durable state file write + # verify successful object data file write found_files = self.find_files() - # .data file is there + # non durable .data file is there 
self.assertEqual(len(found_files['.data']), 1) obj_datafile = found_files['.data'][0] self.assertEqual("%s#2.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # but .durable isn't - self.assertEqual(found_files['.durable'], []) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # And no container update self.assertFalse(_container_update.called) @@ -6891,15 +6887,17 @@ class TestObjectServer(unittest.TestCase): self.assertEqual(len(log_lines), 1) self.assertIn(' 499 ', log_lines[0]) - # verify successful object data and durable state file write + # verify successful object data file write found_files = self.find_files() - # .data file is there + # non durable .data file is there self.assertEqual(len(found_files['.data']), 1) obj_datafile = found_files['.data'][0] self.assertEqual("%s#2.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # but .durable isn't - self.assertEqual(found_files['.durable'], []) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # And no container update self.assertFalse(_container_update.called) @@ -6948,7 +6946,7 @@ class TestObjectServer(unittest.TestCase): resp.read() resp.close() - # verify successful object data and durable state file write + # verify successful object data file write put_timestamp = context['put_timestamp'] found_files = self.find_files() # .data file is there @@ -6956,8 +6954,10 @@ class TestObjectServer(unittest.TestCase): obj_datafile = found_files['.data'][0] self.assertEqual("%s.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # replicated objects do not have a .durable file - self.assertEqual(found_files['.durable'], []) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # And container update was called self.assertTrue(context['mock_container_update'].called) @@ -6991,13 +6991,12 @@ class TestObjectServer(unittest.TestCase): # .data file is there self.assertEqual(len(found_files['.data']), 1) obj_datafile = found_files['.data'][0] - self.assertEqual("%s#2.data" % put_timestamp.internal, + self.assertEqual("%s#2#d.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # .durable file is there - self.assertEqual(len(found_files['.durable']), 1) - durable_file = found_files['.durable'][0] - self.assertEqual("%s.durable" % put_timestamp.internal, - os.path.basename(durable_file)) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # And container update was called self.assertTrue(context['mock_container_update'].called) @@ -7047,8 +7046,7 @@ class TestObjectServer(unittest.TestCase): # no artifacts left on disk found_files = self.find_files() - self.assertEqual(len(found_files['.data']), 0) - self.assertEqual(len(found_files['.durable']), 0) + self.assertFalse(found_files) # ... 
and no container update _container_update = context['mock_container_update'] self.assertFalse(_container_update.called) @@ -7112,13 +7110,12 @@ class TestObjectServer(unittest.TestCase): # .data file is there self.assertEqual(len(found_files['.data']), 1) obj_datafile = found_files['.data'][0] - self.assertEqual("%s#2.data" % put_timestamp.internal, + self.assertEqual("%s#2#d.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # .durable file is there - self.assertEqual(len(found_files['.durable']), 1) - durable_file = found_files['.durable'][0] - self.assertEqual("%s.durable" % put_timestamp.internal, - os.path.basename(durable_file)) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # And container update was called self.assertTrue(context['mock_container_update'].called) @@ -7139,15 +7136,17 @@ class TestObjectServer(unittest.TestCase): resp.close() put_timestamp = context['put_timestamp'] _container_update = context['mock_container_update'] - # verify that durable file was NOT created + # verify that durable data file was NOT created found_files = self.find_files() - # .data file is there + # non durable .data file is there self.assertEqual(len(found_files['.data']), 1) obj_datafile = found_files['.data'][0] self.assertEqual("%s#2.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # but .durable isn't - self.assertEqual(found_files['.durable'], []) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # And no container update self.assertFalse(_container_update.called) @@ -7196,13 +7195,12 @@ class TestObjectServer(unittest.TestCase): # .data file is there self.assertEqual(len(found_files['.data']), 1) obj_datafile = found_files['.data'][0] - self.assertEqual("%s#2.data" % put_timestamp.internal, + self.assertEqual("%s#2#d.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # .durable file is there - self.assertEqual(len(found_files['.durable']), 1) - durable_file = found_files['.durable'][0] - self.assertEqual("%s.durable" % put_timestamp.internal, - os.path.basename(durable_file)) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # And container update was called self.assertTrue(context['mock_container_update'].called) @@ -7246,13 +7244,12 @@ class TestObjectServer(unittest.TestCase): # .data file is there self.assertEqual(len(found_files['.data']), 1) obj_datafile = found_files['.data'][0] - self.assertEqual("%s#2.data" % put_timestamp.internal, + self.assertEqual("%s#2#d.data" % put_timestamp.internal, os.path.basename(obj_datafile)) - # ... 
and .durable is there - self.assertEqual(len(found_files['.durable']), 1) - durable_file = found_files['.durable'][0] - self.assertEqual("%s.durable" % put_timestamp.internal, - os.path.basename(durable_file)) + # but no other files + self.assertFalse(found_files['.data'][1:]) + found_files.pop('.data') + self.assertFalse(found_files) # but no container update self.assertFalse(context['mock_container_update'].called) diff --git a/test/unit/obj/test_ssync.py b/test/unit/obj/test_ssync.py index 21c09b59f5..9ecc4ca333 100644 --- a/test/unit/obj/test_ssync.py +++ b/test/unit/obj/test_ssync.py @@ -390,9 +390,9 @@ class TestSsyncEC(TestBaseSsync): tx_objs, policy, frag_index, rx_node_index) self._verify_tombstones(tx_tombstones, policy) - def test_handoff_fragment_only_missing_durable(self): + def test_handoff_fragment_only_missing_durable_state(self): # test that a sync_revert type job does not PUT when the rx is only - # missing a durable file + # missing durable state policy = POLICIES.default rx_node_index = frag_index = 0 tx_node_index = 1 @@ -405,10 +405,10 @@ class TestSsyncEC(TestBaseSsync): expected_subreqs = defaultdict(list) - # o1 in sync on rx but rx missing .durable - no PUT required - t1a = next(self.ts_iter) # older rx .data with .durable + # o1 in sync on rx but rx missing durable state - no PUT required + t1a = next(self.ts_iter) # older durable rx .data t1b = next(self.ts_iter) # rx .meta - t1c = next(self.ts_iter) # tx .data with .durable, rx missing .durable + t1c = next(self.ts_iter) # durable tx .data, non-durable rx .data obj_name = 'o1' tx_objs[obj_name] = self._create_ondisk_files( tx_df_mgr, obj_name, policy, t1c, (tx_node_index, rx_node_index,)) @@ -419,7 +419,7 @@ class TestSsyncEC(TestBaseSsync): rx_objs[obj_name] = self._create_ondisk_files( rx_df_mgr, obj_name, policy, t1c, (rx_node_index, 9), commit=False) - # o2 on rx has wrong frag_indexes and missing .durable - PUT required + # o2 on rx has wrong frag_indexes and is non-durable - PUT required t2 = next(self.ts_iter) obj_name = 'o2' tx_objs[obj_name] = self._create_ondisk_files( @@ -428,7 +428,7 @@ class TestSsyncEC(TestBaseSsync): rx_df_mgr, obj_name, policy, t2, (13, 14), commit=False) expected_subreqs['PUT'].append(obj_name) - # o3 on rx has frag at other time missing .durable - PUT required + # o3 on rx has frag at other time and non-durable - PUT required t3 = next(self.ts_iter) obj_name = 'o3' tx_objs[obj_name] = self._create_ondisk_files( @@ -656,6 +656,79 @@ class TestSsyncEC(TestBaseSsync): self.assertIn("Invalid X-Backend-Ssync-Frag-Index 'Not a number'", error_msg) + def test_revert_job_with_legacy_durable(self): + # test a sync_revert type job using a sender object with a legacy + # durable file, that will create a receiver object with durable data + policy = POLICIES.default + rx_node_index = 0 + # for a revert job we iterate over frag index that belongs on + # remote node + frag_index = rx_node_index + + # create non durable tx obj by not committing, then create a legacy + # .durable file + tx_objs = {} + tx_df_mgr = self.daemon._diskfile_router[policy] + rx_df_mgr = self.rx_controller._diskfile_router[policy] + t1 = next(self.ts_iter) + tx_objs['o1'] = self._create_ondisk_files( + tx_df_mgr, 'o1', policy, t1, (rx_node_index,), commit=False) + tx_datadir = tx_objs['o1'][0]._datadir + durable_file = os.path.join(tx_datadir, t1.internal + '.durable') + with open(durable_file, 'wb'): + pass + self.assertEqual(2, len(os.listdir(tx_datadir))) # sanity check + + suffixes = 
[os.path.basename(os.path.dirname(tx_datadir))] + + # create ssync sender instance... + job = {'device': self.device, + 'partition': self.partition, + 'policy': policy, + 'frag_index': frag_index} + node = dict(self.rx_node) + node.update({'index': rx_node_index}) + sender = ssync_sender.Sender(self.daemon, node, job, suffixes) + # wrap connection from tx to rx to capture ssync messages... + sender.connect, trace = self.make_connect_wrapper(sender) + + # run the sync protocol... + sender() + + # verify protocol + results = self._analyze_trace(trace) + self.assertEqual(1, len(results['tx_missing'])) + self.assertEqual(1, len(results['rx_missing'])) + self.assertEqual(1, len(results['tx_updates'])) + self.assertFalse(results['rx_updates']) + + # sanity check - rx diskfile is durable + expected_rx_file = '%s#%s#d.data' % (t1.internal, rx_node_index) + rx_df = self._open_rx_diskfile('o1', policy, rx_node_index) + self.assertEqual([expected_rx_file], os.listdir(rx_df._datadir)) + + # verify on disk files... + self._verify_ondisk_files( + tx_objs, policy, frag_index, rx_node_index) + + # verify that tx and rx both generate the same suffix hashes... + tx_hashes = tx_df_mgr.get_hashes( + self.device, self.partition, suffixes, policy) + rx_hashes = rx_df_mgr.get_hashes( + self.device, self.partition, suffixes, policy) + self.assertEqual(suffixes, tx_hashes.keys()) # sanity + self.assertEqual(tx_hashes, rx_hashes) + + # sanity check - run ssync again and expect no sync activity + sender = ssync_sender.Sender(self.daemon, node, job, suffixes) + sender.connect, trace = self.make_connect_wrapper(sender) + sender() + results = self._analyze_trace(trace) + self.assertEqual(1, len(results['tx_missing'])) + self.assertFalse(results['rx_missing']) + self.assertFalse(results['tx_updates']) + self.assertFalse(results['rx_updates']) + @patch_policies class TestSsyncReplication(TestBaseSsync): diff --git a/test/unit/obj/test_ssync_receiver.py b/test/unit/obj/test_ssync_receiver.py index fc233601b2..b94cc13fd5 100644 --- a/test/unit/obj/test_ssync_receiver.py +++ b/test/unit/obj/test_ssync_receiver.py @@ -671,7 +671,7 @@ class TestReceiver(unittest.TestCase): self.controller._diskfile_router = diskfile.DiskFileRouter( self.conf, self.controller.logger) - # make rx disk file but don't commit it, so .durable is missing + # make rx disk file but don't commit it, so durable state is missing ts1 = next(make_timestamp_iter()).internal object_dir = utils.storage_directory( os.path.join(self.testdir, 'sda1', @@ -714,7 +714,7 @@ class TestReceiver(unittest.TestCase): self.controller._diskfile_router = diskfile.DiskFileRouter( self.conf, self.controller.logger) - # make rx disk file but don't commit it, so .durable is missing + # make rx disk file but don't commit it, so durable state is missing ts1 = next(make_timestamp_iter()).internal object_dir = utils.storage_directory( os.path.join(self.testdir, 'sda1', diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 72f5a5fd66..290086205e 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -1825,16 +1825,6 @@ class TestObjectController(unittest.TestCase): contents = ''.join(df.reader()) got_pieces.add(contents) - # check presence for a .durable file for the timestamp - durable_file = os.path.join( - _testdir, node['device'], storage_directory( - diskfile.get_data_dir(policy), - partition, hash_path('a', 'ec-con', 'o1')), - utils.Timestamp(df.timestamp).internal + '.durable') - - if os.path.isfile(durable_file): - 
got_durable.append(True) - lmeta = dict((k.lower(), v) for k, v in meta.items()) got_indices.add( lmeta['x-object-sysmeta-ec-frag-index']) @@ -1855,11 +1845,24 @@ class TestObjectController(unittest.TestCase): lmeta['etag'], md5(contents).hexdigest()) + # check presence for a durable data file for the timestamp + durable_file = ( + utils.Timestamp(df.timestamp).internal + + '#%s' % lmeta['x-object-sysmeta-ec-frag-index'] + + '#d.data') + durable_file = os.path.join( + _testdir, node['device'], storage_directory( + diskfile.get_data_dir(policy), + partition, hash_path('a', 'ec-con', 'o1')), + durable_file) + if os.path.isfile(durable_file): + got_durable.append(True) + self.assertEqual(expected_pieces, got_pieces) self.assertEqual(set(('0', '1', '2')), got_indices) # verify at least 2 puts made it all the way to the end of 2nd - # phase, ie at least 2 .durable statuses were written + # phase, ie at least 2 durable statuses were written num_durable_puts = sum(d is True for d in got_durable) self.assertGreaterEqual(num_durable_puts, 2) @@ -1908,16 +1911,21 @@ class TestObjectController(unittest.TestCase): node['device'], partition, 'a', 'ec-con', 'o2', policy=ec_policy) with df.open(): + meta = df.get_metadata() contents = ''.join(df.reader()) fragment_archives.append(contents) self.assertEqual(len(contents), expected_length) - # check presence for a .durable file for the timestamp + # check presence for a durable data file for the timestamp + durable_file = ( + utils.Timestamp(df.timestamp).internal + + '#%s' % meta['X-Object-Sysmeta-Ec-Frag-Index'] + + '#d.data') durable_file = os.path.join( _testdir, node['device'], storage_directory( diskfile.get_data_dir(ec_policy), partition, hash_path('a', 'ec-con', 'o2')), - utils.Timestamp(df.timestamp).internal + '.durable') + durable_file) if os.path.isfile(durable_file): got_durable.append(True) @@ -1947,7 +1955,7 @@ class TestObjectController(unittest.TestCase): self.assertEqual(seg, obj[segment_start:segment_end]) # verify at least 2 puts made it all the way to the end of 2nd - # phase, ie at least 2 .durable statuses were written + # phase, ie at least 2 durable statuses were written num_durable_puts = sum(d is True for d in got_durable) self.assertGreaterEqual(num_durable_puts, 2) @@ -5618,8 +5626,8 @@ class TestECGets(unittest.TestCase): :param node_state: a dict that maps a node index to the desired state for that node. Each desired state is a list of dicts, with each dict describing object reference, - frag_index and file extensions to be moved to the - node's hash_dir. + frag_index and whether the file moved to the node's + hash_dir should be marked as durable or not. 
""" (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, obj2srv, obj3srv) = _test_servers @@ -5682,19 +5690,19 @@ class TestECGets(unittest.TestCase): # node state is in form: # {node_index: [{ref: object reference, # frag_index: index, - # exts: ['.data' etc]}, ...], + # durable: True or False}, ...], # node_index: ...} for node_index, state in node_state.items(): dest = node_hash_dirs[node_index] for frag_info in state: src = node_tmp_dirs[frag_info['frag_index']][frag_info['ref']] - src_files = [f for f in os.listdir(src) - if f.endswith(frag_info['exts'])] - self.assertEqual(len(frag_info['exts']), len(src_files), - 'Bad test setup for node %s, obj %s' - % (node_index, frag_info['ref'])) - for f in src_files: - move(os.path.join(src, f), os.path.join(dest, f)) + src_files = os.listdir(src) + # sanity check, expect just a single .data file + self.assertFalse(src_files[1:]) + dest_file = src_files[0].replace( + '#d', '#d' if frag_info['durable'] else '') + move(os.path.join(src, src_files[0]), + os.path.join(dest, dest_file)) # do an object GET get_req = Request.blank(obj_path, method='GET') @@ -5707,9 +5715,9 @@ class TestECGets(unittest.TestCase): # durable missing from 2/3 nodes node_state = { - 0: [dict(ref='obj1', frag_index=0, exts=('.data', '.durable'))], - 1: [dict(ref='obj1', frag_index=1, exts=('.data',))], - 2: [dict(ref='obj1', frag_index=2, exts=('.data',))] + 0: [dict(ref='obj1', frag_index=0, durable=True)], + 1: [dict(ref='obj1', frag_index=1, durable=False)], + 2: [dict(ref='obj1', frag_index=2, durable=False)] } resp = self._setup_nodes_and_do_GET(objs, node_state) @@ -5719,9 +5727,9 @@ class TestECGets(unittest.TestCase): # all files missing on 1 node, durable missing from 1/2 other nodes # durable missing from 2/3 nodes node_state = { - 0: [dict(ref='obj1', frag_index=0, exts=('.data', '.durable'))], + 0: [dict(ref='obj1', frag_index=0, durable=True)], 1: [], - 2: [dict(ref='obj1', frag_index=2, exts=('.data',))] + 2: [dict(ref='obj1', frag_index=2, durable=False)] } resp = self._setup_nodes_and_do_GET(objs, node_state) @@ -5730,9 +5738,9 @@ class TestECGets(unittest.TestCase): # durable missing from all 3 nodes node_state = { - 0: [dict(ref='obj1', frag_index=0, exts=('.data',))], - 1: [dict(ref='obj1', frag_index=1, exts=('.data',))], - 2: [dict(ref='obj1', frag_index=2, exts=('.data',))] + 0: [dict(ref='obj1', frag_index=0, durable=False)], + 1: [dict(ref='obj1', frag_index=1, durable=False)], + 2: [dict(ref='obj1', frag_index=2, durable=False)] } resp = self._setup_nodes_and_do_GET(objs, node_state) @@ -5746,8 +5754,8 @@ class TestECGets(unittest.TestCase): # scenario: only two frags, both on same node node_state = { 0: [], - 1: [dict(ref='obj1', frag_index=0, exts=('.data', '.durable')), - dict(ref='obj1', frag_index=1, exts=('.data',))], + 1: [dict(ref='obj1', frag_index=0, durable=True), + dict(ref='obj1', frag_index=1, durable=False)], 2: [] } @@ -5758,9 +5766,9 @@ class TestECGets(unittest.TestCase): # scenario: all 3 frags on same node node_state = { 0: [], - 1: [dict(ref='obj1', frag_index=0, exts=('.data', '.durable')), - dict(ref='obj1', frag_index=1, exts=('.data',)), - dict(ref='obj1', frag_index=2, exts=('.data',))], + 1: [dict(ref='obj1', frag_index=0, durable=True), + dict(ref='obj1', frag_index=1, durable=False), + dict(ref='obj1', frag_index=2, durable=False)], 2: [] } @@ -5778,32 +5786,32 @@ class TestECGets(unittest.TestCase): # newer non-durable frags do not prevent proxy getting the durable obj1 node_state = { - 0: [dict(ref='obj3', 
frag_index=0, exts=('.data',)), - dict(ref='obj2', frag_index=0, exts=('.data',)), - dict(ref='obj1', frag_index=0, exts=('.data', '.durable'))], - 1: [dict(ref='obj3', frag_index=1, exts=('.data',)), - dict(ref='obj2', frag_index=1, exts=('.data',)), - dict(ref='obj1', frag_index=1, exts=('.data', '.durable'))], - 2: [dict(ref='obj3', frag_index=2, exts=('.data',)), - dict(ref='obj2', frag_index=2, exts=('.data',)), - dict(ref='obj1', frag_index=2, exts=('.data', '.durable'))], + 0: [dict(ref='obj3', frag_index=0, durable=False), + dict(ref='obj2', frag_index=0, durable=False), + dict(ref='obj1', frag_index=0, durable=True)], + 1: [dict(ref='obj3', frag_index=1, durable=False), + dict(ref='obj2', frag_index=1, durable=False), + dict(ref='obj1', frag_index=1, durable=True)], + 2: [dict(ref='obj3', frag_index=2, durable=False), + dict(ref='obj2', frag_index=2, durable=False), + dict(ref='obj1', frag_index=2, durable=True)], } resp = self._setup_nodes_and_do_GET(objs, node_state) self.assertEqual(resp.status_int, 200) self.assertEqual(resp.body, objs['obj1']['body']) - # .durables at two timestamps: in this scenario proxy is guaranteed + # durable frags at two timestamps: in this scenario proxy is guaranteed # to see the durable at ts_2 with one of the first 2 responses, so will # then prefer that when requesting from third obj server node_state = { - 0: [dict(ref='obj3', frag_index=0, exts=('.data',)), - dict(ref='obj2', frag_index=0, exts=('.data',)), - dict(ref='obj1', frag_index=0, exts=('.data', '.durable'))], - 1: [dict(ref='obj3', frag_index=1, exts=('.data',)), - dict(ref='obj2', frag_index=1, exts=('.data', '.durable'))], - 2: [dict(ref='obj3', frag_index=2, exts=('.data',)), - dict(ref='obj2', frag_index=2, exts=('.data', '.durable'))], + 0: [dict(ref='obj3', frag_index=0, durable=False), + dict(ref='obj2', frag_index=0, durable=False), + dict(ref='obj1', frag_index=0, durable=True)], + 1: [dict(ref='obj3', frag_index=1, durable=False), + dict(ref='obj2', frag_index=1, durable=True)], + 2: [dict(ref='obj3', frag_index=2, durable=False), + dict(ref='obj2', frag_index=2, durable=True)], } resp = self._setup_nodes_and_do_GET(objs, node_state) @@ -5826,10 +5834,10 @@ class TestECGets(unittest.TestCase): # back two responses with frag index 1, and will then return to node 0 # for frag_index 0. 
node_state = { - 0: [dict(ref='obj1a', frag_index=0, exts=('.data',)), - dict(ref='obj1a', frag_index=1, exts=('.data',))], - 1: [dict(ref='obj1b', frag_index=1, exts=('.data', '.durable'))], - 2: [dict(ref='obj1c', frag_index=1, exts=('.data', '.durable'))] + 0: [dict(ref='obj1a', frag_index=0, durable=False), + dict(ref='obj1a', frag_index=1, durable=False)], + 1: [dict(ref='obj1b', frag_index=1, durable=True)], + 2: [dict(ref='obj1c', frag_index=1, durable=True)] } resp = self._setup_nodes_and_do_GET(objs, node_state) @@ -5840,9 +5848,9 @@ class TestECGets(unittest.TestCase): # 404 (the third, 'extra', obj server GET will return 404 because it # will be sent frag prefs that exclude frag_index 1) node_state = { - 0: [dict(ref='obj1a', frag_index=1, exts=('.data',))], - 1: [dict(ref='obj1b', frag_index=1, exts=('.data', '.durable'))], - 2: [dict(ref='obj1c', frag_index=1, exts=('.data',))] + 0: [dict(ref='obj1a', frag_index=1, durable=False)], + 1: [dict(ref='obj1b', frag_index=1, durable=True)], + 2: [dict(ref='obj1c', frag_index=1, durable=False)] } resp = self._setup_nodes_and_do_GET(objs, node_state) @@ -5947,7 +5955,6 @@ class TestObjectDisconnectCleanup(unittest.TestCase): def test_ec_disconnect_cleans_up(self): self._check_disconnect_cleans_up('ec') found_files = self.find_files() - self.assertEqual(found_files['.durable'], []) self.assertEqual(found_files['.data'], []) def test_repl_chunked_transfer_disconnect_cleans_up(self): @@ -5958,7 +5965,6 @@ class TestObjectDisconnectCleanup(unittest.TestCase): def test_ec_chunked_transfer_disconnect_cleans_up(self): self._check_disconnect_cleans_up('ec', is_chunked=True) found_files = self.find_files() - self.assertEqual(found_files['.durable'], []) self.assertEqual(found_files['.data'], [])
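The tests above all assert on the new on-disk naming convention for EC fragment archives. As a minimal, self-contained sketch of that convention only (the helper names below are illustrative; they are not the `_make_datafilename` / `write_diskfile` helpers added by this patch), a durable or non-durable fragment archive filename can be built and parsed like this:

# Illustrative sketch only: these helpers are not part of the patch; they
# demonstrate the filename convention that the tests above exercise.
# An EC fragment archive is stored as <timestamp>#<frag_index>.data and is
# renamed to <timestamp>#<frag_index>#d.data once it has been committed
# (made durable). Replicated policies keep the plain <timestamp>.data form.

def make_ec_datafilename(timestamp, frag_index, durable=True):
    """Build an EC fragment archive filename, e.g. 1418673556.92690#5#d.data."""
    durable_marker = '#d' if durable else ''
    return '%s#%s%s.data' % (timestamp, frag_index, durable_marker)


def parse_ec_datafilename(filename):
    """Split an EC .data filename into (timestamp, frag_index, durable)."""
    stem, ext = filename.rsplit('.', 1)
    assert ext == 'data', 'not a .data file: %s' % filename
    parts = stem.split('#')
    return parts[0], int(parts[1]), (len(parts) > 2 and parts[2] == 'd')


if __name__ == '__main__':
    name = make_ec_datafilename('1418673556.92690', 5)
    assert name == '1418673556.92690#5#d.data'
    assert parse_ec_datafilename(name) == ('1418673556.92690', 5, True)
    non_durable = make_ec_datafilename('1418673556.92690', 5, durable=False)
    assert parse_ec_datafilename(non_durable) == ('1418673556.92690', 5, False)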