From 40ba7f6172465c2f9bc63fb6f4d24f8bb5f96ef9 Mon Sep 17 00:00:00 2001 From: Kota Tsuyuzaki Date: Thu, 6 Aug 2015 01:06:47 -0700 Subject: [PATCH] EC Fragment Duplication - Foundational Global EC Cluster Support

This patch enables efficient PUT/GET for a globally distributed cluster[1].

Problem: Erasure coding can reduce the amount of actually stored data below that of the replicated model. For example, ec_k=6, ec_m=3 stores 1.5x the original data, which is smaller than 3x replicated. However, unlike replication, erasure coding requires at least ec_k fragments of the total ec_k + ec_m fragments to be available to service a read (e.g. 6 of 9 in the case above). As such, if we store an EC object in a Swift cluster spanning 2 geographically distributed data centers with the same volume of disks, the fragments will likely be stored evenly across them (about 4 and 5), so we still need to access a faraway data center to decode the original object. In addition, if one of the data centers is lost in a disaster, the stored objects are lost forever. To ensure highly durable storage, one might consider adding *more* parity fragments (e.g. ec_k=6, ec_m=10); unfortunately, this causes *significant* performance degradation due to the cost of the mathematical calculations for erasure coding encode/decode.

How this resolves the problem: EC Fragment Duplication, like the approach described above, adds *more* fragments from which to rebuild an object. The difference is that it makes *copies* of the encoded fragments instead of computing extra parity. Experimental results[1][2] show that employing small ec_k and ec_m yields sufficient performance to store/retrieve objects.

On PUT:
- Encode the incoming object with small ec_k and ec_m <- faster!
- Make duplicated copies of the encoded fragments. The # of copies is determined by 'ec_duplication_factor' in swift.conf
- Store all fragments in the Swift Global EC Cluster

The duplicated fragments tighten the existing requirements for decoding objects in service of a read request. All fragments are stored with their X-Object-Sysmeta-Ec-Frag-Index. In this change, the X-Object-Sysmeta-Ec-Frag-Index represents the actual fragment index encoded by PyECLib, so there *will* be duplicates. Any time we must decode the original object data, we may only count ec_k fragments as unique according to their X-Object-Sysmeta-Ec-Frag-Index: no duplicate X-Object-Sysmeta-Ec-Frag-Index may be used when decoding an object, and duplicates should be expected and avoided where possible.

On GET: This patch includes the following changes:
- Change the GET path to sort primary nodes into grouped subsets, so that each subset includes unique fragments
- Change the Reconstructor to be aware of possibly duplicated fragments

For example, with this change, a policy could be configured such that

swift.conf:
ec_num_data_fragments = 2
ec_num_parity_fragments = 1
ec_duplication_factor = 2
(object ring must have 6 replicas)

At Object-Server:
node index (from object ring): 0 1 2 3 4 5 <- keep node index for reconstruct decision
X-Object-Sysmeta-Ec-Frag-Index: 0 1 2 0 1 2 <- each object keeps the actual fragment index for the backend (PyECLib)

Additional improvements to Global EC Cluster Support will require features such as Composite Rings, and more efficient fragment rebalance/reconstruction.
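To make the index mapping above concrete, here is a minimal sketch of the node-index to backend-fragment-index mapping that this patch implements as ECStoragePolicy.get_backend_index (the standalone Policy class below is illustrative only, not the patched class):

    # Sketch: map a ring node index to the backend (PyECLib) fragment index.
    # Duplicates of fragment i live at node indexes i, i + n_unique, ...
    class Policy(object):
        def __init__(self, ec_ndata, ec_nparity, ec_duplication_factor=1):
            self.ec_ndata = ec_ndata
            self.ec_nparity = ec_nparity
            self.ec_duplication_factor = ec_duplication_factor

        @property
        def ec_n_unique_fragments(self):
            return self.ec_ndata + self.ec_nparity

        def get_backend_index(self, node_index):
            return node_index % self.ec_n_unique_fragments

    policy = Policy(ec_ndata=2, ec_nparity=1, ec_duplication_factor=2)
    # object ring with 6 replicas -> node indexes 0..5
    print([policy.get_backend_index(i) for i in range(6)])
    # -> [0, 1, 2, 0, 1, 2], matching the table above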
1: http://goo.gl/IYiNPk (Swift Design Spec Repository) 2: http://goo.gl/frgj6w (Slide Share for OpenStack Summit Tokyo) Doc-Impact Co-Authored-By: Clay Gerrard Change-Id: Idd155401982a2c48110c30b480966a863f6bd305 --- doc/source/overview_erasure_code.rst | 98 ++ etc/swift.conf-sample | 6 +- swift/common/storage_policy.py | 57 +- swift/common/utils.py | 15 + swift/obj/reconstructor.py | 139 +- swift/proxy/controllers/obj.py | 55 +- test/unit/__init__.py | 168 +- test/unit/common/test_storage_policy.py | 84 +- test/unit/common/test_utils.py | 27 + test/unit/helpers.py | 54 +- test/unit/obj/test_reconstructor.py | 609 ++++++- test/unit/proxy/controllers/test_obj.py | 933 +++++++++-- test/unit/proxy/test_server.py | 1984 ++++++++++++----------- 13 files changed, 2994 insertions(+), 1235 deletions(-) diff --git a/doc/source/overview_erasure_code.rst b/doc/source/overview_erasure_code.rst index 879f52d88f..7128c917e5 100644 --- a/doc/source/overview_erasure_code.rst +++ b/doc/source/overview_erasure_code.rst @@ -205,6 +205,11 @@ an EC policy can be setup is shown below:: ec_num_parity_fragments = 4 ec_object_segment_size = 1048576 + # Duplicated EC fragments are proof-of-concept, experimental support to + # enable Global Erasure Coding policies with multiple regions acting as + # independent failure domains. Do not change the default except in + # development/testing. + ec_duplication_factor = 1 + Let's take a closer look at each configuration parameter: * ``name``: This is a standard storage policy parameter. @@ -223,6 +228,11 @@ Let's take a closer look at each configuration parameter: comprised of parity. * ``ec_object_segment_size``: The amount of data that will be buffered up before feeding a segment into the encoder/decoder. The default value is 1048576. +* ``ec_duplication_factor``: The number of duplicate copies of each fragment. + This is currently experimental support to enable Global Erasure Coding + policies with multiple regions. Do not change the default except in + development/testing, and please read the "EC Duplication" section below + before changing the default value. When PyECLib encodes an object, it will break it into N fragments. However, what is important during configuration, is how many of those are data and how many @@ -273,6 +283,94 @@ For at least the initial version of EC, it is not recommended that an EC scheme span beyond a single region, as neither performance nor functional validation has been done in such a configuration. +EC Duplication +^^^^^^^^^^^^^^ +
ec_duplication_factor is an option to make duplicate copies of the fragments of erasure-coded Swift objects. The default value is 1 (no duplication). If an erasure code storage policy is configured with a non-default ec_duplication_factor of N > 1, then the policy will create N duplicates of each unique fragment that is returned from the configured EC engine. Duplication of EC fragments is optimal for EC storage policies which require dispersion of fragment data across failure domains. Without duplication, most common EC parameters, such as 10-4, will assign each failure domain fewer than 1/(the number of failure domains) of the total unique fragments, and usually fewer than the number of data fragments required to reconstruct the original data. Guaranteeing enough unique fragments per failure domain would otherwise require more parity fragments; a small arithmetic sketch follows.
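As a quick check of the arithmetic (a sketch only; ``min_parity_fragments_needed()`` is scheme-specific in PyECLib, so the value of 1 below is an assumption rather than a constant), both the required ring size and the PUT quorum scale with the duplication factor::

    ec_ndata, ec_nparity, factor = 10, 4, 2

    unique_fragments = ec_ndata + ec_nparity        # 14 unique fragments
    ring_replicas = unique_fragments * factor       # 28, required ring size

    min_parity_needed = 1                           # assumed PyECLib value
    put_quorum = (ec_ndata + min_parity_needed) * factor   # 22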
In situations that call for more parity, empirical testing has shown that duplication is more efficient in the PUT path than encoding a scheme with num_parity > num_data, which Swift EC also supports. You should evaluate which strategy works best in your environment.

E.g. 10-4 with a duplication factor of 2 will store 28 fragments (i.e. (``ec_num_data_fragments`` + ``ec_num_parity_fragments``) * ``ec_duplication_factor``). This \*can\* allow a failure domain to rebuild an object to full durability even when \*more\* than 14 fragments are unavailable.

.. note::

EC Duplication is part of the ongoing work on EC region support, so some known issues remain before region support is complete:

Known-Issues:

- Unique fragment dispersion

Currently, Swift \*doesn't\* guarantee that the placement of unique fragments across the globally distributed cluster is robust in the disaster recovery case. While the goal is to have duplicates of each unique fragment placed in each region, it is currently possible for duplicates of the same unique fragment to be placed in the same region. Since a set of ``ec_num_data_fragments`` unique fragments is required to reconstruct an object, the suboptimal distribution of duplicates across regions may, in some cases, make it impossible to assemble such a set from a single region.

For example, if we have a Swift cluster with 2 regions, the fragments may be located as follows:

:: + + r1 + #0#d.data + #0#d.data + #2#d.data + #2#d.data + #4#d.data + #4#d.data + r2 + #1#d.data + #1#d.data + #3#d.data + #3#d.data + #5#d.data + #5#d.data + +

In this case, r1 holds only the fragments with indexes 0, 2, 4 and r2 holds the remaining indexes, but 4 unique indexes are required to decode. To resolve this, composite rings, which enable operator-oriented location mapping [1], are under development.

1: https://review.openstack.org/#/c/271920/

- Efficient node iteration for read

Since EC fragment duplication requires a set of unique fragment indexes to decode the original object, it needs more efficient node iteration than Swift currently provides. Swift currently iterates nodes in the order produced by the sorting method defined in the proxy server config (i.e. shuffle, node_timing, or read_affinity). However, the sorted result may include duplicate indexes among the first primaries contacted, even though \*we\* know more nodes will be needed to gather unique fragments. Hence, Swift may frequently issue more than ec_ndata backend requests even when there are no node failures among the object servers.

A possible solution is some refactoring work on NodeIter to provide suitable nodes under fragment duplication, but that is still under development.

-------------- Under the Hood -------------- diff --git a/etc/swift.conf-sample b/etc/swift.conf-sample index 1d21ba20a8..e008274c40 100644 --- a/etc/swift.conf-sample +++ b/etc/swift.conf-sample @@ -84,7 +84,11 @@ aliases = yellow, orange #ec_num_data_fragments = 10 #ec_num_parity_fragments = 4 #ec_object_segment_size = 1048576 - +# +# Duplicated EC fragments are proof-of-concept, experimental support to enable +# Global Erasure Coding policies with multiple regions acting as independent +# failure domains. Do not change the default except in development/testing. +#ec_duplication_factor = 1 # The swift-constraints section sets the basic constraints on data # saved in the swift cluster.
These constraints are automatically diff --git a/swift/common/storage_policy.py b/swift/common/storage_policy.py index 7b55cfed9b..67034d021b 100644 --- a/swift/common/storage_policy.py +++ b/swift/common/storage_policy.py @@ -20,7 +20,8 @@ import textwrap import six from six.moves.configparser import ConfigParser from swift.common.utils import ( - config_true_value, SWIFT_CONF_FILE, whataremyips, list_from_csv) + config_true_value, SWIFT_CONF_FILE, whataremyips, list_from_csv, + config_positive_int_value) from swift.common.ring import Ring, RingData from swift.common.utils import quorum_size from swift.common.exceptions import RingLoadError @@ -406,7 +407,8 @@ class ECStoragePolicy(BaseStoragePolicy): def __init__(self, idx, name='', aliases='', is_default=False, is_deprecated=False, object_ring=None, ec_segment_size=DEFAULT_EC_OBJECT_SEGMENT_SIZE, - ec_type=None, ec_ndata=None, ec_nparity=None): + ec_type=None, ec_ndata=None, ec_nparity=None, + ec_duplication_factor=1): super(ECStoragePolicy, self).__init__( idx=idx, name=name, aliases=aliases, is_default=is_default, @@ -489,6 +491,9 @@ class ECStoragePolicy(BaseStoragePolicy): self._ec_ndata + self.pyeclib_driver.min_parity_fragments_needed() self._fragment_size = None + self._ec_duplication_factor = \ + config_positive_int_value(ec_duplication_factor) + @property def ec_type(self): return self._ec_type @@ -501,6 +506,10 @@ class ECStoragePolicy(BaseStoragePolicy): def ec_nparity(self): return self._ec_nparity + @property + def ec_n_unique_fragments(self): + return self._ec_ndata + self._ec_nparity + @property def ec_segment_size(self): return self._ec_segment_size @@ -538,11 +547,20 @@ class ECStoragePolicy(BaseStoragePolicy): """ return "%s %d+%d" % (self._ec_type, self._ec_ndata, self._ec_nparity) + @property + def ec_duplication_factor(self): + return self._ec_duplication_factor + def __repr__(self): + extra_info = '' + if self.ec_duplication_factor != 1: + extra_info = ', ec_duplication_factor=%d' % \ + self.ec_duplication_factor return ("%s, EC config(ec_type=%s, ec_segment_size=%d, " - "ec_ndata=%d, ec_nparity=%d)") % \ + "ec_ndata=%d, ec_nparity=%d%s)") % \ (super(ECStoragePolicy, self).__repr__(), self.ec_type, - self.ec_segment_size, self.ec_ndata, self.ec_nparity) + self.ec_segment_size, self.ec_ndata, self.ec_nparity, + extra_info) @classmethod def _config_options_map(cls): @@ -552,6 +570,7 @@ class ECStoragePolicy(BaseStoragePolicy): 'ec_object_segment_size': 'ec_segment_size', 'ec_num_data_fragments': 'ec_ndata', 'ec_num_parity_fragments': 'ec_nparity', + 'ec_duplication_factor': 'ec_duplication_factor', }) return options @@ -562,13 +581,14 @@ class ECStoragePolicy(BaseStoragePolicy): info.pop('ec_num_data_fragments') info.pop('ec_num_parity_fragments') info.pop('ec_type') + info.pop('ec_duplication_factor') return info @property def quorum(self): """ Number of successful backend requests needed for the proxy to consider - the client request successful. + the client PUT request successful. The quorum size for EC policies defines the minimum number of data + parity elements required to be able to guarantee @@ -584,7 +604,7 @@ class ECStoragePolicy(BaseStoragePolicy): for every erasure coding scheme, consult PyECLib for min_parity_fragments_needed() """ - return self._ec_quorum_size + return self._ec_quorum_size * self.ec_duplication_factor def load_ring(self, swift_dir): """ @@ -605,18 +625,35 @@ class ECStoragePolicy(BaseStoragePolicy): considering the number of nodes in the primary list from the ring. 
""" - nodes_configured = len(ring_data._replica2part2dev_id) - if nodes_configured != (self.ec_ndata + self.ec_nparity): + configured_fragment_count = len(ring_data._replica2part2dev_id) + required_fragment_count = \ + (self.ec_n_unique_fragments) * self.ec_duplication_factor + if configured_fragment_count != required_fragment_count: raise RingLoadError( 'EC ring for policy %s needs to be configured with ' 'exactly %d replicas. Got %d.' % ( - self.name, self.ec_ndata + self.ec_nparity, - nodes_configured)) + self.name, required_fragment_count, + configured_fragment_count)) self.object_ring = Ring( swift_dir, ring_name=self.ring_name, validation_hook=validate_ring_data) + def get_backend_index(self, node_index): + """ + Backend index for PyECLib + + :param node_index: integer of node index + :return: integer of actual fragment index. if param is not an integer, + return None instead + """ + try: + node_index = int(node_index) + except ValueError: + return None + + return node_index % self.ec_n_unique_fragments + class StoragePolicyCollection(object): """ diff --git a/swift/common/utils.py b/swift/common/utils.py index 4c55299779..7145bb84cc 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -345,6 +345,21 @@ def config_true_value(value): (isinstance(value, six.string_types) and value.lower() in TRUE_VALUES) +def config_positive_int_value(value): + """ + Returns positive int value if it can be cast by int() and it's an + integer > 0. (not including zero) Raises ValueError otherwise. + """ + try: + value = int(value) + if value < 1: + raise ValueError() + except (TypeError, ValueError): + raise ValueError( + 'Config option must be an positive int number, not "%s".' % value) + return value + + def config_auto_int_value(value, default): """ Returns default if value is None or 'auto'. 
diff --git a/swift/obj/reconstructor.py b/swift/obj/reconstructor.py index b0f35cb6d1..ca9949ad85 100644 --- a/swift/obj/reconstructor.py +++ b/swift/obj/reconstructor.py @@ -193,6 +193,8 @@ class ObjectReconstructor(Daemon): return True def _full_path(self, node, part, path, policy): + frag_index = (policy.get_backend_index(node['index']) + if 'index' in node else 'handoff') return '%(replication_ip)s:%(replication_port)s' \ '/%(device)s/%(part)s%(path)s ' \ 'policy#%(policy)d frag#%(frag_index)s' % { @@ -201,7 +203,7 @@ class ObjectReconstructor(Daemon): 'device': node['device'], 'part': part, 'path': path, 'policy': policy, - 'frag_index': node.get('index', 'handoff'), + 'frag_index': frag_index, } def _get_response(self, node, part, path, headers, policy): @@ -217,6 +219,7 @@ class ObjectReconstructor(Daemon): :class:`~swift.common.storage_policy.BaseStoragePolicy` :returns: response """ + full_path = self._full_path(node, part, path, policy) resp = None try: with ConnectionTimeout(self.conn_timeout): @@ -224,18 +227,18 @@ class ObjectReconstructor(Daemon): part, 'GET', path, headers=headers) with Timeout(self.node_timeout): resp = conn.getresponse() + resp.full_path = full_path if resp.status not in [HTTP_OK, HTTP_NOT_FOUND]: self.logger.warning( _("Invalid response %(resp)s from %(full_path)s"), - {'resp': resp.status, - 'full_path': self._full_path(node, part, path, policy)}) + {'resp': resp.status, 'full_path': full_path}) resp = None elif resp.status == HTTP_NOT_FOUND: resp = None except (Exception, Timeout): self.logger.exception( _("Trying to GET %(full_path)s"), { - 'full_path': self._full_path(node, part, path, policy)}) + 'full_path': full_path}) return resp def reconstruct_fa(self, job, node, datafile_metadata): @@ -259,7 +262,7 @@ class ObjectReconstructor(Daemon): # the fragment index we need to reconstruct is the position index # of the node we're rebuilding to within the primary part list - fi_to_rebuild = node['index'] + fi_to_rebuild = job['policy'].get_backend_index(node['index']) # KISS send out connection requests to all nodes, see what sticks. 
# Use fragment preferences header to tell other nodes that we want headers['X-Backend-Fragment-Preferences'] = json.dumps(frag_prefs) pile = GreenAsyncPile(len(part_nodes)) path = datafile_metadata['name'] - for node in part_nodes: - pile.spawn(self._get_response, node, job['partition'], + for _node in part_nodes: + pile.spawn(self._get_response, _node, job['partition'], path, headers, job['policy']) - responses = [] - etag = None + + buckets = defaultdict(dict) + etag_buckets = {} + error_resp_count = 0 for resp in pile: if not resp: + error_resp_count += 1 continue resp.headers = HeaderKeyDict(resp.getheaders()) - if str(fi_to_rebuild) == \ - resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index'): + frag_index = resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index') + try: + unique_index = int(frag_index) + except (TypeError, ValueError): + # A successful response should include a valid X-Object- + # Sysmeta-Ec-Frag-Index, but for safety catch the case of a + # missing or invalid frag index and log a warning for it + self.logger.warning( + 'Invalid resp from %s ' + '(invalid X-Object-Sysmeta-Ec-Frag-Index)', + resp.full_path) continue - if resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index') in set( - r.headers.get('X-Object-Sysmeta-Ec-Frag-Index') - for r in responses): - continue - responses.append(resp) - etag = sorted(responses, reverse=True, - key=lambda r: Timestamp( - r.headers.get('X-Backend-Timestamp') - ))[0].headers.get('X-Object-Sysmeta-Ec-Etag') - responses = [r for r in responses if - r.headers.get('X-Object-Sysmeta-Ec-Etag') == etag] - if len(responses) >= job['policy'].ec_ndata: - break - else: - self.logger.error( - 'Unable to get enough responses (%s/%s) ' - 'to reconstruct %s with ETag %s' % ( - len(responses), job['policy'].ec_ndata, + if fi_to_rebuild == unique_index: + # TODO: With duplicated EC frags it's not unreasonable to find + # that the very fragment we're trying to rebuild exists on + # another primary node. In this case we should stream it + # directly from the remote node to our target instead of + # rebuilding it; for now we simply ignore it. + self.logger.debug( + 'Found existing frag #%s while rebuilding #%s from %s', + unique_index, fi_to_rebuild, self._full_path( + node, job['partition'], datafile_metadata['name'], + job['policy'])) + continue + + timestamp = resp.headers.get('X-Backend-Timestamp') + if not timestamp: + self.logger.warning( + 'Invalid resp from %s (missing X-Backend-Timestamp)', + resp.full_path) + continue + timestamp = Timestamp(timestamp) + + etag = resp.headers.get('X-Object-Sysmeta-Ec-Etag') + if not etag: + self.logger.warning('Invalid resp from %s (missing Etag)', + resp.full_path) + continue + + if etag != etag_buckets.setdefault(timestamp, etag): + self.logger.error( + 'Mixed Etag (%s, %s) for %s', + etag, etag_buckets[timestamp], self._full_path(node, job['partition'], - datafile_metadata['name'], job['policy']), - etag)) + datafile_metadata['name'], job['policy'])) + continue + + if unique_index not in buckets[timestamp]: + buckets[timestamp][unique_index] = resp + if len(buckets[timestamp]) >= job['policy'].ec_ndata: + responses = buckets[timestamp].values() + self.logger.debug( + 'Reconstruct frag #%s with frag indexes %s' + % (fi_to_rebuild, list(buckets[timestamp]))) + break + else: + for timestamp, resp in sorted(buckets.items()): + etag = etag_buckets[timestamp] + self.logger.error( + 'Unable to get enough responses (%s/%s) ' + 'to reconstruct %s with ETag %s' % ( + len(resp), job['policy'].ec_ndata, + self._full_path(node, job['partition'], + datafile_metadata['name'], + job['policy']), + etag)) + + if error_resp_count: + self.logger.error( + 'Unable to get enough responses (%s error responses) ' + 'to reconstruct %s' % ( + error_resp_count, + self._full_path(node, job['partition'], + datafile_metadata['name'], + job['policy']))) + raise DiskFileError('Unable to reconstruct EC archive') rebuilt_fragment_iter = self.make_rebuilt_fragment_iter( @@ -696,7 +755,7 @@ A partition may result in multiple jobs. Potentially many REVERT jobs, and zero or one SYNC job. - :param local_dev: the local device + :param local_dev: the local device (node dict) :param part_path: full path to partition :param partition: partition number :param policy: the policy @@ -756,7 +815,7 @@ for node in part_nodes: if node['id'] == local_dev['id']: # this partition belongs here, we'll need a sync job - frag_index = node['index'] + frag_index = policy.get_backend_index(node['index']) try: suffixes = data_fi_to_suffixes.pop(frag_index) except KeyError: @@ -765,7 +824,7 @@ job_type=SYNC, frag_index=frag_index, suffixes=suffixes, - sync_to=_get_partners(frag_index, part_nodes), + sync_to=_get_partners(node['index'], part_nodes), ) # ssync callback to rebuild missing fragment_archives sync_job['sync_diskfile_builder'] = self.reconstruct_fa @@ -776,11 +835,21 @@ ordered_fis = sorted((len(suffixes), fi) for fi, suffixes in data_fi_to_suffixes.items()) for count, fi in ordered_fis: + # In single-region EC a revert job must sync to the specific + # primary whose node_index matches the data's frag_index. With + # duplicated EC frags a revert job must sync to all primary nodes + # that should be holding this frag_index.
+ nodes_sync_to = [] + node_index = fi + for n in range(policy.ec_duplication_factor): + nodes_sync_to.append(part_nodes[node_index]) + node_index += policy.ec_n_unique_fragments + revert_job = build_job( job_type=REVERT, frag_index=fi, suffixes=data_fi_to_suffixes[fi], - sync_to=[part_nodes[fi]], + sync_to=nodes_sync_to, ) jobs.append(revert_job) diff --git a/swift/proxy/controllers/obj.py b/swift/proxy/controllers/obj.py index 000c149c12..ba600ae1af 100644 --- a/swift/proxy/controllers/obj.py +++ b/swift/proxy/controllers/obj.py @@ -1775,8 +1775,9 @@ def chunk_transformer(policy, nstreams): frags_by_byte_order = [] for chunk_to_encode in chunks_to_encode: - frags_by_byte_order.append( - policy.pyeclib_driver.encode(chunk_to_encode)) + encoded_chunks = policy.pyeclib_driver.encode(chunk_to_encode) + send_chunks = encoded_chunks * policy.ec_duplication_factor + frags_by_byte_order.append(send_chunks) # Sequential calls to encode() have given us a list that # looks like this: # @@ -1801,7 +1802,7 @@ last_bytes = ''.join(buf) if last_bytes: last_frags = policy.pyeclib_driver.encode(last_bytes) - yield last_frags + yield last_frags * policy.ec_duplication_factor else: yield [''] * nstreams @@ -2178,6 +2179,7 @@ class ECObjectController(BaseObjectController): range_specs = self._convert_range(req, policy) safe_iter = GreenthreadSafeIterator(node_iter) + # Sending the request concurrently to all nodes, and responding # with the first response isn't something useful for EC as all # nodes contain different fragments. Also EC has implemented its @@ -2204,8 +2206,11 @@ # getters in case some unforeseen scenario, or a misbehaving object # server, causes us to otherwise make endless requests e.g. if an # object server were to ignore frag_prefs and always respond with - # a frag that is already in a bucket. + # a frag that is already in a bucket. We now assume it should + # be limited to at most 2 * replicas. + max_extra_requests = ( + (policy.object_ring.replica_count * 2) - policy.ec_ndata) + for get, parts_iter in pile: if get.last_status is None: # We may have spawned getters that find the node iterator @@ -2322,7 +2327,7 @@ logger=self.app.logger, need_multiphase=True) - def _determine_chunk_destinations(self, putters): + def _determine_chunk_destinations(self, putters, policy): """ Given a list of putters, return a dict where the key is the putter and the value is the node index to use. (in the primary part list) as the primary that the handoff is standing in for. This lets erasure-code fragment archives wind up on the preferred local primary nodes when possible. + + :param putters: a list of swift.proxy.controllers.obj.MIMEPutter + instances + :param policy: a policy instance """ # Give each putter a "chunk index": the index of the # transformed chunk that we'll send to it. @@ -2351,9 +2360,34 @@ # nodes. Holes occur when a storage node is down, in which # case the connection is not replaced, and when a storage node # returns 507, in which case a handoff is used to replace it. - holes = [x for x in range(len(putters)) - if x not in chunk_index.values()] + # lack_list is a dict of lists that tracks hole indexes by "tier": + # the n-th hole found for a given unique index lands in tier n. + # E.g. with ec_duplication_factor=2, two holes for unique index 0 + # give lack_list like {0: [0], 1: [0]}; if one more hole is then + # found for unique index 1, lack_list becomes {0: [0, 1], 1: [0]}. + # Afterwards, holes are filled starting from the biggest key + # (i.e. 1: [0] first), so the unique indexes missing the most + # copies are covered first. + + # Group all missing node indexes by their unique fragment index + unique_index_to_holes = collections.defaultdict(list) + available_indexes = chunk_index.values() + for node_index in range(policy.object_ring.replica_count): + if node_index not in available_indexes: + unique_index = policy.get_backend_index(node_index) + unique_index_to_holes[unique_index].append(node_index) + + # Assign each missing node index to its lack tier + lack_list = collections.defaultdict(list) + for unique_index, holes in unique_index_to_holes.items(): + for lack_tier, hole_node_index in enumerate(holes): + lack_list[lack_tier].append(hole_node_index) + + # Flatten lack_list into a single list, biggest tier first + holes = [] + for lack_tier, indexes in sorted(lack_list.items(), reverse=True): + holes.extend(indexes) + + # Fill the chunk_index map from the hole list for hole, p in zip(holes, handoff_conns): chunk_index[p] = hole return chunk_index @@ -2405,7 +2439,8 @@ # build our chunk index dict to place handoffs in the # same part nodes index as the primaries they are covering - chunk_index = self._determine_chunk_destinations(putters) + chunk_index = self._determine_chunk_destinations( + putters, policy) for putter in putters: putter.spawn_sender_greenthread( @@ -2456,7 +2491,7 @@ # Update any footers set by middleware with EC footers trail_md = trailing_metadata( policy, etag_hasher, - bytes_transferred, ci) + bytes_transferred, policy.get_backend_index(ci)) trail_md.update(footers) # Etag footer must always be hash of what we sent trail_md['Etag'] = chunk_hashers[ci].hexdigest() diff --git a/test/unit/__init__.py b/test/unit/__init__.py index 90f2539074..8590a28da1 100644 --- a/test/unit/__init__.py +++ b/test/unit/__init__.py @@ -21,6 +21,7 @@ import copy import logging import errno from six.moves import range +from six import BytesIO import sys from contextlib import contextmanager, closing from collections import defaultdict, Iterable @@ -34,13 +35,14 @@ from tempfile import mkdtemp from shutil import rmtree import signal import json - +import random from swift.common.utils import Timestamp, NOTICE from test import get_config from swift.common import utils from swift.common.header_key_dict import HeaderKeyDict from swift.common.ring import Ring, RingData +from swift.obj import server from hashlib import md5 import logging.handlers @@ -48,6 +50,7 @@ from six.moves.http_client import HTTPException from swift.common import storage_policy from swift.common.storage_policy import (StoragePolicy, ECStoragePolicy, VALID_EC_TYPES) +from swift.common import swob import functools import six.moves.cPickle as pickle from gzip import GzipFile @@ -213,6 +216,7 @@ class FakeRing(Ring): self._base_port = base_port self.max_more_nodes = max_more_nodes self._part_shift = 32 - part_power + self._init_device_char() # 9 total nodes (6 more past the initial 3) is the cap, no matter if # this is set higher, or R^2 for R replicas self.set_replicas(replicas) @@ -221,9 +225,18 @@ def _reload(self): self._rtime = time.time() + @property + def device_char(self): + return next(self._device_char_iter) + + def _init_device_char(self): + self._device_char_iter = itertools.cycle( + ['sd%s' %
chr(ord('a') + x) for x in range(26)]) + def set_replicas(self, replicas): self.replicas = replicas self._devs = [] + self._init_device_char() for x in range(self.replicas): ip = '10.0.0.%s' % x port = self._base_port + x @@ -233,7 +246,7 @@ class FakeRing(Ring): 'replication_ip': ip, 'port': port, 'replication_port': port, - 'device': 'sd' + (chr(ord('a') + x)), + 'device': self.device_char, 'zone': x % 3, 'region': x % 2, 'id': x, @@ -290,7 +303,7 @@ class FabricatedRing(Ring): self.devices = devices self.nodes = nodes self.port = port - self.replicas = 6 + self.replicas = replicas self.part_power = part_power self._part_shift = 32 - self.part_power self._reload() @@ -1092,6 +1105,30 @@ def requires_o_tmpfile_support(func): return wrapper +class StubResponse(object): + + def __init__(self, status, body='', headers=None, frag_index=None): + self.status = status + self.body = body + self.readable = BytesIO(body) + self.headers = HeaderKeyDict(headers) + if frag_index is not None: + self.headers['X-Object-Sysmeta-Ec-Frag-Index'] = frag_index + fake_reason = ('Fake', 'This response is a lie.') + self.reason = swob.RESPONSE_REASONS.get(status, fake_reason)[0] + + def getheader(self, header_name, default=None): + return self.headers.get(header_name, default) + + def getheaders(self): + if 'Content-Length' not in self.headers: + self.headers['Content-Length'] = len(self.body) + return self.headers.items() + + def read(self, amt=0): + return self.readable.read(amt) + + def encode_frag_archive_bodies(policy, body): """ Given a stub body produce a list of complete frag_archive bodies as @@ -1119,3 +1156,128 @@ def encode_frag_archive_bodies(policy, body): ec_archive_bodies = [''.join(frags) for frags in zip(*fragment_payloads)] return ec_archive_bodies + + +def make_ec_object_stub(test_body, policy, timestamp): + segment_size = policy.ec_segment_size + test_body = test_body or ( + 'test' * segment_size)[:-random.randint(1, 1000)] + timestamp = timestamp or utils.Timestamp(time.time()) + etag = md5(test_body).hexdigest() + ec_archive_bodies = encode_frag_archive_bodies(policy, test_body) + + return { + 'body': test_body, + 'etag': etag, + 'frags': ec_archive_bodies, + 'timestamp': timestamp + } + + +def fake_ec_node_response(node_frags, policy): + """ + Given a list of entries for each node in ring order, where the entries + are a dict (or list of dicts) which describes the fragment (or + fragments) that are on the node; create a function suitable for use + with capture_http_requests that will accept a req object and return a + response that will suitably fake the behavior of an object server who + had the given fragments on disk at the time. + + :param node_frags: a list. Each item in the list describes the + fragments that are on a node; each item is a dict or list of dicts, + each dict describing a single fragment; where the item is a list, + repeated calls to get_response will return fragments in the order + of the list; each dict has keys: + - obj: an object stub, as generated by _make_ec_object_stub, + that defines all of the fragments that compose an object + at a specific timestamp. 
+ - frag: the index of a fragment to be selected from the object + stub + - durable (optional): True if the selected fragment is durable + :param policy: the storage policy for the fake responses + """ + node_map = {} # maps node ip and port to node index + all_nodes = [] + call_count = {} # maps node index to get_response call count for node + + def _build_node_map(req, policy): + node_key = lambda n: (n['ip'], n['port']) + part = utils.split_path(req['path'], 5, 5, True)[1] + all_nodes.extend(policy.object_ring.get_part_nodes(part)) + all_nodes.extend(policy.object_ring.get_more_nodes(part)) + for i, node in enumerate(all_nodes): + node_map[node_key(node)] = i + call_count[i] = 0 + + # normalize node_frags to a list of fragments for each node even + # if there's only one fragment in the dataset provided. + for i, frags in enumerate(node_frags): + if isinstance(frags, dict): + node_frags[i] = [frags] + + def get_response(req): + requested_policy = int( + req['headers']['X-Backend-Storage-Policy-Index']) + if int(policy) != requested_policy: + raise AssertionError( + "Requested policy doesn't fit the fake response policy") + if not node_map: + _build_node_map(req, policy) + + try: + node_index = node_map[(req['ip'], req['port'])] + except KeyError: + raise Exception("Couldn't find node %s:%s in %r" % ( + req['ip'], req['port'], all_nodes)) + try: + frags = node_frags[node_index] + except IndexError: + raise Exception('Found node %r:%r at index %s - ' + 'but only got %s stub response nodes' % ( + req['ip'], req['port'], node_index, + len(node_frags))) + + if not frags: + return StubResponse(404) + + # determine response fragment (if any) for this call + resp_frag = frags[call_count[node_index]] + call_count[node_index] += 1 + frag_prefs = req['headers'].get('X-Backend-Fragment-Preferences') + if not (frag_prefs or resp_frag.get('durable', True)): + return StubResponse(404) + + # prepare durable timestamp and backend frags header for this node + obj_stub = resp_frag['obj'] + ts2frags = defaultdict(list) + durable_timestamp = None + for frag in frags: + ts_frag = frag['obj']['timestamp'] + if frag.get('durable', True): + durable_timestamp = ts_frag.internal + ts2frags[ts_frag].append(frag['frag']) + + try: + body = obj_stub['frags'][resp_frag['frag']] + except IndexError as err: + raise Exception( + 'Frag index %s not defined: node index %s, frags %r\n%s' % + (resp_frag['frag'], node_index, [f['frag'] for f in frags], + err)) + headers = { + 'X-Object-Sysmeta-Ec-Content-Length': len(obj_stub['body']), + 'X-Object-Sysmeta-Ec-Etag': obj_stub['etag'], + 'X-Object-Sysmeta-Ec-Frag-Index': + policy.get_backend_index(resp_frag['frag']), + 'X-Backend-Timestamp': obj_stub['timestamp'].internal, + 'X-Timestamp': obj_stub['timestamp'].normal, + 'X-Backend-Data-Timestamp': obj_stub['timestamp'].internal, + 'X-Backend-Fragments': + server._make_backend_fragments_header(ts2frags) + } + if durable_timestamp: + headers['X-Backend-Durable-Timestamp'] = durable_timestamp + + return StubResponse(200, body, headers) + + return get_response diff --git a/test/unit/common/test_storage_policy.py b/test/unit/common/test_storage_policy.py index aa6ad39436..c755ec612a 100644 --- a/test/unit/common/test_storage_policy.py +++ b/test/unit/common/test_storage_policy.py @@ -19,6 +19,7 @@ import unittest import os import mock from functools import partial + from six.moves.configparser import ConfigParser from tempfile import NamedTemporaryFile from test.unit import patch_policies, FakeRing, temptree, DEFAULT_TEST_EC_TYPE @@ -169,7 +170,11 @@ class
TestStoragePolicies(unittest.TestCase): StoragePolicy(2, 'cee', False), ECStoragePolicy(10, 'ten', ec_type=DEFAULT_TEST_EC_TYPE, - ec_ndata=10, ec_nparity=3)] + ec_ndata=10, ec_nparity=3), + ECStoragePolicy(11, 'eleven', + ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=10, ec_nparity=3, + ec_duplication_factor=2)] policies = StoragePolicyCollection(test_policies) for policy in policies: policy_repr = repr(policy) @@ -185,6 +190,10 @@ class TestStoragePolicies(unittest.TestCase): policy.ec_nparity in policy_repr) self.assertTrue('ec_segment_size=%s' % policy.ec_segment_size in policy_repr) + if policy.ec_duplication_factor > 1: + self.assertTrue('ec_duplication_factor=%s' % + policy.ec_duplication_factor in + policy_repr) collection_repr = repr(policies) collection_repr_lines = collection_repr.splitlines() self.assertTrue( @@ -443,16 +452,21 @@ class TestStoragePolicies(unittest.TestCase): ECStoragePolicy(0, 'ec8-2', aliases='zeus, jupiter', ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=8, ec_nparity=2, - object_ring=FakeRing(replicas=8), + object_ring=FakeRing(replicas=10), is_default=True), ECStoragePolicy(1, 'ec10-4', aliases='athena, minerva', ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=10, ec_nparity=4, - object_ring=FakeRing(replicas=10)), + object_ring=FakeRing(replicas=14)), ECStoragePolicy(2, 'ec4-2', aliases='poseidon, neptune', ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=4, ec_nparity=2, - object_ring=FakeRing(replicas=7)), + object_ring=FakeRing(replicas=6)), + ECStoragePolicy(3, 'ec4-2-dup', aliases='uzuki, rin', + ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=4, ec_nparity=2, + ec_duplication_factor=2, + object_ring=FakeRing(replicas=12)), ] ec_policies = StoragePolicyCollection(good_test_policies_EC) @@ -460,6 +474,10 @@ class TestStoragePolicies(unittest.TestCase): self.assertEqual(ec_policies.get_by_name(name), ec_policies[0]) for name in ('ec10-4', 'athena', 'minerva'): self.assertEqual(ec_policies.get_by_name(name), ec_policies[1]) + for name in ('ec4-2', 'poseidon', 'neptune'): + self.assertEqual(ec_policies.get_by_name(name), ec_policies[2]) + for name in ('ec4-2-dup', 'uzuki', 'rin'): + self.assertEqual(ec_policies.get_by_name(name), ec_policies[3]) # Testing parsing of conf files/text good_ec_conf = self._conf(""" @@ -478,6 +496,14 @@ class TestStoragePolicies(unittest.TestCase): ec_type = %(ec_type)s ec_num_data_fragments = 10 ec_num_parity_fragments = 4 + [storage-policy:2] + name = ec4-2-dup + aliases = uzuki, rin + policy_type = erasure_coding + ec_type = %(ec_type)s + ec_num_data_fragments = 4 + ec_num_parity_fragments = 2 + ec_duplication_factor = 2 """ % {'ec_type': DEFAULT_TEST_EC_TYPE}) ec_policies = parse_storage_policies(good_ec_conf) @@ -485,6 +511,8 @@ class TestStoragePolicies(unittest.TestCase): ec_policies[0]) self.assertEqual(ec_policies.get_by_name('ec10-4'), ec_policies.get_by_name('poseidon')) + self.assertEqual(ec_policies.get_by_name('ec4-2-dup'), + ec_policies.get_by_name('uzuki')) name_repeat_ec_conf = self._conf(""" [storage-policy:0] @@ -1243,11 +1271,16 @@ class TestStoragePolicies(unittest.TestCase): ec_ndata=8, ec_nparity=2), ECStoragePolicy(11, 'df10-6', ec_type='flat_xor_hd_4', ec_ndata=10, ec_nparity=6), + ECStoragePolicy(12, 'ec4-2-dup', ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=4, ec_nparity=2, ec_duplication_factor=2), ] for ec_policy in test_ec_policies: k = ec_policy.ec_ndata - expected_size = \ - k + ec_policy.pyeclib_driver.min_parity_fragments_needed() + expected_size = ( + (k + ec_policy.pyeclib_driver.min_parity_fragments_needed()) + * 
ec_policy.ec_duplication_factor ) + self.assertEqual(expected_size, ec_policy.quorum) def test_validate_ring(self): test_policies = [ ECStoragePolicy(0, 'ec8-2', ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=8, ec_nparity=2), ECStoragePolicy(1, 'ec10-4', ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=10, ec_nparity=4), ECStoragePolicy(2, 'ec4-2', ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=4, ec_nparity=2), + ECStoragePolicy(3, 'ec4-2-2dup', ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=4, ec_nparity=2, + ec_duplication_factor=2) ] - actual_load_ring_replicas = [8, 10, 7] + actual_load_ring_replicas = [8, 10, 7, 11] policies = StoragePolicyCollection(test_policies) - def create_mock_ring_data(num_replica): - class mock_ring_data_klass(object): - def __init__(self): - self._replica2part2dev_id = [0] * num_replica - - return mock_ring_data_klass() + class MockRingData(object): + def __init__(self, num_replica): + self._replica2part2dev_id = [0] * num_replica for policy, ring_replicas in zip(policies, actual_load_ring_replicas): with mock.patch('swift.common.ring.ring.RingData.load', - return_value=create_mock_ring_data(ring_replicas)): + return_value=MockRingData(ring_replicas)): + necessary_replica_num = \ + policy.ec_n_unique_fragments * policy.ec_duplication_factor with mock.patch( 'swift.common.ring.ring.validate_configuration'): msg = 'EC ring for policy %s needs to be configured with ' \ 'exactly %d replicas.' % \ - (policy.name, policy.ec_ndata + policy.ec_nparity) + (policy.name, necessary_replica_num) self.assertRaisesWithMessage(RingLoadError, msg, policy.load_ring, 'mock') @@ -1332,6 +1367,7 @@ 'ec_num_data_fragments': 10, 'ec_num_parity_fragments': 3, 'ec_object_segment_size': DEFAULT_EC_OBJECT_SEGMENT_SIZE, + 'ec_duplication_factor': 1, }, (10, False): { 'name': 'ten', @@ -1348,12 +1384,30 @@ 'ec_num_data_fragments': 10, 'ec_num_parity_fragments': 3, 'ec_object_segment_size': DEFAULT_EC_OBJECT_SEGMENT_SIZE, + 'ec_duplication_factor': 1, }, (11, False): { 'name': 'done', 'aliases': 'done', 'deprecated': True, }, + # enabled ec with ec_duplication + (12, True): { + 'name': 'twelve', + 'aliases': 'twelve', + 'default': False, + 'deprecated': False, + 'policy_type': EC_POLICY, + 'ec_type': DEFAULT_TEST_EC_TYPE, + 'ec_num_data_fragments': 10, + 'ec_num_parity_fragments': 3, + 'ec_object_segment_size': DEFAULT_EC_OBJECT_SEGMENT_SIZE, + 'ec_duplication_factor': 2, + }, + (12, False): { + 'name': 'twelve', + 'aliases': 'twelve', + }, } self.maxDiff = None for policy in policies: diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index e2289801d8..a34d4f0604 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -2577,6 +2577,33 @@ cluster_dfw1 = http://dfw1.host/v1/ finally: utils.TRUE_VALUES = orig_trues + def test_config_positive_int_value(self): + expectations = { + # value : expected, + '1': 1, + 1: 1, + '2': 2, + '1024': 1024, + '0x01': ValueError, + 'asdf': ValueError, + None: ValueError, + 0: ValueError, + -1: ValueError, + '1.2': ValueError, # a float-like string should be a ValueError + } + for value, expected in expectations.items(): + try: + rv = utils.config_positive_int_value(value) + except Exception as e: + if e.__class__ is not expected: + raise + else: + self.assertEqual( + 'Config option must be a positive int number, ' + 'not "%s".' % value, e.message) + else: + self.assertEqual(expected, rv) + def test_config_auto_int_value(self): expectations = { # (value, default) : expected, diff --git a/test/unit/helpers.py b/test/unit/helpers.py index 46f4b80b1e..5e50932d1e 100644 --- a/test/unit/helpers.py +++ b/test/unit/helpers.py @@ -84,7 +84,8 @@ def setup_servers(the_object_server=object_server, extra_conf=None): mkdirs(_testdir) rmtree(_testdir) for drive in ('sda1', 'sdb1', 'sdc1', 'sdd1', 'sde1', - 'sdf1', 'sdg1', 'sdh1', 'sdi1'): + 'sdf1', 'sdg1', 'sdh1', 'sdi1', 'sdj1', + 'sdk1', 'sdl1'): mkdirs(os.path.join(_testdir, drive, 'tmp')) conf = {'devices': _testdir, 'swift_dir': _testdir, 'mount_check': 'false', 'allowed_headers': @@ -100,9 +101,13 @@ obj1lis = listen(('localhost', 0)) obj2lis = listen(('localhost', 0)) obj3lis = listen(('localhost', 0)) - objsocks = [obj1lis, obj2lis, obj3lis] + obj4lis = listen(('localhost', 0)) + obj5lis = listen(('localhost', 0)) + obj6lis = listen(('localhost', 0)) + objsocks = [obj1lis, obj2lis, obj3lis, obj4lis, obj5lis, obj6lis] context["test_sockets"] = \ - (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis, obj3lis) + (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis, obj3lis, + obj4lis, obj5lis, obj6lis) account_ring_path = os.path.join(_testdir, 'account.ring.gz') account_devs = [ {'port': acc1lis.getsockname()[1]}, @@ -120,7 +125,10 @@ StoragePolicy(1, 'one', False), StoragePolicy(2, 'two', False), ECStoragePolicy(3, 'ec', ec_type=DEFAULT_TEST_EC_TYPE, - ec_ndata=2, ec_nparity=1, ec_segment_size=4096)]) + ec_ndata=2, ec_nparity=1, ec_segment_size=4096), + ECStoragePolicy(4, 'ec-dup', ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=2, ec_nparity=1, ec_segment_size=4096, + ec_duplication_factor=2)]) obj_rings = { 0: ('sda1', 'sdb1'), 1: ('sdc1', 'sdd1'), @@ -136,22 +144,41 @@ write_fake_ring(obj_ring_path, *obj_devs) # write_fake_ring can't handle a 3-element ring, and the EC policy needs - # at least 3 devs to work with, so we do it manually + # at least 6 devs to work with (ec_k=2, ec_m=1, duplication_factor=2), + # so we do it manually devs = [{'id': 0, 'zone': 0, 'device': 'sdg1', 'ip': '127.0.0.1', 'port': obj1lis.getsockname()[1]}, {'id': 1, 'zone': 0, 'device': 'sdh1', 'ip': '127.0.0.1', 'port': obj2lis.getsockname()[1]}, {'id': 2, 'zone': 0, 'device': 'sdi1', 'ip': '127.0.0.1', - 'port': obj3lis.getsockname()[1]}] + 'port': obj3lis.getsockname()[1]}, + {'id': 3, 'zone': 0, 'device': 'sdj1', 'ip': '127.0.0.1', + 'port': obj4lis.getsockname()[1]}, + {'id': 4, 'zone': 0, 'device': 'sdk1', 'ip': '127.0.0.1', + 'port': obj5lis.getsockname()[1]}, + {'id': 5, 'zone': 0, 'device': 'sdl1', 'ip': '127.0.0.1', + 'port': obj6lis.getsockname()[1]}] pol3_replica2part2dev_id = [[0, 1, 2, 0], [1, 2, 0, 1], [2, 0, 1, 2]] + pol4_replica2part2dev_id = [[0, 1, 2, 3], + [1, 2, 3, 4], + [2, 3, 4, 5], + [3, 4, 5, 0], + [4, 5, 0, 1], + [5, 0, 1, 2]] obj3_ring_path = os.path.join( _testdir, storage_policy.POLICIES[3].ring_name + '.ring.gz') part_shift = 30 with closing(GzipFile(obj3_ring_path, 'wb')) as fh: pickle.dump(RingData(pol3_replica2part2dev_id, devs, part_shift), fh) + obj4_ring_path = os.path.join( + _testdir, storage_policy.POLICIES[4].ring_name + '.ring.gz') + part_shift = 30 + with closing(GzipFile(obj4_ring_path, 'wb')) as fh:
pickle.dump(RingData(pol4_replica2part2dev_id, devs, part_shift), fh) + prosrv = proxy_server.Application(conf, logger=debug_logger('proxy')) for policy in storage_policy.POLICIES: # make sure all the rings are loaded @@ -172,8 +199,15 @@ def setup_servers(the_object_server=object_server, extra_conf=None): conf, logger=debug_logger('obj2')) obj3srv = the_object_server.ObjectController( conf, logger=debug_logger('obj3')) + obj4srv = the_object_server.ObjectController( + conf, logger=debug_logger('obj4')) + obj5srv = the_object_server.ObjectController( + conf, logger=debug_logger('obj5')) + obj6srv = the_object_server.ObjectController( + conf, logger=debug_logger('obj6')) context["test_servers"] = \ - (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, obj2srv, obj3srv) + (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, obj2srv, obj3srv, + obj4srv, obj5srv, obj6srv) nl = NullLogger() logging_prosv = proxy_logging.ProxyLoggingMiddleware(prosrv, conf, logger=prosrv.logger) @@ -185,8 +219,12 @@ def setup_servers(the_object_server=object_server, extra_conf=None): obj1spa = spawn(wsgi.server, obj1lis, obj1srv, nl) obj2spa = spawn(wsgi.server, obj2lis, obj2srv, nl) obj3spa = spawn(wsgi.server, obj3lis, obj3srv, nl) + obj4spa = spawn(wsgi.server, obj4lis, obj4srv, nl) + obj5spa = spawn(wsgi.server, obj5lis, obj5srv, nl) + obj6spa = spawn(wsgi.server, obj6lis, obj6srv, nl) context["test_coros"] = \ - (prospa, acc1spa, acc2spa, con1spa, con2spa, obj1spa, obj2spa, obj3spa) + (prospa, acc1spa, acc2spa, con1spa, con2spa, obj1spa, obj2spa, obj3spa, + obj4spa, obj5spa, obj6spa) # Create account ts = normalize_timestamp(time.time()) partition, nodes = prosrv.account_ring.get_nodes('a') diff --git a/test/unit/obj/test_reconstructor.py b/test/unit/obj/test_reconstructor.py index 2b6872ce4f..0ef46cf9cb 100644 --- a/test/unit/obj/test_reconstructor.py +++ b/test/unit/obj/test_reconstructor.py @@ -25,6 +25,7 @@ import shutil import re import random import struct +import collections from eventlet import Timeout, sleep from contextlib import closing, contextmanager @@ -72,7 +73,8 @@ def make_ec_archive_bodies(policy, test_body): # encode the buffers into fragment payloads fragment_payloads = [] for chunk in chunks: - fragments = policy.pyeclib_driver.encode(chunk) + fragments = \ + policy.pyeclib_driver.encode(chunk) * policy.ec_duplication_factor if not fragments: break fragment_payloads.append(fragments) @@ -664,13 +666,13 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): def test_get_response(self): part = self.part_nums[0] - node = POLICIES[0].object_ring.get_part_nodes(int(part))[0] + node = POLICIES[1].object_ring.get_part_nodes(int(part))[0] for stat_code in (200, 400): with mocked_http_conn(stat_code): resp = self.reconstructor._get_response(node, part, path='nada', headers={}, - policy=POLICIES[0]) + policy=POLICIES[1]) if resp: self.assertEqual(resp.status, 200) else: @@ -679,12 +681,12 @@ class TestGlobalSetupObjectReconstructor(unittest.TestCase): def test_reconstructor_does_not_log_on_404(self): part = self.part_nums[0] - node = POLICIES[0].object_ring.get_part_nodes(int(part))[0] + node = POLICIES[1].object_ring.get_part_nodes(int(part))[0] with mocked_http_conn(404): self.reconstructor._get_response(node, part, path='some_path', headers={}, - policy=POLICIES[0]) + policy=POLICIES[1]) # Make sure that no warnings are emitted for a 404 len_warning_lines = len(self.logger.get_lines_for_level('warning')) @@ -1160,6 +1162,7 @@ class TestObjectReconstructor(unittest.TestCase): 
self.policy.object_ring.max_more_nodes = \ self.policy.object_ring.replicas self.ts_iter = make_timestamp_iter() + self.fabricated_ring = FabricatedRing() def _configure_reconstructor(self, **kwargs): self.conf.update(kwargs) @@ -1716,7 +1719,7 @@ self.assertEqual(job['device'], self.local_dev['device']) def test_build_jobs_primary(self): - ring = self.policy.object_ring = FabricatedRing() + ring = self.policy.object_ring = self.fabricated_ring # find a partition for which we're a primary for partition in range(2 ** ring.part_power): part_nodes = ring.get_part_nodes(partition) @@ -1761,7 +1764,7 @@ self.assertEqual(job['device'], self.local_dev['device']) def test_build_jobs_handoff(self): - ring = self.policy.object_ring = FabricatedRing() + ring = self.policy.object_ring = self.fabricated_ring # find a partition for which we're a handoff for partition in range(2 ** ring.part_power): part_nodes = ring.get_part_nodes(partition) @@ -1780,7 +1783,8 @@ } # since this part doesn't belong on us it doesn't matter what # frag_index we have - frag_index = random.randint(0, ring.replicas - 1) + frag_index = self.policy.get_backend_index( + random.randint(0, ring.replicas - 1)) stub_hashes = { '123': {frag_index: 'hash', None: 'hash'}, 'abc': {None: 'hash'}, @@ -1793,7 +1797,17 @@ self.assertEqual(job['job_type'], object_reconstructor.REVERT) self.assertEqual(job['frag_index'], frag_index) self.assertEqual(sorted(job['suffixes']), sorted(stub_hashes.keys())) - self.assertEqual(len(job['sync_to']), 1) + self.assertEqual( + self.policy.ec_duplication_factor, len(job['sync_to'])) + # the sync_to nodes should all be different from each other + node_ids = set([node['id'] for node in job['sync_to']]) + self.assertEqual(len(node_ids), + self.policy.ec_duplication_factor) + # but all of the nodes should have the same backend index to sync + node_indexes = set( + self.policy.get_backend_index(node['index']) + for node in job['sync_to']) + self.assertEqual(1, len(node_indexes)) self.assertEqual(job['sync_to'][0]['index'], frag_index) self.assertEqual(job['path'], part_path) self.assertEqual(job['partition'], partition) @@ -1801,12 +1815,12 @@ self.assertEqual(job['local_dev'], self.local_dev) def test_build_jobs_mixed(self): - ring = self.policy.object_ring = FabricatedRing() + ring = self.policy.object_ring = self.fabricated_ring # find a partition for which we're a primary for partition in range(2 ** ring.part_power): part_nodes = ring.get_part_nodes(partition) try: - frag_index = [n['id'] for n in part_nodes].index( + node_index = [n['id'] for n in part_nodes].index( self.local_dev['id']) except ValueError: pass @@ -1823,8 +1837,10 @@ 'partition': partition, 'part_path': part_path, } - other_frag_index = random.choice([f for f in range(ring.replicas) - if f != frag_index]) + frag_index = self.policy.get_backend_index(node_index) + other_frag_index = self.policy.get_backend_index( + random.choice( + [f for f in range(ring.replicas) if f != node_index])) stub_hashes = { '123': {frag_index: 'hash', None: 'hash'}, '456': {other_frag_index: 'hash', None: 'hash'}, @@ -1858,11 +1874,12 @@ job = revert_jobs[0] self.assertEqual(job['frag_index'], other_frag_index)
self.assertEqual(job['suffixes'], ['456']) - self.assertEqual(len(job['sync_to']), 1) + self.assertEqual(len(job['sync_to']), + self.policy.ec_duplication_factor) self.assertEqual(job['sync_to'][0]['index'], other_frag_index) def test_build_jobs_revert_only_tombstones(self): - ring = self.policy.object_ring = FabricatedRing() + ring = self.policy.object_ring = self.fabricated_ring # find a partition for which we're a handoff for partition in range(2 ** ring.part_power): part_nodes = ring.get_part_nodes(partition) @@ -1945,7 +1962,8 @@ class TestObjectReconstructor(unittest.TestCase): def test_process_job_primary_in_sync(self): replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [n for n in self.policy.object_ring.devs if n != self.local_dev][:2] # setup left and right hashes @@ -2003,7 +2021,8 @@ class TestObjectReconstructor(unittest.TestCase): def test_process_job_primary_not_in_sync(self): replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [n for n in self.policy.object_ring.devs if n != self.local_dev][:2] # setup left and right hashes @@ -2065,7 +2084,8 @@ class TestObjectReconstructor(unittest.TestCase): def test_process_job_sync_missing_durable(self): replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [n for n in self.policy.object_ring.devs if n != self.local_dev][:2] # setup left and right hashes @@ -2133,7 +2153,8 @@ class TestObjectReconstructor(unittest.TestCase): def test_process_job_primary_some_in_sync(self): replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [n for n in self.policy.object_ring.devs if n != self.local_dev][:2] # setup left and right hashes @@ -2200,9 +2221,9 @@ class TestObjectReconstructor(unittest.TestCase): self.fail('unexpected call %r' % call) def test_process_job_primary_down(self): - replicas = self.policy.object_ring.replicas partition = 0 - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) stub_hashes = { '123': {frag_index: 'hash', None: 'hash'}, 'abc': {frag_index: 'hash', None: 'hash'}, @@ -2269,9 +2290,9 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(expected_ssync_calls, found_ssync_calls) def test_process_job_suffix_call_errors(self): - replicas = self.policy.object_ring.replicas partition = 0 - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) stub_hashes = { '123': {frag_index: 'hash', None: 'hash'}, 'abc': {frag_index: 'hash', None: 'hash'}, @@ -2318,8 +2339,8 @@ class TestObjectReconstructor(unittest.TestCase): self.assertFalse(ssync_calls) def test_process_job_handoff(self): - replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [random.choice([n for n in self.policy.object_ring.devs if n != self.local_dev])] sync_to[0]['index'] = frag_index @@ -2364,8 +2385,8 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(set(call['suffixes']), set(['123', 
'abc'])) def test_process_job_will_not_revert_to_handoff(self): - replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [random.choice([n for n in self.policy.object_ring.devs if n != self.local_dev])] sync_to[0]['index'] = frag_index @@ -2423,8 +2444,8 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(set(call['suffixes']), set(['123', 'abc'])) def test_process_job_revert_is_handoff_fails(self): - replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [random.choice([n for n in self.policy.object_ring.devs if n != self.local_dev])] sync_to[0]['index'] = frag_index @@ -2482,8 +2503,8 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(self.reconstructor.handoffs_remaining, 1) def test_process_job_revert_cleanup(self): - replicas = self.policy.object_ring.replicas - frag_index = random.randint(0, replicas - 1) + frag_index = random.randint( + 0, self.policy.ec_n_unique_fragments - 1) sync_to = [random.choice([n for n in self.policy.object_ring.devs if n != self.local_dev])] sync_to[0]['index'] = frag_index @@ -2616,7 +2637,6 @@ class TestObjectReconstructor(unittest.TestCase): test_data = ('rebuild' * self.policy.ec_segment_size)[:-777] etag = md5(test_data).hexdigest() ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data) - broken_body = ec_archive_bodies.pop(1) responses = list() @@ -2662,6 +2682,9 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual( [{'timestamp': '1234567890.12345', 'exclude': []}], json.loads(called_header['X-Backend-Fragment-Preferences'])) + # no error and warning + self.assertFalse(self.logger.get_lines_for_level('error')) + self.assertFalse(self.logger.get_lines_for_level('warning')) def test_reconstruct_fa_errors_works(self): job = { @@ -2705,6 +2728,57 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(md5(fixed_body).hexdigest(), md5(broken_body).hexdigest()) + def test_reconstruct_fa_error_with_invalid_header(self): + job = { + 'partition': 0, + 'policy': self.policy, + } + part_nodes = self.policy.object_ring.get_part_nodes(0) + node = part_nodes[4] + metadata = { + 'name': '/a/c/o', + 'Content-Length': 0, + 'ETag': 'etag', + 'X-Timestamp': '1234567890.12345', + } + + test_data = ('rebuild' * self.policy.ec_segment_size)[:-777] + etag = md5(test_data).hexdigest() + ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data) + + broken_body = ec_archive_bodies.pop(4) + + base_responses = list() + for body in ec_archive_bodies: + headers = get_header_frag_index(self, body) + headers.update({'X-Object-Sysmeta-Ec-Etag': etag}) + base_responses.append((200, body, headers)) + + responses = base_responses + # force the test to exercise the handling of this bad response by + # sticking it in near the front + error_index = random.randint(0, self.policy.ec_ndata - 1) + status, body, headers = responses[error_index] + # one esoteric failure is a literal string 'None' in place of the + # X-Object-Sysmeta-EC-Frag-Index + stub_node_job = {'some_keys': 'foo', 'but_not': 'frag_index'} + headers['X-Object-Sysmeta-Ec-Frag-Index'] = str( + stub_node_job.get('frag_index')) + # oops! 
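        # The literal 'None' above is just what str() of a missed dict key
        # yields. A sketch of the defensive parse this test leans on
        # (assumed behavior: an unparseable frag index marks the response
        # invalid instead of raising; illustration only):
        def sketch_parse_frag_index(raw_header_value):
            try:
                return int(raw_header_value)  # the normal case, '3' -> 3
            except (TypeError, ValueError):
                return None  # 'None', 'invalid' or missing -> rejected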
+ self.assertEqual('None', + headers.get('X-Object-Sysmeta-Ec-Frag-Index')) + responses[error_index] = status, body, headers + codes, body_iter, headers_iter = zip(*responses) + with mocked_http_conn(*codes, body_iter=body_iter, + headers=headers_iter): + df = self.reconstructor.reconstruct_fa( + job, node, dict(metadata)) + fixed_body = ''.join(df.reader()) + # ... this bad request should be treated like any other failure + self.assertEqual(len(fixed_body), len(broken_body)) + self.assertEqual(md5(fixed_body).hexdigest(), + md5(broken_body).hexdigest()) + def test_reconstruct_parity_fa_with_data_node_failure(self): job = { 'partition': 0, @@ -2724,7 +2798,6 @@ class TestObjectReconstructor(unittest.TestCase): test_data = ('rebuild' * self.policy.ec_segment_size)[:-454] etag = md5(test_data).hexdigest() ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data) - # the scheme is 10+4, so this gets a parity node broken_body = ec_archive_bodies.pop(-4) @@ -2748,7 +2821,7 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(md5(fixed_body).hexdigest(), md5(broken_body).hexdigest()) - def test_reconstruct_fa_errors_fails(self): + def test_reconstruct_fa_exceptions_fails(self): job = { 'partition': 0, 'policy': self.policy, @@ -2763,12 +2836,53 @@ class TestObjectReconstructor(unittest.TestCase): 'X-Timestamp': '1234567890.12345' } - possible_errors = [404, Timeout(), Exception('kaboom!')] + possible_errors = [Timeout(), Exception('kaboom!')] codes = [random.choice(possible_errors) for i in range(policy.object_ring.replicas - 1)] with mocked_http_conn(*codes): self.assertRaises(DiskFileError, self.reconstructor.reconstruct_fa, job, node, metadata) + error_lines = self.logger.get_lines_for_level('error') + # # of replicas failed and one more error log to report no enough + # responses to reconstruct. 
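        # A sketch of the accounting asserted below and in the all-404s test
        # that follows (assumed behavior: 404s are expected during rebalance
        # so they are not logged per node, while Timeouts and Exceptions are,
        # plus one summary line when too few responses remain; illustration
        # only):
        def sketch_expected_error_lines(codes):
            per_node = sum(1 for c in codes if c != 404)  # 404s stay quiet
            return per_node + 1  # plus 'Unable to get enough responses'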
+ self.assertEqual(policy.object_ring.replicas, len(error_lines)) + self.assertIn( + 'Unable to get enough responses (%s error responses)' + % (policy.object_ring.replicas - 1), + error_lines[-1], + "Unexpected error line found: %s" % error_lines[-1]) + # no warning + self.assertFalse(self.logger.get_lines_for_level('warning')) + + def test_reconstruct_fa_all_404s_fails(self): + job = { + 'partition': 0, + 'policy': self.policy, + } + part_nodes = self.policy.object_ring.get_part_nodes(0) + node = part_nodes[1] + policy = self.policy + metadata = { + 'name': '/a/c/o', + 'Content-Length': 0, + 'ETag': 'etag', + 'X-Timestamp': '1234567890.12345' + } + + codes = [404 for i in range(policy.object_ring.replicas - 1)] + with mocked_http_conn(*codes): + self.assertRaises(DiskFileError, self.reconstructor.reconstruct_fa, + job, node, metadata) + error_lines = self.logger.get_lines_for_level('error') + # only 1 log to report no enough responses + self.assertEqual(1, len(error_lines)) + self.assertIn( + 'Unable to get enough responses (%s error responses)' + % (policy.object_ring.replicas - 1), + error_lines[0], + "Unexpected error line found: %s" % error_lines[0]) + # no warning + self.assertFalse(self.logger.get_lines_for_level('warning')) def test_reconstruct_fa_with_mixed_old_etag(self): job = { @@ -2788,13 +2902,14 @@ class TestObjectReconstructor(unittest.TestCase): etag = md5(test_data).hexdigest() ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data) + # bad response broken_body = ec_archive_bodies.pop(1) ts = (utils.Timestamp(t) for t in itertools.count(int(time.time()))) - # bad response - bad_headers = { + bad_headers = get_header_frag_index(self, broken_body) + bad_headers.update({ 'X-Object-Sysmeta-Ec-Etag': 'some garbage', 'X-Backend-Timestamp': next(ts).internal, - } + }) # good responses responses = list() @@ -2821,6 +2936,10 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(md5(fixed_body).hexdigest(), md5(broken_body).hexdigest()) + # no error and warning + self.assertFalse(self.logger.get_lines_for_level('error')) + self.assertFalse(self.logger.get_lines_for_level('warning')) + def test_reconstruct_fa_with_mixed_new_etag(self): job = { 'partition': 0, @@ -2861,17 +2980,164 @@ class TestObjectReconstructor(unittest.TestCase): self.assertEqual(md5(fixed_body).hexdigest(), md5(broken_body).hexdigest()) - # one newer etag can spoil the bunch - new_index = random.randint(0, len(responses) - self.policy.ec_nparity) + # one newer etag won't spoil the bunch + new_index = random.randint(0, self.policy.ec_ndata - 1) new_headers = get_header_frag_index(self, (responses[new_index])[1]) new_headers.update({'X-Object-Sysmeta-Ec-Etag': 'some garbage', 'X-Backend-Timestamp': next(ts).internal}) new_response = (200, '', new_headers) responses[new_index] = new_response + codes, body_iter, headers = zip(*responses) + with mocked_http_conn(*codes, body_iter=body_iter, headers=headers): + df = self.reconstructor.reconstruct_fa( + job, node, dict(metadata)) + fixed_body = ''.join(df.reader()) + self.assertEqual(len(fixed_body), len(broken_body)) + self.assertEqual(md5(fixed_body).hexdigest(), + md5(broken_body).hexdigest()) + + # no error and warning + self.assertFalse(self.logger.get_lines_for_level('error')) + self.assertFalse(self.logger.get_lines_for_level('warning')) + + def test_reconstruct_fa_with_mixed_etag_with_same_timestamp(self): + job = { + 'partition': 0, + 'policy': self.policy, + } + part_nodes = self.policy.object_ring.get_part_nodes(0) + node = 
part_nodes[1]
+        metadata = {
+            'name': '/a/c/o',
+            'Content-Length': 0,
+            'ETag': 'etag',
+            'X-Timestamp': '1234567890.12345'
+        }
+
+        test_data = ('rebuild' * self.policy.ec_segment_size)[:-777]
+        etag = md5(test_data).hexdigest()
+        ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data)
+
+        broken_body = ec_archive_bodies.pop(1)
+
+        # good responses
+        responses = list()
+        for body in ec_archive_bodies:
+            headers = get_header_frag_index(self, body)
+            headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
+            responses.append((200, body, headers))
+
+        # sanity check before negative test
+        codes, body_iter, headers = zip(*responses)
+        with mocked_http_conn(*codes, body_iter=body_iter, headers=headers):
+            df = self.reconstructor.reconstruct_fa(
+                job, node, dict(metadata))
+            fixed_body = ''.join(df.reader())
+            self.assertEqual(len(fixed_body), len(broken_body))
+            self.assertEqual(md5(fixed_body).hexdigest(),
+                             md5(broken_body).hexdigest())
+
+        # a garbage etag at the same timestamp won't spoil the bunch
+        # N.B. (FIXME): if we chose the first response as the garbage one,
+        # the reconstruction would fail because all of the other *correct*
+        # frags would be assumed to be garbage. To avoid that spurious
+        # failure, restrict randint to [1, self.policy.ec_ndata - 1] so that
+        # the first response is always a correct fragment to reconstruct from
+        new_index = random.randint(1, self.policy.ec_ndata - 1)
+        new_headers = get_header_frag_index(self, (responses[new_index])[1])
+        new_headers.update({'X-Object-Sysmeta-Ec-Etag': 'some garbage'})
+        new_response = (200, '', new_headers)
+        responses[new_index] = new_response
+        codes, body_iter, headers = zip(*responses)
+        with mocked_http_conn(*codes, body_iter=body_iter, headers=headers):
+            df = self.reconstructor.reconstruct_fa(
+                job, node, dict(metadata))
+            fixed_body = ''.join(df.reader())
+            self.assertEqual(len(fixed_body), len(broken_body))
+            self.assertEqual(md5(fixed_body).hexdigest(),
+                             md5(broken_body).hexdigest())
+
+        # one error log for the etag mismatch, but no warnings
+        error_log_lines = self.logger.get_lines_for_level('error')
+        self.assertEqual(1, len(error_log_lines))
+        self.assertIn(
+            'Mixed Etag (some garbage, %s) for 10.0.0.1:1001/sdb/0/a/c/o '
+            'policy#%s frag#1' % (etag, int(self.policy)),
+            error_log_lines[0])
+        self.assertFalse(self.logger.get_lines_for_level('warning'))
+
+    def test_reconstruct_fa_with_mixed_not_enough_etags_fail(self):
+        job = {
+            'partition': 0,
+            'policy': self.policy,
+        }
+        part_nodes = self.policy.object_ring.get_part_nodes(0)
+        node = part_nodes[1]
+        metadata = {
+            'name': '/a/c/o',
+            'Content-Length': 0,
+            'ETag': 'etag',
+            'X-Timestamp': '1234567890.12345'
+        }
+
+        test_data = ('rebuild' * self.policy.ec_segment_size)[:-777]
+        ec_archive_dict = dict()
+        ts = (utils.Timestamp(t) for t in itertools.count(int(time.time())))
+        # create 3 different ec bodies
+        for i in range(3):
+            body = test_data[i:]
+            archive_bodies = make_ec_archive_bodies(self.policy, body)
+            # pop the fragment index that belongs to the destination node
+            archive_bodies.pop(1)
+            ec_archive_dict[
+                (md5(body).hexdigest(), next(ts).internal)] = archive_bodies
+
+        responses = list()
+        # fill out the response list with the 3 different etag bodies
+        for etag, ts in itertools.cycle(ec_archive_dict):
+            body = ec_archive_dict[(etag, ts)].pop(0)
+            headers = get_header_frag_index(self, body)
+            headers.update({'X-Object-Sysmeta-Ec-Etag': etag,
+                            'X-Backend-Timestamp': ts})
+            responses.append((200, body, headers))
+            if len(responses) >= (self.policy.object_ring.replicas - 1):
+                break
+
+        # sanity: there are 3 different etags and each etag has fewer
+        # than ec_k bodies
+        etag_count = collections.Counter(
+            [in_resp_headers['X-Object-Sysmeta-Ec-Etag']
+             for _, _, in_resp_headers in responses])
+        self.assertEqual(3, len(etag_count))
+        for etag, count in etag_count.items():
+            self.assertLess(count, self.policy.ec_ndata)
+
         codes, body_iter, headers = zip(*responses)
         with mocked_http_conn(*codes, body_iter=body_iter, headers=headers):
             self.assertRaises(DiskFileError, self.reconstructor.reconstruct_fa,
-                              job, node, dict(metadata))
+                              job, node, metadata)
+
+        error_lines = self.logger.get_lines_for_level('error')
+        # one error log per etag to report not enough responses
+        self.assertEqual(3, len(error_lines))
+        for error_line in error_lines:
+            for expected_etag, ts in ec_archive_dict:
+                if expected_etag in error_line:
+                    break
+            else:
+                self.fail(
+                    "none of the expected etags %s found: %s" %
+                    (list(ec_archive_dict), error_line))
+
+            expected = 'Unable to get enough responses (%s/10) to ' \
+                       'reconstruct 10.0.0.1:1001/sdb/0/a/c/o policy#0 ' \
+                       'frag#1 with ETag' % etag_count[expected_etag]
+            self.assertIn(
+                expected, error_line,
+                "Unexpected error line found: Expected: %s Got: %s"
+                % (expected, error_line))
+        # no warning
+        self.assertFalse(self.logger.get_lines_for_level('warning'))

     def test_reconstruct_fa_finds_itself_does_not_fail(self):
         job = {
@@ -2897,12 +3163,9 @@ class TestObjectReconstructor(unittest.TestCase):
             ec_archive_bodies = ec_archive_bodies[:-1]

         def make_header(body):
-            metadata = self.policy.pyeclib_driver.get_metadata(body)
-            frag_index = struct.unpack('h', metadata[:2])[0]
-            return {
-                'X-Object-Sysmeta-Ec-Frag-Index': frag_index,
-                'X-Object-Sysmeta-Ec-Etag': etag,
-            }
+            headers = get_header_frag_index(self, body)
+            headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
+            return headers

         responses = [(200, body, make_header(body))
                      for body in ec_archive_bodies]
@@ -2915,6 +3178,31 @@ class TestObjectReconstructor(unittest.TestCase):
             self.assertEqual(md5(fixed_body).hexdigest(),
                              md5(broken_body).hexdigest())

+        # no errors or warnings
+        self.assertFalse(self.logger.get_lines_for_level('error'))
+        self.assertFalse(self.logger.get_lines_for_level('warning'))
+        # the found own frag will be reported in a debug message
+        debug_log_lines = self.logger.get_lines_for_level('debug')
+        self.assertEqual(2, len(debug_log_lines))
+        self.assertIn(
+            'Found existing frag #1 while rebuilding #1 from',
+            debug_log_lines[0])
+        # ... and then the found frag should be skipped in the responses
+
+        # N.B. in the future we could drop these checks, because simply
+        # sending the existing copy rather than reconstructing would
+        # definitely save resources. One more reason we avoid feeding the
+        # destination index fragment to the reconstruct function is that
+        # doing so causes a bunch of warning logs from liberasurecode[1].
+        # 1: https://github.com/openstack/liberasurecode/blob/
+        #    master/src/erasurecode.c#L870
+        expected_prefix = 'Reconstruct frag #1 with frag indexes'
+        self.assertIn(expected_prefix, debug_log_lines[1])
+        got_frag_index_list = json.loads(
+            debug_log_lines[1][len(expected_prefix):])
+        self.assertNotIn(1, got_frag_index_list)
+
     def test_reconstruct_fa_finds_duplicate_does_not_fail(self):
         job = {
             'partition': 0,
@@ -2940,12 +3228,9 @@ class TestObjectReconstructor(unittest.TestCase):
             ec_archive_bodies)[:-num_duplicates]

         def make_header(body):
-            metadata = self.policy.pyeclib_driver.get_metadata(body)
-            frag_index = struct.unpack('h', metadata[:2])[0]
-            return {
-                'X-Object-Sysmeta-Ec-Frag-Index': frag_index,
-                'X-Object-Sysmeta-Ec-Etag': etag,
-            }
+            headers = get_header_frag_index(self, body)
+            headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
+            return headers

         responses = [(200, body, make_header(body))
                      for body in ec_archive_bodies]
@@ -2958,6 +3243,224 @@ class TestObjectReconstructor(unittest.TestCase):
             self.assertEqual(md5(fixed_body).hexdigest(),
                              md5(broken_body).hexdigest())

+        # no errors or warnings
+        self.assertFalse(self.logger.get_lines_for_level('error'))
+        self.assertFalse(self.logger.get_lines_for_level('warning'))
+        debug_log_lines = self.logger.get_lines_for_level('debug')
+        self.assertEqual(1, len(debug_log_lines))
+        expected_prefix = 'Reconstruct frag #1 with frag indexes'
+        self.assertIn(expected_prefix, debug_log_lines[0])
+        got_frag_index_list = json.loads(
+            debug_log_lines[0][len(expected_prefix):])
+        self.assertNotIn(1, got_frag_index_list)
+
+    def test_reconstruct_fa_missing_headers(self):
+        # These are negative tests asserting what happens when expected
+        # headers are missing from the responses used to gather fragments
+        # for the reconstruction
+
+        job = {
+            'partition': 0,
+            'policy': self.policy,
+        }
+        part_nodes = self.policy.object_ring.get_part_nodes(0)
+        node = part_nodes[1]
+        metadata = {
+            'name': '/a/c/o',
+            'Content-Length': 0,
+            'ETag': 'etag',
+            'X-Timestamp': '1234567890.12345'
+        }
+
+        test_data = ('rebuild' * self.policy.ec_segment_size)[:-777]
+        etag = md5(test_data).hexdigest()
+        ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data)
+
+        broken_body = ec_archive_bodies.pop(1)
+
+        def make_header(body):
+            headers = get_header_frag_index(self, body)
+            headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
+            return headers
+
+        def test_missing_header(missing_header, expected_warning):
+            self.logger._clear()
+            responses = [(200, body, make_header(body))
+                         for body in ec_archive_bodies]
+
+            responses[0][2].update({missing_header: None})
+
+            codes, body_iter, headers = zip(*responses)
+            with mocked_http_conn(
+                    *codes, body_iter=body_iter, headers=headers):
+                df = self.reconstructor.reconstruct_fa(
+                    job, node, metadata)
+                fixed_body = ''.join(df.reader())
+                self.assertEqual(len(fixed_body), len(broken_body))
+                self.assertEqual(md5(fixed_body).hexdigest(),
+                                 md5(broken_body).hexdigest())
+
+            # no errors
+            self.assertFalse(self.logger.get_lines_for_level('error'))
+            # ...but a warning for the missing header
+            warning_log_lines = self.logger.get_lines_for_level('warning')
+            self.assertEqual(1, len(warning_log_lines))
+            self.assertIn(expected_warning, warning_log_lines)

+        message_base = \
+            "Invalid resp from 10.0.0.0:1000/sda/0/a/c/o policy#0 frag#0"
+
+        test_missing_header(
+            'X-Object-Sysmeta-Ec-Etag',
+            "%s %s" % (message_base, "(missing Etag)"))
+        test_missing_header(
+            'X-Object-Sysmeta-Ec-Frag-Index',
+            "%s %s" % (message_base,
+                       "(invalid X-Object-Sysmeta-Ec-Frag-Index)"))
+        test_missing_header(
+            'X-Backend-Timestamp',
+            "%s %s" % (message_base, "(missing X-Backend-Timestamp)"))
+
+    def test_reconstruct_fa_invalid_frag_index_headers(self):
+        # These are negative tests asserting what happens when the EC frag
+        # index header carries an invalid value in the responses used to
+        # gather fragments for the reconstruction
+
+        job = {
+            'partition': 0,
+            'policy': self.policy,
+        }
+        part_nodes = self.policy.object_ring.get_part_nodes(0)
+        node = part_nodes[1]
+        metadata = {
+            'name': '/a/c/o',
+            'Content-Length': 0,
+            'ETag': 'etag',
+            'X-Timestamp': '1234567890.12345'
+        }
+
+        test_data = ('rebuild' * self.policy.ec_segment_size)[:-777]
+        etag = md5(test_data).hexdigest()
+        ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data)
+
+        broken_body = ec_archive_bodies.pop(1)
+
+        def make_header(body):
+            headers = get_header_frag_index(self, body)
+            headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
+            return headers
+
+        def test_invalid_ec_frag_index_header(invalid_frag_index):
+            self.logger._clear()
+            responses = [(200, body, make_header(body))
+                         for body in ec_archive_bodies]
+
+            responses[0][2].update({
+                'X-Object-Sysmeta-Ec-Frag-Index': invalid_frag_index})
+
+            codes, body_iter, headers = zip(*responses)
+            with mocked_http_conn(
+                    *codes, body_iter=body_iter, headers=headers):
+                df = self.reconstructor.reconstruct_fa(
+                    job, node, metadata)
+                fixed_body = ''.join(df.reader())
+                self.assertEqual(len(fixed_body), len(broken_body))
+                self.assertEqual(md5(fixed_body).hexdigest(),
+                                 md5(broken_body).hexdigest())
+
+            # no errors
+            self.assertFalse(self.logger.get_lines_for_level('error'))
+            # ...but a warning for the invalid header
+            warning_log_lines = self.logger.get_lines_for_level('warning')
+            self.assertEqual(1, len(warning_log_lines))
+            expected_message = \
+                "Invalid resp from 10.0.0.0:1000/sda/0/a/c/o " \
+                "policy#0 frag#0 (invalid X-Object-Sysmeta-Ec-Frag-Index)"
+            self.assertIn(expected_message, warning_log_lines)
+
+        for value in ('None', 'invalid'):
+            test_invalid_ec_frag_index_header(value)
+
+
+@patch_policies([ECStoragePolicy(0, name='ec', is_default=True,
+                                 ec_type=DEFAULT_TEST_EC_TYPE,
+                                 ec_ndata=10, ec_nparity=4,
+                                 ec_segment_size=4096,
+                                 ec_duplication_factor=2)],
+                fake_ring_args=[{'replicas': 28}])
+class TestObjectReconstructorECDuplicationFactor(TestObjectReconstructor):
+    def setUp(self):
+        super(TestObjectReconstructorECDuplicationFactor, self).setUp()
+        self.fabricated_ring = FabricatedRing(replicas=28, devices=56)
+
+    def _test_reconstruct_with_duplicate_frags_no_errors(self, index):
+        job = {
+            'partition': 0,
+            'policy': self.policy,
+        }
+        part_nodes = self.policy.object_ring.get_part_nodes(0)
+        node = part_nodes[index]
+        metadata = {
+            'name': '/a/c/o',
+            'Content-Length': 0,
+            'ETag': 'etag',
+            'X-Timestamp': '1234567890.12345',
+        }
+
+        test_data = ('rebuild' * self.policy.ec_segment_size)[:-777]
+        etag = md5(test_data).hexdigest()
+        ec_archive_bodies = make_ec_archive_bodies(self.policy, test_data)
+
+        broken_body = ec_archive_bodies.pop(index)
+
+        responses = list()
+        for body in ec_archive_bodies:
+            headers = get_header_frag_index(self, body)
+            headers.update({'X-Object-Sysmeta-Ec-Etag': etag})
+            responses.append((200, body, headers))
+
+        # make a hook point at
+        # swift.obj.reconstructor.ObjectReconstructor._get_response
+        called_headers = []
+        orig_func = object_reconstructor.ObjectReconstructor._get_response
+
+        def _get_response_hook(self, node, part, path, headers, policy):
+            called_headers.append(headers)
+
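            # Why the m + 1 node failures mentioned below (a sketch under
            # this class's 10+4 scheme with ec_duplication_factor=2):
            # losing up to ec_nparity of the 14 unique fragments still
            # leaves ec_ndata decodable responses in the first set, so only
            # the (m + 1)th loss pushes the reconstructor onto the duplicate
            # copies (illustration only, not the reconstructor source):
            def sketch_failures_to_reach_duplicates(ec_nparity):
                return ec_nparity + 1  # 5 Timeouts for the 10+4 scheme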
return orig_func(self, node, part, path, headers, policy) + + # need to m + 1 node failures to reach 2nd set of duplicated fragments + failed_start_at = ( + self.policy.ec_n_unique_fragments - self.policy.ec_nparity - 1) + + # set Timeout for node #10, #11, #12, #13, #14 + for i in range(self.policy.ec_nparity + 1): + responses[failed_start_at + i] = (Timeout(), '', '') + + codes, body_iter, headers = zip(*responses) + get_response_path = \ + 'swift.obj.reconstructor.ObjectReconstructor._get_response' + with mock.patch(get_response_path, _get_response_hook): + with mocked_http_conn( + *codes, body_iter=body_iter, headers=headers): + df = self.reconstructor.reconstruct_fa( + job, node, metadata) + fixed_body = ''.join(df.reader()) + self.assertEqual(len(fixed_body), len(broken_body)) + self.assertEqual(md5(fixed_body).hexdigest(), + md5(broken_body).hexdigest()) + for called_header in called_headers: + called_header = HeaderKeyDict(called_header) + self.assertTrue('Content-Length' in called_header) + self.assertEqual(called_header['Content-Length'], '0') + self.assertTrue('User-Agent' in called_header) + user_agent = called_header['User-Agent'] + self.assertTrue(user_agent.startswith('obj-reconstructor')) + + def test_reconstruct_with_duplicate_frags_no_errors(self): + # any fragments can be broken + for index in range(28): + self._test_reconstruct_with_duplicate_frags_no_errors(index) + if __name__ == '__main__': unittest.main() diff --git a/test/unit/proxy/controllers/test_obj.py b/test/unit/proxy/controllers/test_obj.py index 4ceccb61b0..59545ae65e 100644 --- a/test/unit/proxy/controllers/test_obj.py +++ b/test/unit/proxy/controllers/test_obj.py @@ -33,18 +33,18 @@ from six.moves import range import swift from swift.common import utils, swob, exceptions from swift.common.exceptions import ChunkWriteTimeout -from swift.common.header_key_dict import HeaderKeyDict from swift.common.utils import Timestamp -from swift.obj import server from swift.proxy import server as proxy_server from swift.proxy.controllers import obj from swift.proxy.controllers.base import \ get_container_info as _real_get_container_info -from swift.common.storage_policy import POLICIES, ECDriverError, StoragePolicy +from swift.common.storage_policy import POLICIES, ECDriverError, \ + StoragePolicy, ECStoragePolicy from test.unit import FakeRing, FakeMemcache, fake_http_connect, \ debug_logger, patch_policies, SlowBody, FakeStatus, \ - encode_frag_archive_bodies + DEFAULT_TEST_EC_TYPE, encode_frag_archive_bodies, make_ec_object_stub, \ + fake_ec_node_response, StubResponse from test.unit.proxy.test_server import node_error_count @@ -199,7 +199,7 @@ class BaseObjectControllerMixin(object): controller = self.controller_cls( self.app, 'a', 'c', 'o') self.app.write_affinity_is_local_fn = None - object_ring = self.app.get_object_ring(None) + object_ring = self.policy.object_ring all_nodes = object_ring.get_part_nodes(1) all_nodes.extend(object_ring.get_more_nodes(1)) @@ -218,7 +218,7 @@ class BaseObjectControllerMixin(object): # we'll write to one more than replica count local nodes self.app.write_affinity_node_count = lambda r: r + 1 - object_ring = self.app.get_object_ring(None) + object_ring = self.policy.object_ring # make our fake ring have plenty of nodes, and not get limited # artificially by the proxy max request node count object_ring.max_more_nodes = 100000 @@ -255,7 +255,7 @@ class BaseObjectControllerMixin(object): self.app.write_affinity_is_local_fn = ( lambda node: node['region'] == 1) - object_ring = 
self.app.get_object_ring(None)
+        object_ring = self.policy.object_ring
         all_nodes = object_ring.get_part_nodes(1)
         all_nodes.extend(object_ring.get_more_nodes(1))
@@ -1373,30 +1373,6 @@ class TestReplicatedObjControllerVariousReplicas(BaseObjectControllerMixin,
     controller_cls = obj.ReplicatedObjectController


-class StubResponse(object):
-
-    def __init__(self, status, body='', headers=None, frag_index=None):
-        self.status = status
-        self.body = body
-        self.readable = BytesIO(body)
-        self.headers = HeaderKeyDict(headers)
-        if frag_index is not None:
-            self.headers['X-Object-Sysmeta-Ec-Frag-Index'] = frag_index
-        fake_reason = ('Fake', 'This response is a lie.')
-        self.reason = swob.RESPONSE_REASONS.get(status, fake_reason)[0]
-
-    def getheader(self, header_name, default=None):
-        return self.headers.get(header_name, default)
-
-    def getheaders(self):
-        if 'Content-Length' not in self.headers:
-            self.headers['Content-Length'] = len(self.body)
-        return self.headers.items()
-
-    def read(self, amt=0):
-        return self.readable.read(amt)
-
-
 @contextmanager
 def capture_http_requests(get_response):

@@ -1473,37 +1449,38 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase):

         # create a dummy list of putters, check no handoffs
         putters = []
-        for index in range(0, 4):
+        for index in range(self.policy.object_ring.replica_count):
             putters.append(FakePutter(index))
-        got = controller._determine_chunk_destinations(putters)
+        got = controller._determine_chunk_destinations(putters, self.policy)
         expected = {}
         for i, p in enumerate(putters):
             expected[p] = i
         self.assertEqual(got, expected)

         # now lets make a handoff at the end
-        putters[3].node_index = None
-        got = controller._determine_chunk_destinations(putters)
+        orig_index = putters[-1].node_index
+        putters[-1].node_index = None
+        got = controller._determine_chunk_destinations(putters, self.policy)
         self.assertEqual(got, expected)
-        putters[3].node_index = 3
+        putters[-1].node_index = orig_index

         # now lets make a handoff at the start
         putters[0].node_index = None
-        got = controller._determine_chunk_destinations(putters)
+        got = controller._determine_chunk_destinations(putters, self.policy)
         self.assertEqual(got, expected)
         putters[0].node_index = 0

         # now lets make a handoff in the middle
         putters[2].node_index = None
-        got = controller._determine_chunk_destinations(putters)
+        got = controller._determine_chunk_destinations(putters, self.policy)
         self.assertEqual(got, expected)
-        putters[2].node_index = 0
+        putters[2].node_index = 2

         # now lets make all of them handoffs
-        for index in range(0, 4):
+        for index in range(self.policy.object_ring.replica_count):
             putters[index].node_index = None
-        got = controller._determine_chunk_destinations(putters)
-        self.assertEqual(got, expected)
+        got = controller._determine_chunk_destinations(putters, self.policy)
+        self.assertEqual(sorted(got), sorted(expected))

     def test_GET_simple(self):
         req = swift.common.swob.Request.blank('/v1/a/c/o')
@@ -1643,7 +1620,8 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase):
             fragments = self.policy.pyeclib_driver.encode(chunk)
             if not fragments:
                 break
-            fragment_payloads.append(fragments)
+            fragment_payloads.append(
+                fragments * self.policy.ec_duplication_factor)
         # sanity
         sanity_body = ''
         for fragment_payload in fragment_payloads:
@@ -1802,6 +1780,8 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase):
         req = swift.common.swob.Request.blank('/v1/a/c/o', method='PUT',
                                               body='')
         codes = [(100, 503, Exception('not used'))] *
self.quorum() + if isinstance(self.policy, ECStoragePolicy): + codes *= self.policy.ec_duplication_factor codes += [201] * (self.replicas() - len(codes)) random.shuffle(codes) expect_headers = { @@ -2211,123 +2191,10 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): def _make_ec_object_stub(self, test_body=None, policy=None, timestamp=None): policy = policy or self.policy - segment_size = policy.ec_segment_size - test_body = test_body or ( - 'test' * segment_size)[:-random.randint(1, 1000)] - timestamp = timestamp or Timestamp(time.time()) - etag = md5(test_body).hexdigest() - ec_archive_bodies = self._make_ec_archive_bodies(test_body, - policy=policy) - return { - 'body': test_body, - 'etag': etag, - 'frags': ec_archive_bodies, - 'timestamp': timestamp - } + return make_ec_object_stub(test_body, policy, timestamp) def _fake_ec_node_response(self, node_frags): - """ - Given a list of entries for each node in ring order, where the entries - are a dict (or list of dicts) which describes the fragment (or - fragments) that are on the node; create a function suitable for use - with capture_http_requests that will accept a req object and return a - response that will suitably fake the behavior of an object server who - had the given fragments on disk at the time. - - :param node_frags: a list. Each item in the list describes the - fragments that are on a node; each item is a dict or list of dicts, - each dict describing a single fragment; where the item is a list, - repeated calls to get_response will return fragments in the order - of the list; each dict has keys: - - obj: an object stub, as generated by _make_ec_object_stub, - that defines all of the fragments that compose an object - at a specific timestamp. - - frag: the index of a fragment to be selected from the object - stub - - durable (optional): True if the selected fragment is durable - - """ - node_map = {} # maps node ip and port to node index - all_nodes = [] - call_count = {} # maps node index to get_response call count for node - - def _build_node_map(req): - node_key = lambda n: (n['ip'], n['port']) - part = utils.split_path(req['path'], 5, 5, True)[1] - policy = POLICIES[int( - req['headers']['X-Backend-Storage-Policy-Index'])] - all_nodes.extend(policy.object_ring.get_part_nodes(part)) - all_nodes.extend(policy.object_ring.get_more_nodes(part)) - for i, node in enumerate(all_nodes): - node_map[node_key(node)] = i - call_count[i] = 0 - - # normalize node_frags to a list of fragments for each node even - # if there's only one fragment in the dataset provided. 
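        # For readers following the move: the docstring above spells out the
        # node_frags convention that the relocated helper and the GET tests
        # below lean on. A minimal sketch of an input (entries in ring order;
        # a dict is one fragment, a list allows several per node, and []
        # makes the node return 404; illustration only):
        sketch_node_frags = [
            {'obj': {'etag': 'e1'}, 'frag': 0},       # node with one frag
            [{'obj': {'etag': 'e1'}, 'frag': 1,
              'durable': False}],                     # non-durable frag
            [],                                       # this node 404s
        ]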
- for i, frags in enumerate(node_frags): - if isinstance(frags, dict): - node_frags[i] = [frags] - - def get_response(req): - if not node_map: - _build_node_map(req) - - try: - node_index = node_map[(req['ip'], req['port'])] - except KeyError: - raise Exception("Couldn't find node %s:%s in %r" % ( - req['ip'], req['port'], all_nodes)) - try: - frags = node_frags[node_index] - except IndexError: - raise Exception('Found node %r:%r at index %s - ' - 'but only got %s stub response nodes' % ( - req['ip'], req['port'], node_index, - len(node_frags))) - - if not frags: - return StubResponse(404) - - # determine response fragment (if any) for this call - resp_frag = frags[call_count[node_index]] - call_count[node_index] += 1 - frag_prefs = req['headers'].get('X-Backend-Fragment-Preferences') - if not (frag_prefs or resp_frag.get('durable', True)): - return StubResponse(404) - - # prepare durable timestamp and backend frags header for this node - obj_stub = resp_frag['obj'] - ts2frags = defaultdict(list) - durable_timestamp = None - for frag in frags: - ts_frag = frag['obj']['timestamp'] - if frag.get('durable', True): - durable_timestamp = ts_frag.internal - ts2frags[ts_frag].append(frag['frag']) - - try: - body = obj_stub['frags'][resp_frag['frag']] - except IndexError as err: - raise Exception( - 'Frag index %s not defined: node index %s, frags %r\n%s' % - (resp_frag['frag'], node_index, [f['frag'] for f in frags], - err)) - headers = { - 'X-Object-Sysmeta-Ec-Content-Length': len(obj_stub['body']), - 'X-Object-Sysmeta-Ec-Etag': obj_stub['etag'], - 'X-Object-Sysmeta-Ec-Frag-Index': resp_frag['frag'], - 'X-Backend-Timestamp': obj_stub['timestamp'].internal, - 'X-Timestamp': obj_stub['timestamp'].normal, - 'X-Backend-Data-Timestamp': obj_stub['timestamp'].internal, - 'X-Backend-Fragments': - server._make_backend_fragments_header(ts2frags) - } - if durable_timestamp: - headers['X-Backend-Durable-Timestamp'] = durable_timestamp - - return StubResponse(200, body, headers) - - return get_response + return fake_ec_node_response(node_frags, self.policy) def test_GET_with_frags_swapped_around(self): segment_size = self.policy.ec_segment_size @@ -2338,21 +2205,25 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): _part, primary_nodes = self.obj_ring.get_nodes('a', 'c', 'o') node_key = lambda n: (n['ip'], n['port']) - + backend_index = lambda index: self.policy.get_backend_index(index) ts = self._ts_iter.next() + response_map = { - node_key(n): StubResponse(200, ec_archive_bodies[i], { - 'X-Object-Sysmeta-Ec-Content-Length': len(test_data), - 'X-Object-Sysmeta-Ec-Etag': etag, - 'X-Object-Sysmeta-Ec-Frag-Index': i, - 'X-Timestamp': ts.normal, - 'X-Backend-Timestamp': ts.internal - }) for i, n in enumerate(primary_nodes) + node_key(n): StubResponse( + 200, ec_archive_bodies[backend_index(i)], { + 'X-Object-Sysmeta-Ec-Content-Length': len(test_data), + 'X-Object-Sysmeta-Ec-Etag': etag, + 'X-Object-Sysmeta-Ec-Frag-Index': backend_index(i), + 'X-Timestamp': ts.normal, + 'X-Backend-Timestamp': ts.internal + }) for i, n in enumerate(primary_nodes) } # swap a parity response into a data node data_node = random.choice(primary_nodes[:self.policy.ec_ndata]) - parity_node = random.choice(primary_nodes[self.policy.ec_ndata:]) + parity_node = random.choice( + primary_nodes[ + self.policy.ec_ndata:self.policy.ec_n_unique_fragments]) (response_map[node_key(data_node)], response_map[node_key(parity_node)]) = \ (response_map[node_key(parity_node)], @@ -2386,7 +2257,7 @@ class 
TestECObjController(BaseObjectControllerMixin, unittest.TestCase): def test_GET_with_only_handoffs(self): obj1 = self._make_ec_object_stub() - node_frags = [[]] * 14 # all primaries missing + node_frags = [[]] * self.replicas() # all primaries missing node_frags = node_frags + [ # handoffs {'obj': obj1, 'frag': 0}, {'obj': obj1, 'frag': 1}, @@ -2423,7 +2294,8 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): # GETS would be required to all primaries and then ndata handoffs self.assertEqual(len(log), self.replicas() + self.policy.ec_ndata) self.assertEqual(2, len(collected_responses)) - self.assertEqual(14, len(collected_responses[None])) # 404s + # 404s + self.assertEqual(self.replicas(), len(collected_responses[None])) self.assertEqual(self.policy.ec_ndata, len(collected_responses[obj1['etag']])) @@ -2783,7 +2655,7 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): obj1 = self._make_ec_object_stub() obj2 = self._make_ec_object_stub() # we get a 503 when all the handoffs return 200 - node_frags = [[]] * 14 # primaries have no frags + node_frags = [[]] * self.replicas() # primaries have no frags node_frags = node_frags + [ # handoffs all have frags {'obj': obj1, 'frag': 0}, {'obj': obj2, 'frag': 0}, @@ -3133,6 +3005,7 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): # non-durable frag of obj2. The proxy will need to do at least 10 # GETs to see all the obj3 frags plus 1 more to GET a durable frag. # The proxy may also do one more GET if the obj2 frag is found. + # i.e. 10 + 1 durable for obj3, 2 for obj1 and 1 more if obj2 found obj2 = self._make_ec_object_stub(timestamp=self._ts_iter.next()) obj3 = self._make_ec_object_stub(timestamp=self._ts_iter.next()) obj1 = self._make_ec_object_stub(timestamp=self._ts_iter.next()) @@ -3167,7 +3040,7 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): self.assertEqual(resp.headers['etag'], obj3['etag']) self.assertEqual(md5(resp.body).hexdigest(), obj3['etag']) self.assertGreaterEqual(len(log), self.policy.ec_ndata + 1) - self.assertLessEqual(len(log), self.policy.ec_ndata + 3) + self.assertLessEqual(len(log), self.policy.ec_ndata + 4) def test_GET_with_missing_durables_and_older_non_durables(self): # scenario: non-durable frags of newer obj1 obscure all frags @@ -3582,9 +3455,10 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): ec_archive_bodies = self._make_ec_archive_bodies(test_data) headers = {'X-Object-Sysmeta-Ec-Etag': etag} self.app.recoverable_node_timeout = 0.01 - responses = [(200, SlowBody(body, 0.1), - self._add_frag_index(i, headers)) - for i, body in enumerate(ec_archive_bodies)] + responses = [ + (200, SlowBody(body, 0.1), self._add_frag_index(i, headers)) + for i, body in enumerate(ec_archive_bodies) + ] * self.policy.ec_duplication_factor req = swob.Request.blank('/v1/a/c/o') @@ -3697,7 +3571,7 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): codes = [201] * (self.policy.ec_ndata + 1) codes += [503] * (self.policy.ec_nparity - 1) - self.assertEqual(len(codes), self.replicas()) + self.assertEqual(len(codes), self.policy.ec_n_unique_fragments) random.shuffle(codes) expect_headers = { 'X-Obj-Metadata-Footer': 'yes', @@ -3713,7 +3587,7 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): codes = [201] * (self.policy.ec_ndata) codes += [503] * (self.policy.ec_nparity) - self.assertEqual(len(codes), self.replicas()) + self.assertEqual(len(codes), 
self.policy.ec_n_unique_fragments) random.shuffle(codes) expect_headers = { 'X-Obj-Metadata-Footer': 'yes', @@ -3744,7 +3618,7 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): 'Range': 'bytes=%s' % (req_range)}) fragments = self.policy.pyeclib_driver.encode(real_body) - fragment_payloads = [fragments] + fragment_payloads = [fragments * self.policy.ec_duplication_factor] node_fragments = zip(*fragment_payloads) self.assertEqual(len(node_fragments), self.replicas()) # sanity @@ -3778,5 +3652,710 @@ class TestECObjController(BaseObjectControllerMixin, unittest.TestCase): self.assertEqual(resp.headers['Accept-Ranges'], 'bytes') +class TestECFunctions(unittest.TestCase): + def test_chunk_transformer(self): + segment_size = 1024 + orig_chunk = 'a' * segment_size + policy = ECStoragePolicy(0, 'ec8-2', ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=8, ec_nparity=2, + object_ring=FakeRing(replicas=10), + ec_segment_size=segment_size) + expected = policy.pyeclib_driver.encode(orig_chunk) + transform = obj.chunk_transformer( + policy, policy.object_ring.replica_count) + transform.send(None) + + backend_chunks = transform.send(orig_chunk) + self.assertNotEqual(None, backend_chunks) # sanity + self.assertEqual( + len(backend_chunks), policy.object_ring.replica_count) + self.assertEqual(expected, backend_chunks) + + def test_chunk_transformer_duplication_factor(self): + segment_size = 1024 + orig_chunk = 'a' * segment_size + policy = ECStoragePolicy(0, 'ec8-2', ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=8, ec_nparity=2, + object_ring=FakeRing(replicas=20), + ec_segment_size=segment_size, + ec_duplication_factor=2) + expected = policy.pyeclib_driver.encode(orig_chunk) + transform = obj.chunk_transformer( + policy, policy.object_ring.replica_count) + transform.send(None) + + backend_chunks = transform.send(orig_chunk) + self.assertNotEqual(None, backend_chunks) # sanity + self.assertEqual( + len(backend_chunks), policy.object_ring.replica_count) + self.assertEqual(expected * 2, backend_chunks) + + # flush out last chunk buffer + backend_chunks = transform.send('') + self.assertEqual( + len(backend_chunks), policy.object_ring.replica_count) + self.assertEqual([''] * policy.object_ring.replica_count, + backend_chunks) + + def test_chunk_transformer_duplication_factor_non_aligned_last_chunk(self): + last_chunk = 'a' * 128 + policy = ECStoragePolicy(0, 'ec8-2', ec_type=DEFAULT_TEST_EC_TYPE, + ec_ndata=8, ec_nparity=2, + object_ring=FakeRing(replicas=20), + ec_segment_size=1024, + ec_duplication_factor=2) + expected = policy.pyeclib_driver.encode(last_chunk) + transform = obj.chunk_transformer( + policy, policy.object_ring.replica_count) + transform.send(None) + + transform.send(last_chunk) + # flush out last chunk buffer + backend_chunks = transform.send('') + + self.assertEqual( + len(backend_chunks), policy.object_ring.replica_count) + self.assertEqual(expected * 2, backend_chunks) + + +@patch_policies([ECStoragePolicy(0, name='ec', is_default=True, + ec_type=DEFAULT_TEST_EC_TYPE, ec_ndata=10, + ec_nparity=4, ec_segment_size=4096, + ec_duplication_factor=2), + StoragePolicy(1, name='unu')], + fake_ring_args=[{'replicas': 28}, {}]) +class TestECDuplicationObjController( + BaseObjectControllerMixin, unittest.TestCase): + container_info = { + 'status': 200, + 'read_acl': None, + 'write_acl': None, + 'sync_key': None, + 'versions': None, + 'storage_policy': '0', + } + + controller_cls = obj.ECObjectController + + def _make_ec_object_stub(self, test_body=None, policy=None, + timestamp=None): + 
policy = policy or self.policy
+        return make_ec_object_stub(test_body, policy, timestamp)
+
+    def _make_ec_archive_bodies(self, test_body, policy=None):
+        policy = policy or self.policy
+        return encode_frag_archive_bodies(policy, test_body)
+
+    def _fake_ec_node_response(self, node_frags):
+        return fake_ec_node_response(node_frags, self.policy)
+
+    def _test_GET_with_duplication_factor(self, node_frags, obj):
+        # these are basic checks with all backends healthy
+        fake_response = self._fake_ec_node_response(node_frags)
+
+        req = swob.Request.blank('/v1/a/c/o')
+        with capture_http_requests(fake_response) as log:
+            resp = req.get_response(self.app)
+
+        self.assertEqual(resp.status_int, 200)
+        self.assertEqual(resp.headers['etag'], obj['etag'])
+        self.assertEqual(md5(resp.body).hexdigest(), obj['etag'])
+
+        collected_responses = defaultdict(set)
+        for conn in log:
+            etag = conn.resp.headers['X-Object-Sysmeta-Ec-Etag']
+            index = conn.resp.headers['X-Object-Sysmeta-Ec-Frag-Index']
+            collected_responses[etag].add(index)
+
+        # the number of backend requests should be >= num_data_fragments
+        self.assertGreaterEqual(len(log), self.policy.ec_ndata)
+        # but <= the number of replicas
+        self.assertLessEqual(len(log), self.replicas())
+        self.assertEqual(len(collected_responses), 1)
+
+        etag, frags = collected_responses.items()[0]
+        # the backend requests will stop once ec_ndata responses are found
+        self.assertEqual(
+            len(frags), self.policy.ec_ndata,
+            'collected %s frags for etag %s' % (len(frags), etag))
+
+    # TODO: 'frag' in node_frags actually means 'node_index' in the
+    # following tests. Reconsider the name, or change the semantics so
+    # that it maps correctly to a frag_index.
+    def test_GET_with_duplication_factor(self):
+        obj = self._make_ec_object_stub()
+        node_frags = [
+            {'obj': obj, 'frag': 0},
+            {'obj': obj, 'frag': 1},
+            {'obj': obj, 'frag': 2},
+            {'obj': obj, 'frag': 3},
+            {'obj': obj, 'frag': 4},
+            {'obj': obj, 'frag': 5},
+            {'obj': obj, 'frag': 6},
+            {'obj': obj, 'frag': 7},
+            {'obj': obj, 'frag': 8},
+            {'obj': obj, 'frag': 9},
+            {'obj': obj, 'frag': 10},
+            {'obj': obj, 'frag': 11},
+            {'obj': obj, 'frag': 12},
+            {'obj': obj, 'frag': 13},
+        ] * 2  # duplicated!
+        self._test_GET_with_duplication_factor(node_frags, obj)
+
+    def test_GET_with_duplication_factor_almost_duplicate_dispersion(self):
+        obj = self._make_ec_object_stub()
+        node_frags = [
+            # the first 16 nodes have the unique frag indexes 0 - 7,
+            # each duplicated
+            {'obj': obj, 'frag': 0},
+            {'obj': obj, 'frag': 0},
+            {'obj': obj, 'frag': 1},
+            {'obj': obj, 'frag': 1},
+            {'obj': obj, 'frag': 2},
+            {'obj': obj, 'frag': 2},
+            {'obj': obj, 'frag': 3},
+            {'obj': obj, 'frag': 3},
+            {'obj': obj, 'frag': 4},
+            {'obj': obj, 'frag': 4},
+            {'obj': obj, 'frag': 5},
+            {'obj': obj, 'frag': 5},
+            {'obj': obj, 'frag': 6},
+            {'obj': obj, 'frag': 6},
+            {'obj': obj, 'frag': 7},
+            {'obj': obj, 'frag': 7},
+            # the remaining 12 nodes have the unique frag indexes 8 - 13,
+            # each duplicated
+            {'obj': obj, 'frag': 8},
+            {'obj': obj, 'frag': 8},
+            {'obj': obj, 'frag': 9},
+            {'obj': obj, 'frag': 9},
+            {'obj': obj, 'frag': 10},
+            {'obj': obj, 'frag': 10},
+            {'obj': obj, 'frag': 11},
+            {'obj': obj, 'frag': 11},
+            {'obj': obj, 'frag': 12},
+            {'obj': obj, 'frag': 12},
+            {'obj': obj, 'frag': 13},
+            {'obj': obj, 'frag': 13},
+        ]
+        # ...but it still works!
+ self._test_GET_with_duplication_factor(node_frags, obj) + + def test_GET_with_missing_and_mixed_frags_will_dig_deep_but_stop(self): + obj1 = self._make_ec_object_stub() + obj2 = self._make_ec_object_stub() + + # both of obj1 and obj2 has only 9 frags which is not able to decode + node_frags = [ + {'obj': obj1, 'frag': 0}, + {'obj': obj2, 'frag': 0}, + {'obj': obj1, 'frag': 1}, + {'obj': obj2, 'frag': 1}, + {'obj': obj1, 'frag': 2}, + {'obj': obj2, 'frag': 2}, + {'obj': obj1, 'frag': 3}, + {'obj': obj2, 'frag': 3}, + {'obj': obj1, 'frag': 4}, + {'obj': obj2, 'frag': 4}, + {'obj': obj1, 'frag': 5}, + {'obj': obj2, 'frag': 5}, + {'obj': obj1, 'frag': 6}, + {'obj': obj2, 'frag': 6}, + {'obj': obj1, 'frag': 7}, + {'obj': obj2, 'frag': 7}, + {'obj': obj1, 'frag': 8}, + {'obj': obj2, 'frag': 8}, + ] + # ... and the rests are 404s which is limited by request_count + # (2 * replicas in default) rather than max_extra_count limitation + # because the retries will be in ResumingGetter if the responses + # are 404s + node_frags += [[]] * (self.replicas() * 2 - len(node_frags)) + fake_response = self._fake_ec_node_response(node_frags) + + req = swob.Request.blank('/v1/a/c/o') + with capture_http_requests(fake_response) as log: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 404) + + collected_responses = defaultdict(set) + for conn in log: + etag = conn.resp.headers['X-Object-Sysmeta-Ec-Etag'] + index = conn.resp.headers['X-Object-Sysmeta-Ec-Frag-Index'] + collected_responses[etag].add(index) + + # default node_iter will exhaust to the last of handoffs + self.assertEqual(len(log), self.replicas() * 2) + # we have obj1, obj2, and 404 NotFound in collected_responses + self.assertEqual(len(collected_responses), 3) + + # ... regardless we should never need to fetch more than ec_ndata + # frags for any given etag + for etag, frags in collected_responses.items(): + self.assertLessEqual(len(frags), self.policy.ec_ndata, + 'collected %s frags for etag %s' % ( + len(frags), etag)) + + def test_GET_with_duplicate_but_insufficient_frag(self): + obj1 = self._make_ec_object_stub() + # proxy should ignore duplicated frag indexes and continue search for + # a set of unique indexes, but fails to find one + node_frags = [ + {'obj': obj1, 'frag': 0}, + {'obj': obj1, 'frag': 0}, # duplicate frag + {'obj': obj1, 'frag': 1}, + {'obj': obj1, 'frag': 1}, # duplicate frag + {'obj': obj1, 'frag': 2}, + {'obj': obj1, 'frag': 2}, # duplicate frag + {'obj': obj1, 'frag': 3}, + {'obj': obj1, 'frag': 3}, # duplicate frag + {'obj': obj1, 'frag': 4}, + {'obj': obj1, 'frag': 4}, # duplicate frag + {'obj': obj1, 'frag': 10}, + {'obj': obj1, 'frag': 11}, + {'obj': obj1, 'frag': 12}, + {'obj': obj1, 'frag': 13}, + ] + + # ... 
and the rests are 404s which is limited by request_count + # (2 * replicas in default) rather than max_extra_count limitation + # because the retries will be in ResumingGetter if the responses + # are 404s + node_frags += [[]] * (self.replicas() * 2 - len(node_frags)) + fake_response = self._fake_ec_node_response(node_frags) + + req = swob.Request.blank('/v1/a/c/o') + with capture_http_requests(fake_response) as log: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 404) + + # expect a request to all nodes + self.assertEqual(2 * self.replicas(), len(log)) + collected_indexes = defaultdict(list) + for conn in log: + fi = conn.resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index') + if fi is not None: + collected_indexes[fi].append(conn) + self.assertEqual(len(collected_indexes), self.policy.ec_ndata - 1) + + def test_GET_with_many_missed_overwrite_will_need_handoff(self): + obj1 = self._make_ec_object_stub() + obj2 = self._make_ec_object_stub() + # primaries + node_frags = [ + {'obj': obj2, 'frag': 0}, + {'obj': obj2, 'frag': 1}, + {'obj': obj1, 'frag': 2}, # missed + {'obj': obj2, 'frag': 3}, + {'obj': obj2, 'frag': 4}, + {'obj': obj2, 'frag': 5}, + {'obj': obj1, 'frag': 6}, # missed + {'obj': obj2, 'frag': 7}, + {'obj': obj2, 'frag': 8}, + {'obj': obj1, 'frag': 9}, # missed + {'obj': obj1, 'frag': 10}, # missed + {'obj': obj1, 'frag': 11}, # missed + {'obj': obj2, 'frag': 12}, + {'obj': obj2, 'frag': 13}, + ] + + node_frags = node_frags * 2 # 2 duplication + + # so the primaries have indexes 0, 1, 3, 4, 5, 7, 8, 12, 13 + # (9 indexes) for obj2 and then a handoff has index 6 + node_frags += [ + {'obj': obj2, 'frag': 6}, # handoff + ] + fake_response = self._fake_ec_node_response(node_frags) + + req = swob.Request.blank('/v1/a/c/o') + with capture_http_requests(fake_response) as log: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.headers['etag'], obj2['etag']) + self.assertEqual(md5(resp.body).hexdigest(), obj2['etag']) + + collected_responses = defaultdict(set) + for conn in log: + etag = conn.resp.headers['X-Object-Sysmeta-Ec-Etag'] + index = conn.resp.headers['X-Object-Sysmeta-Ec-Frag-Index'] + collected_responses[etag].add(index) + + # there's not enough of the obj2 etag on the primaries, we would + # have collected responses for both etags, and would have made + # one more request to the handoff node + self.assertEqual(len(log), self.replicas() + 1) + self.assertEqual(len(collected_responses), 2) + + # ... 
regardless we should never need to fetch more than ec_ndata + # frags for any given etag + for etag, frags in collected_responses.items(): + self.assertTrue(len(frags) <= self.policy.ec_ndata, + 'collected %s frags for etag %s' % ( + len(frags), etag)) + + def test_GET_with_duplicate_but_sufficient_frag_indexes(self): + obj1 = self._make_ec_object_stub() + # proxy should ignore duplicated frag indexes and continue search for + # a set of unique indexes, finding last one on a handoff + node_frags = [ + {'obj': obj1, 'frag': 0}, + {'obj': obj1, 'frag': 0}, # duplicate frag + {'obj': obj1, 'frag': 1}, + {'obj': obj1, 'frag': 1}, # duplicate frag + {'obj': obj1, 'frag': 2}, + {'obj': obj1, 'frag': 2}, # duplicate frag + {'obj': obj1, 'frag': 3}, + {'obj': obj1, 'frag': 3}, # duplicate frag + {'obj': obj1, 'frag': 4}, + {'obj': obj1, 'frag': 4}, # duplicate frag + {'obj': obj1, 'frag': 10}, + {'obj': obj1, 'frag': 11}, + {'obj': obj1, 'frag': 12}, + {'obj': obj1, 'frag': 13}, + ] + + # proxy will access randomly to a node in the second set + # so to ensure the GET fragment meets what it needs. + node_frags += [{'obj': obj1, 'frag': 5}] + # rests are 404s + node_frags += [[]] * (self.replicas() - len(node_frags)) + + fake_response = self._fake_ec_node_response(node_frags) + + req = swob.Request.blank('/v1/a/c/o') + with capture_http_requests(fake_response) as log: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.headers['etag'], obj1['etag']) + self.assertEqual(md5(resp.body).hexdigest(), obj1['etag']) + + # expect a request to all primaries plus one handoff + self.assertGreaterEqual( + len(log), self.policy.ec_n_unique_fragments + 1) + self.assertLessEqual(len(log), self.replicas()) + collected_indexes = defaultdict(list) + for conn in log: + fi = conn.resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index') + if fi is not None: + collected_indexes[fi].append(conn) + self.assertEqual(len(collected_indexes), self.policy.ec_ndata) + + def test_GET_with_missing_and_mixed_frags_will_dig_deep_but_succeed(self): + obj1 = self._make_ec_object_stub(timestamp=self.ts()) + obj2 = self._make_ec_object_stub(timestamp=self.ts()) + + # 28 nodes are here + node_frags = [ + {'obj': obj1, 'frag': 0}, + {'obj': obj2, 'frag': 0}, + [], + {'obj': obj1, 'frag': 1}, + {'obj': obj2, 'frag': 1}, + [], + {'obj': obj1, 'frag': 2}, + {'obj': obj2, 'frag': 2}, + [], + {'obj': obj1, 'frag': 3}, + {'obj': obj2, 'frag': 3}, + [], + {'obj': obj1, 'frag': 4}, + {'obj': obj2, 'frag': 4}, + [], + {'obj': obj1, 'frag': 5}, + {'obj': obj2, 'frag': 5}, + [], + {'obj': obj1, 'frag': 6}, + {'obj': obj2, 'frag': 6}, + [], + {'obj': obj1, 'frag': 7}, + {'obj': obj2, 'frag': 7}, + [], + {'obj': obj1, 'frag': 8}, + {'obj': obj2, 'frag': 8}, + [], + [], + ] + + node_frags += [[]] * 13 # Plus 13 nodes in handoff + + # finally 10th fragment for obj2 found + node_frags += [[{'obj': obj2, 'frag': 9}]] + + fake_response = self._fake_ec_node_response(node_frags) + + req = swob.Request.blank('/v1/a/c/o') + with capture_http_requests(fake_response) as log: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.headers['etag'], obj2['etag']) + self.assertEqual(md5(resp.body).hexdigest(), obj2['etag']) + + collected_responses = defaultdict(set) + for conn in log: + etag = conn.resp.headers['X-Object-Sysmeta-Ec-Etag'] + index = conn.resp.headers['X-Object-Sysmeta-Ec-Frag-Index'] + collected_responses[etag].add(index) + + # we go exactly as long as we have 
to, finding two different
+        # etags and some 404's (i.e. collected_responses[None])
+        self.assertEqual(len(log), len(node_frags))
+        self.assertEqual(len(collected_responses), 3)
+
+        # ... regardless, we should never need to fetch more than ec_ndata
+        # frags for any given etag
+        for etag, frags in collected_responses.items():
+            self.assertTrue(len(frags) <= self.policy.ec_ndata,
+                            'collected %s frags for etag %s' % (
+                                len(frags), etag))
+
+    def test_GET_with_mixed_frags_and_no_quorum_will_503(self):
+        # all nodes have a frag but there is no one set that reaches
+        # quorum, which means there is no backend 404 response either;
+        # the proxy should give up with a 503 rather than a 404
+        obj1 = self._make_ec_object_stub()
+        obj2 = self._make_ec_object_stub()
+        obj3 = self._make_ec_object_stub()
+        obj4 = self._make_ec_object_stub()
+        obj5 = self._make_ec_object_stub()
+        obj6 = self._make_ec_object_stub()
+        obj7 = self._make_ec_object_stub()
+
+        # primaries and handoffs for required nodes
+        # this is the 10-4 * 2 case, so 56 requests (2 * replicas) are
+        # required before giving up. we prepare 7 different objects above,
+        # so the responses will carry 8 fragments for each object
+        required_nodes = self.replicas() * 2
+        # fill them out to the primary and handoff nodes
+        node_frags = []
+        for frag in range(8):
+            for stub_obj in (obj1, obj2, obj3, obj4, obj5, obj6, obj7):
+                if len(node_frags) >= required_nodes:
+                    # we already have enough responses
+                    break
+                node_frags.append({'obj': stub_obj, 'frag': frag})
+
+        # sanity
+        self.assertEqual(required_nodes, len(node_frags))
+
+        fake_response = self._fake_ec_node_response(node_frags)
+
+        req = swob.Request.blank('/v1/a/c/o')
+        with capture_http_requests(fake_response) as log:
+            resp = req.get_response(self.app)
+
+        self.assertEqual(resp.status_int, 503)
+
+        collected_etags = set()
+        collected_status = set()
+        for conn in log:
+            etag = conn.resp.headers['X-Object-Sysmeta-Ec-Etag']
+            collected_etags.add(etag)
+            collected_status.add(conn.resp.status)
+
+        self.assertEqual(required_nodes, len(log))
+        self.assertEqual(len(collected_etags), 7)
+        self.assertEqual({200}, collected_status)
+
+    def test_GET_with_no_durable_files(self):
+        # verify that at least one durable is necessary for a successful GET
+        obj1 = self._make_ec_object_stub()
+        node_frags = [
+            {'obj': obj1, 'frag': 0, 'durable': False},
+            {'obj': obj1, 'frag': 1, 'durable': False},
+            {'obj': obj1, 'frag': 2, 'durable': False},
+            {'obj': obj1, 'frag': 3, 'durable': False},
+            {'obj': obj1, 'frag': 4, 'durable': False},
+            {'obj': obj1, 'frag': 5, 'durable': False},
+            {'obj': obj1, 'frag': 6, 'durable': False},
+            {'obj': obj1, 'frag': 7, 'durable': False},
+            {'obj': obj1, 'frag': 8, 'durable': False},
+            {'obj': obj1, 'frag': 9, 'durable': False},
+            {'obj': obj1, 'frag': 10, 'durable': False},  # parity
+            {'obj': obj1, 'frag': 11, 'durable': False},  # parity
+            {'obj': obj1, 'frag': 12, 'durable': False},  # parity
+            {'obj': obj1, 'frag': 13, 'durable': False},  # parity
+        ]
+
+        node_frags = node_frags * 2  # 2 duplications
+
+        node_frags += [[]] * self.replicas()  # handoffs
+
+        fake_response = self._fake_ec_node_response(list(node_frags))
+
+        req = swob.Request.blank('/v1/a/c/o')
+        with capture_http_requests(fake_response) as log:
+            resp = req.get_response(self.app)
+
+        self.assertEqual(resp.status_int, 404)
+        # all 28 nodes tried with an optimistic get, none are durable and none
+        # report having a durable timestamp
+        self.assertEqual(self.replicas() * 2, len(log))
+
+    def test_GET_with_missing_and_mixed_frags_may_503(self):
+
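        # A sketch of the handoff search budget assumed throughout these
        # 404/503 tests (per the comments above, when the extra responses
        # are 404s the proxy keeps digging until the default
        # request_node_count of 2 * replicas is exhausted; illustration
        # only):
        def sketch_request_budget(replicas):
            return 2 * replicas  # 28-replica duplicated ring -> 56 requests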
obj1 = self._make_ec_object_stub() + obj2 = self._make_ec_object_stub() + obj3 = self._make_ec_object_stub() + obj4 = self._make_ec_object_stub() + # we get a 503 when all the handoffs return 200 + node_frags = [[]] * self.replicas() # primaries have no frags + # plus, 4 different objects and 7 indexes will be 28 node responses + # here for the handoffs + node_frags = node_frags + [ # handoffs all have frags + {'obj': obj1, 'frag': 0}, + {'obj': obj2, 'frag': 0}, + {'obj': obj3, 'frag': 0}, + {'obj': obj4, 'frag': 0}, + {'obj': obj1, 'frag': 1}, + {'obj': obj2, 'frag': 1}, + {'obj': obj3, 'frag': 1}, + {'obj': obj4, 'frag': 1}, + {'obj': obj1, 'frag': 2}, + {'obj': obj2, 'frag': 2}, + {'obj': obj3, 'frag': 2}, + {'obj': obj4, 'frag': 2}, + {'obj': obj1, 'frag': 3}, + {'obj': obj2, 'frag': 3}, + {'obj': obj3, 'frag': 3}, + {'obj': obj4, 'frag': 3}, + {'obj': obj1, 'frag': 4}, + {'obj': obj2, 'frag': 4}, + {'obj': obj3, 'frag': 4}, + {'obj': obj4, 'frag': 4}, + {'obj': obj1, 'frag': 5}, + {'obj': obj2, 'frag': 5}, + {'obj': obj3, 'frag': 5}, + {'obj': obj4, 'frag': 5}, + {'obj': obj1, 'frag': 6}, + {'obj': obj2, 'frag': 6}, + {'obj': obj3, 'frag': 6}, + {'obj': obj4, 'frag': 6}, + ] + + fake_response = self._fake_ec_node_response(node_frags) + + req = swob.Request.blank('/v1/a/c/o') + with capture_http_requests(fake_response) as log: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 503) + # never get a quorum, so all nodes are searched + self.assertEqual(len(log), 2 * self.replicas()) + collected_indexes = defaultdict(list) + for conn in log: + fi = conn.resp.headers.get('X-Object-Sysmeta-Ec-Frag-Index') + if fi is not None: + collected_indexes[fi].append(conn) + self.assertEqual(len(collected_indexes), 7) + + def test_GET_with_mixed_etags_at_same_timestamp(self): + # the only difference from the parent class is the handoff stub length + + ts = self.ts() # force equal timestamps for two objects + obj1 = self._make_ec_object_stub(timestamp=ts) + obj2 = self._make_ec_object_stub(timestamp=ts) + + node_frags = [ + # 7 frags of obj2 are available and durable + {'obj': obj2, 'frag': 0, 'durable': True}, + {'obj': obj2, 'frag': 1, 'durable': True}, + {'obj': obj2, 'frag': 2, 'durable': True}, + {'obj': obj2, 'frag': 3, 'durable': True}, + {'obj': obj2, 'frag': 4, 'durable': True}, + {'obj': obj2, 'frag': 5, 'durable': True}, + {'obj': obj2, 'frag': 6, 'durable': True}, + # 7 frags of obj1 are available and durable + {'obj': obj1, 'frag': 7, 'durable': True}, + {'obj': obj1, 'frag': 8, 'durable': True}, + {'obj': obj1, 'frag': 9, 'durable': True}, + {'obj': obj1, 'frag': 10, 'durable': True}, + {'obj': obj1, 'frag': 11, 'durable': True}, + {'obj': obj1, 'frag': 12, 'durable': True}, + {'obj': obj1, 'frag': 13, 'durable': True}, + # handoffs + ] + + node_frags += [[]] * (self.replicas() * 2 - len(node_frags)) + + fake_response = self._fake_ec_node_response(list(node_frags)) + + req = swob.Request.blank('/v1/a/c/o') + with capture_http_requests(fake_response) as log: + resp = req.get_response(self.app) + # read body to provoke any EC decode errors + self.assertFalse(resp.body) + + self.assertEqual(resp.status_int, 404) + self.assertEqual(len(log), self.replicas() * 2) + collected_etags = set() + for conn in log: + etag = conn.resp.headers['X-Object-Sysmeta-Ec-Etag'] + collected_etags.add(etag) # will be None from handoffs + self.assertEqual({obj1['etag'], obj2['etag'], None}, collected_etags) + log_lines = self.app.logger.get_lines_for_level('error') + self.assertEqual(log_lines, +
['Problem with fragment response: ETag mismatch'] * 7) + + def _test_determine_chunk_destinations_prioritize( + self, unique, duplicated, one_more_missing): + # This scenario is only likely when ec_duplication_factor >= 2. + # If we have multiple failures such that frag 0 is missing + # two copies and frag 1 is missing one, we should prioritize + # finding a handoff for frag 0. + + class FakePutter(object): + def __init__(self, index): + self.node_index = index + + controller = self.controller_cls( + self.app, 'a', 'c', 'o') + + # create a dummy list of putters, check no handoffs + putters = [] + for index in range(self.policy.object_ring.replica_count): + putters.append(FakePutter(index)) + got = controller._determine_chunk_destinations(putters, self.policy) + expected = {} + for i, p in enumerate(putters): + expected[p] = i + # sanity + self.assertEqual(got, expected) + + # now let's make a putter with a unique fragment index a handoff + putters[unique].node_index = None + + # and then pop a putter which has the same fragment index as unique + self.assertEqual( + unique, self.policy.get_backend_index(duplicated)) # sanity + putters.pop(duplicated) + + # pop one more putter too to make another hole missing one copy + putters.pop(one_more_missing) + + # then determine the chunk destinations: we have 26 putters here; + # unique frag index 0 is missing 2 copies and unique frag index 1 + # is missing 1 copy, i.e. the handoff node should be assigned to + # unique frag index 0 + got = controller._determine_chunk_destinations(putters, self.policy) + self.assertEqual(len(expected) - 2, len(got)) + + # index one_more_missing should not be chosen + self.assertNotIn(one_more_missing, got.values()) + # either index unique or duplicated should be in the got dict + self.assertTrue( + any([unique in got.values(), duplicated in got.values()])) + # but not both + self.assertFalse( + all([unique in got.values(), duplicated in got.values()])) + + def test_determine_chunk_destinations_prioritize_more_missing(self): + # dropping 0, 14 and 1 should work + self._test_determine_chunk_destinations_prioritize(0, 14, 1) + # dropping 1, 15 and 0 should work, too + self._test_determine_chunk_destinations_prioritize(1, 15, 0) + + if __name__ == '__main__': unittest.main() diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py index 08e9e2abd3..183682ac7a 100644 --- a/test/unit/proxy/test_server.py +++ b/test/unit/proxy/test_server.py @@ -1143,25 +1143,7 @@ class TestProxyServerLoading(unittest.TestCase): self.assertTrue(policy.object_ring) -@patch_policies([StoragePolicy(0, 'zero', True, - object_ring=FakeRing(base_port=3000))]) -class TestObjectController(unittest.TestCase): - - def setUp(self): - self.app = proxy_server.Application( - None, FakeMemcache(), - logger=debug_logger('proxy-ut'), - account_ring=FakeRing(), - container_ring=FakeRing()) - # clear proxy logger result for each test - _test_servers[0].logger._clear() - - def tearDown(self): - self.app.account_ring.set_replicas(3) - self.app.container_ring.set_replicas(3) - for policy in POLICIES: - policy.object_ring = FakeRing(base_port=3000) - +class ECTestMixin(object): def put_container(self, policy_name, container_name): # Note: only works if called with unpatched policies prolis = _test_sockets[0] @@ -1179,6 +1161,943 @@ class TestObjectController(unittest.TestCase): exp = 'HTTP/1.1 2' self.assertEqual(headers[:len(exp)], exp) + @unpatch_policies + def test_PUT_ec(self): + policy = POLICIES[self.ec_policy_index] + self.put_container(policy.name, policy.name) + + obj = 'abCD' * 10 # small,
so we don't get multiple EC stripes + prolis = _test_sockets[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/o1 HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Etag: "%s"\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (policy.name, md5(obj).hexdigest(), + len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + ecd = policy.pyeclib_driver + expected_pieces = set(ecd.encode(obj)) + + # go to disk to make sure it's there and all erasure-coded + partition, nodes = policy.object_ring.get_nodes('a', policy.name, 'o1') + conf = {'devices': _testdir, 'mount_check': 'false'} + df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy] + + got_pieces = set() + got_indices = set() + got_durable = [] + for node_index, node in enumerate(nodes): + df = df_mgr.get_diskfile(node['device'], partition, + 'a', policy.name, 'o1', + policy=policy) + with df.open(): + meta = df.get_metadata() + contents = ''.join(df.reader()) + got_pieces.add(contents) + + lmeta = dict((k.lower(), v) for k, v in meta.items()) + got_indices.add( + lmeta['x-object-sysmeta-ec-frag-index']) + + self.assertEqual( + lmeta['x-object-sysmeta-ec-etag'], + md5(obj).hexdigest()) + self.assertEqual( + lmeta['x-object-sysmeta-ec-content-length'], + str(len(obj))) + self.assertEqual( + lmeta['x-object-sysmeta-ec-segment-size'], + '4096') + self.assertEqual( + lmeta['x-object-sysmeta-ec-scheme'], + '%s 2+1' % DEFAULT_TEST_EC_TYPE) + self.assertEqual( + lmeta['etag'], + md5(contents).hexdigest()) + + # check for the presence of a durable data file for the timestamp + durable_file = ( + utils.Timestamp(df.timestamp).internal + + '#%s' % lmeta['x-object-sysmeta-ec-frag-index'] + + '#d.data') + durable_file = os.path.join( + _testdir, node['device'], storage_directory( + diskfile.get_data_dir(policy), + partition, hash_path('a', policy.name, 'o1')), + durable_file) + if os.path.isfile(durable_file): + got_durable.append(True) + + self.assertEqual(expected_pieces, got_pieces) + self.assertEqual(set(('0', '1', '2')), got_indices) + + # verify at least 2 puts made it all the way to the end of 2nd + # phase, i.e. at least 2 durable statuses were written + num_durable_puts = sum(d is True for d in got_durable) + self.assertGreaterEqual(num_durable_puts, 2) + + @unpatch_policies + def test_PUT_ec_multiple_segments(self): + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + pyeclib_header_size = len(ec_policy.pyeclib_driver.encode("")[0]) + segment_size = ec_policy.ec_segment_size + + # Big enough to have multiple segments. Also a multiple of the + # segment size to get coverage of that path too.
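+ # note: 'ABC' is three bytes, so the object below is 3 * segment_size + # bytes, i.e. exactly three whole segments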
+ obj = 'ABC' * segment_size + + prolis = _test_sockets[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/o2 HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + # it's a 2+1 erasure code, so each fragment archive should be half + # the length of the object, plus three inline pyeclib metadata + # headers (one per segment) + expected_length = (len(obj) / 2 + pyeclib_header_size * 3) + + partition, nodes = ec_policy.object_ring.get_nodes( + 'a', ec_policy.name, 'o2') + + conf = {'devices': _testdir, 'mount_check': 'false'} + df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] + + got_durable = [] + fragment_archives = [] + for node in nodes: + df = df_mgr.get_diskfile( + node['device'], partition, 'a', + ec_policy.name, 'o2', policy=ec_policy) + with df.open(): + meta = df.get_metadata() + contents = ''.join(df.reader()) + fragment_archives.append(contents) + self.assertEqual(len(contents), expected_length) + + durable_file = ( + utils.Timestamp(df.timestamp).internal + + '#%s' % meta['X-Object-Sysmeta-Ec-Frag-Index'] + + '#d.data') + durable_file = os.path.join( + _testdir, node['device'], storage_directory( + diskfile.get_data_dir(ec_policy), + partition, hash_path('a', ec_policy.name, 'o2')), + durable_file) + if os.path.isfile(durable_file): + got_durable.append(True) + + # Verify that we can decode each individual fragment and that they + # are all the correct size + fragment_size = ec_policy.fragment_size + nfragments = int( + math.ceil(float(len(fragment_archives[0])) / fragment_size)) + + for fragment_index in range(nfragments): + fragment_start = fragment_index * fragment_size + fragment_end = (fragment_index + 1) * fragment_size + + try: + frags = [fa[fragment_start:fragment_end] + for fa in fragment_archives] + seg = ec_policy.pyeclib_driver.decode(frags) + except ECDriverError: + self.fail("Failed to decode fragments %d; this probably " + "means the fragments are not the sizes they " + "should be" % fragment_index) + + segment_start = fragment_index * segment_size + segment_end = (fragment_index + 1) * segment_size + + self.assertEqual(seg, obj[segment_start:segment_end]) + + # verify at least 2 puts made it all the way to the end of 2nd + # phase, i.e. at least 2 durable statuses were written + num_durable_puts = sum(d is True for d in got_durable) + self.assertGreaterEqual(num_durable_puts, 2) + + @unpatch_policies + def test_PUT_ec_object_etag_mismatch(self): + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + obj = '90:6A:02:60:B1:08-96da3e706025537fc42464916427727e' + prolis = _test_sockets[0] + prosrv = _test_servers[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/o3 HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Etag: %s\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, + md5('something else').hexdigest(), + len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 422' + self.assertEqual(headers[:len(exp)], exp) + + # nothing should have made it to disk on the object servers + partition, nodes =
prosrv.get_object_ring( + int(ec_policy)).get_nodes('a', ec_policy.name, 'o3') + conf = {'devices': _testdir, 'mount_check': 'false'} + + df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] + + for node in nodes: + df = df_mgr.get_diskfile(node['device'], partition, + 'a', ec_policy.name, 'o3', + policy=ec_policy) + self.assertRaises(DiskFileNotExist, df.open) + + @unpatch_policies + def test_PUT_ec_fragment_archive_etag_mismatch(self): + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + # Cause hash mismatches by feeding some of the MD5 hashers extra + # data. The goal here is to bust exactly enough of the hashers in + # the object servers that the PUT cannot reach quorum. + count = (ec_policy.object_ring.replica_count - ec_policy.ec_ndata) + countdown = [count] + + def busted_md5_constructor(initial_str=""): + hasher = md5(initial_str) + if countdown[0] > 0: + hasher.update('wrong') + countdown[0] -= 1 + return hasher + + obj = 'uvarovite-esurience-cerated-symphysic' + prolis = _test_sockets[0] + prosrv = _test_servers[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + with mock.patch('swift.obj.server.md5', busted_md5_constructor): + fd = sock.makefile() + fd.write('PUT /v1/a/%s/pimento HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Etag: %s\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, md5(obj).hexdigest(), + len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 503' # no quorum + self.assertEqual(headers[:len(exp)], exp) + + # ec_ndata of the fragment archives should have + # landed on disk + partition, nodes = prosrv.get_object_ring( + int(ec_policy)).get_nodes('a', ec_policy.name, 'pimento') + conf = {'devices': _testdir, 'mount_check': 'false'} + + df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] + + found = 0 + for node in nodes: + df = df_mgr.get_diskfile(node['device'], partition, + 'a', ec_policy.name, 'pimento', + policy=ec_policy) + try: + # diskfile open won't succeed because no durable was written, + # so look under the hood for data files.
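+ # (the first phase of the two-phase EC PUT writes the .data file; + # the commit that would have marked it durable was never sent + # because quorum failed)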
+ files = os.listdir(df._datadir) + if len(files) > 0: + # Although a busted fragment archive hasn't landed on + # disk, the directory df._datadir is prematurely created + # and is empty when we use O_TMPFILE + linkat() + num_data_files = \ + len([f for f in files if f.endswith('.data')]) + self.assertEqual(1, num_data_files) + found += 1 + except OSError: + pass + self.assertEqual(found, ec_policy.ec_ndata) + + @unpatch_policies + def test_PUT_ec_fragment_quorum_archive_etag_mismatch(self): + ec_policy = POLICIES[self.ec_policy_index] + self.put_container("ec", "ec-con") + + def busted_md5_constructor(initial_str=""): + hasher = md5(initial_str) + hasher.update('wrong') + return hasher + + obj = 'uvarovite-esurience-cerated-symphysic' + prolis = _test_sockets[0] + prosrv = _test_servers[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + + call_count = [0] + + def mock_committer(self): + call_count[0] += 1 + + commit_confirmation = \ + 'swift.proxy.controllers.obj.MIMEPutter.send_commit_confirmation' + + with mock.patch('swift.obj.server.md5', busted_md5_constructor), \ + mock.patch(commit_confirmation, mock_committer): + fd = sock.makefile() + fd.write('PUT /v1/a/ec-con/quorum HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Etag: %s\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 503' # no quorum + self.assertEqual(headers[:len(exp)], exp) + # Don't send commit to object-server if quorum responses consist of 4xx + self.assertEqual(0, call_count[0]) + + # no fragment archives should have landed on disk + partition, nodes = prosrv.get_object_ring(3).get_nodes( + 'a', 'ec-con', 'quorum') + conf = {'devices': _testdir, 'mount_check': 'false'} + + df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] + + for node in nodes: + df = df_mgr.get_diskfile(node['device'], partition, + 'a', 'ec-con', 'quorum', + policy=POLICIES[3]) + if os.path.exists(df._datadir): + self.assertFalse(os.listdir(df._datadir)) # should be empty + + @unpatch_policies + def test_PUT_ec_fragment_quorum_bad_request(self): + ec_policy = POLICIES[self.ec_policy_index] + self.put_container("ec", "ec-con") + + obj = 'uvarovite-esurience-cerated-symphysic' + prolis = _test_sockets[0] + prosrv = _test_servers[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + + call_count = [0] + + def mock_committer(self): + call_count[0] += 1 + + read_footer = \ + 'swift.obj.server.ObjectController._read_metadata_footer' + commit_confirmation = \ + 'swift.proxy.controllers.obj.MIMEPutter.send_commit_confirmation' + + with mock.patch(read_footer) as read_footer_call, \ + mock.patch(commit_confirmation, mock_committer): + # Emulate missing footer MIME doc in all object-servers + read_footer_call.side_effect = HTTPBadRequest( + body="couldn't find footer MIME doc") + + fd = sock.makefile() + fd.write('PUT /v1/a/ec-con/quorum HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Etag: %s\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + + # Don't expose the bad conversation between proxy-server and + # object-server to the client + exp = 'HTTP/1.1 503' + self.assertEqual(headers[:len(exp)], exp) + # Don't send commit to object-server if quorum responses
consist of 4xx + self.assertEqual(0, call_count[0]) + + # no fragment archives should have landed on disk + partition, nodes = prosrv.get_object_ring(3).get_nodes( + 'a', 'ec-con', 'quorum') + conf = {'devices': _testdir, 'mount_check': 'false'} + + df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] + + for node in nodes: + df = df_mgr.get_diskfile(node['device'], partition, + 'a', 'ec-con', 'quorum', + policy=POLICIES[3]) + if os.path.exists(df._datadir): + self.assertFalse(os.listdir(df._datadir)) # should be empty + + @unpatch_policies + def test_PUT_ec_if_none_match(self): + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + obj = 'ananepionic-lepidophyllous-ropewalker-neglectful' + prolis = _test_sockets[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/inm HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Etag: "%s"\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, md5(obj).hexdigest(), + len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/inm HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'If-None-Match: *\r\n' + 'Etag: "%s"\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, md5(obj).hexdigest(), + len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 412' + self.assertEqual(headers[:len(exp)], exp) + + @unpatch_policies + def test_GET_ec(self): + prolis = _test_sockets[0] + prosrv = _test_servers[0] + + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + obj = '0123456' * 11 * 17 + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/go-get-it HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'X-Object-Meta-Color: chartreuse\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('GET /v1/a/%s/go-get-it HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'X-Storage-Token: t\r\n' + '\r\n' % ec_policy.name) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 200' + self.assertEqual(headers[:len(exp)], exp) + + headers = parse_headers_string(headers) + self.assertEqual(str(len(obj)), headers['Content-Length']) + self.assertEqual(md5(obj).hexdigest(), headers['Etag']) + self.assertEqual('chartreuse', headers['X-Object-Meta-Color']) + + gotten_obj = '' + while True: + buf = fd.read(64) + if not buf: + break + gotten_obj += buf + self.assertEqual(gotten_obj, obj) + error_lines = prosrv.logger.get_lines_for_level('error') + warn_lines = prosrv.logger.get_lines_for_level('warning') + self.assertEqual(len(error_lines), 0) # sanity + self.assertEqual(len(warn_lines), 0) # sanity + + def _test_conditional_GET(self, policy): + container_name = uuid.uuid4().hex + object_path = '/v1/a/%s/conditionals' % container_name + 
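# a fresh uuid container name per call keeps the EC and replication + # variants of this conditional-GET test from colliding +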
self.put_container(policy.name, container_name) + + obj = 'this object has an etag and is otherwise unimportant' + etag = md5(obj).hexdigest() + not_etag = md5(obj + "blahblah").hexdigest() + + prolis = _test_sockets[0] + prosrv = _test_servers[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT %s HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (object_path, len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + for verb, body in (('GET', obj), ('HEAD', '')): + # If-Match + req = Request.blank( + object_path, + environ={'REQUEST_METHOD': verb}, + headers={'If-Match': etag}) + resp = req.get_response(prosrv) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.body, body) + self.assertEqual(etag, resp.headers.get('etag')) + self.assertEqual('bytes', resp.headers.get('accept-ranges')) + + req = Request.blank( + object_path, + environ={'REQUEST_METHOD': verb}, + headers={'If-Match': not_etag}) + resp = req.get_response(prosrv) + self.assertEqual(resp.status_int, 412) + self.assertEqual(etag, resp.headers.get('etag')) + + req = Request.blank( + object_path, + environ={'REQUEST_METHOD': verb}, + headers={'If-Match': "*"}) + resp = req.get_response(prosrv) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.body, body) + self.assertEqual(etag, resp.headers.get('etag')) + self.assertEqual('bytes', resp.headers.get('accept-ranges')) + + # If-None-Match + req = Request.blank( + object_path, + environ={'REQUEST_METHOD': verb}, + headers={'If-None-Match': etag}) + resp = req.get_response(prosrv) + self.assertEqual(resp.status_int, 304) + self.assertEqual(etag, resp.headers.get('etag')) + self.assertEqual('bytes', resp.headers.get('accept-ranges')) + + req = Request.blank( + object_path, + environ={'REQUEST_METHOD': verb}, + headers={'If-None-Match': not_etag}) + resp = req.get_response(prosrv) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.body, body) + self.assertEqual(etag, resp.headers.get('etag')) + self.assertEqual('bytes', resp.headers.get('accept-ranges')) + + req = Request.blank( + object_path, + environ={'REQUEST_METHOD': verb}, + headers={'If-None-Match': "*"}) + resp = req.get_response(prosrv) + self.assertEqual(resp.status_int, 304) + self.assertEqual(etag, resp.headers.get('etag')) + self.assertEqual('bytes', resp.headers.get('accept-ranges')) + + error_lines = prosrv.logger.get_lines_for_level('error') + warn_lines = prosrv.logger.get_lines_for_level('warning') + self.assertEqual(len(error_lines), 0) # sanity + self.assertEqual(len(warn_lines), 0) # sanity + + @unpatch_policies + def test_conditional_GET_ec(self): + policy = POLICIES[self.ec_policy_index] + self.assertEqual('erasure_coding', policy.policy_type) # sanity + self._test_conditional_GET(policy) + + @unpatch_policies + def test_GET_ec_big(self): + prolis = _test_sockets[0] + prosrv = _test_servers[0] + + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + # our EC segment size is 4 KiB, so this is multiple (3) segments; + # we'll verify that with a sanity check + obj = 'a moose once bit my sister' * 400 + self.assertGreater( + len(obj), ec_policy.ec_segment_size * 2, + "object is too small for proper testing") + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = 
sock.makefile() + fd.write('PUT /v1/a/%s/big-obj-get HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('GET /v1/a/%s/big-obj-get HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'X-Storage-Token: t\r\n' + '\r\n' % ec_policy.name) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 200' + self.assertEqual(headers[:len(exp)], exp) + + headers = parse_headers_string(headers) + self.assertEqual(str(len(obj)), headers['Content-Length']) + self.assertEqual(md5(obj).hexdigest(), headers['Etag']) + + gotten_obj = '' + while True: + buf = fd.read(64) + if not buf: + break + gotten_obj += buf + # This may look like a redundant test, but when things fail, this + # has a useful failure message while the subsequent one spews piles + # of garbage and demolishes your terminal's scrollback buffer. + self.assertEqual(len(gotten_obj), len(obj)) + self.assertEqual(gotten_obj, obj) + error_lines = prosrv.logger.get_lines_for_level('error') + warn_lines = prosrv.logger.get_lines_for_level('warning') + self.assertEqual(len(error_lines), 0) # sanity + self.assertEqual(len(warn_lines), 0) # sanity + + @unpatch_policies + def test_GET_ec_failure_handling(self): + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + obj = 'look at this object; it is simply amazing ' * 500 + prolis = _test_sockets[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/crash-test-dummy HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + def explodey_iter(inner_iter): + yield next(inner_iter) + raise Exception("doom ba doom") + + def explodey_doc_parts_iter(inner_iter_iter): + try: + for item in inner_iter_iter: + item = item.copy() # paranoia about mutable data + item['part_iter'] = explodey_iter(item['part_iter']) + yield item + except GeneratorExit: + inner_iter_iter.close() + raise + + real_ec_app_iter = swift.proxy.controllers.obj.ECAppIter + + def explodey_ec_app_iter(path, policy, iterators, *a, **kw): + # Each thing in `iterators` here is a document-parts iterator, + # and we want to fail after getting a little into each part. + # + # That way, we ensure we've started streaming the response to + # the client when things go wrong. 
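+ # (wrapping each part_iter, rather than the outer iterator, means + # the 200 and some body bytes are already on the wire before the + # exception fires)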
+ return real_ec_app_iter( + path, policy, + [explodey_doc_parts_iter(i) for i in iterators], + *a, **kw) + + with mock.patch("swift.proxy.controllers.obj.ECAppIter", + explodey_ec_app_iter): + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('GET /v1/a/%s/crash-test-dummy HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'X-Storage-Token: t\r\n' + '\r\n' % ec_policy.name) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 200' + self.assertEqual(headers[:len(exp)], exp) + + headers = parse_headers_string(headers) + self.assertEqual(str(len(obj)), headers['Content-Length']) + self.assertEqual(md5(obj).hexdigest(), headers['Etag']) + + gotten_obj = '' + try: + # don't hang the test run when this fails + with Timeout(300): + while True: + buf = fd.read(64) + if not buf: + break + gotten_obj += buf + except Timeout: + self.fail("GET hung when connection failed") + + # Ensure we failed partway through, otherwise the mocks could + # get out of date without anyone noticing + self.assertTrue(0 < len(gotten_obj) < len(obj)) + + @unpatch_policies + def test_HEAD_ec(self): + prolis = _test_sockets[0] + prosrv = _test_servers[0] + + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + obj = '0123456' * 11 * 17 + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('PUT /v1/a/%s/go-head-it HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'Content-Length: %d\r\n' + 'X-Storage-Token: t\r\n' + 'X-Object-Meta-Color: chartreuse\r\n' + 'Content-Type: application/octet-stream\r\n' + '\r\n%s' % (ec_policy.name, len(obj), obj)) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 201' + self.assertEqual(headers[:len(exp)], exp) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('HEAD /v1/a/%s/go-head-it HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'X-Storage-Token: t\r\n' + '\r\n' % ec_policy.name) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 200' + self.assertEqual(headers[:len(exp)], exp) + + headers = parse_headers_string(headers) + self.assertEqual(str(len(obj)), headers['Content-Length']) + self.assertEqual(md5(obj).hexdigest(), headers['Etag']) + self.assertEqual('chartreuse', headers['X-Object-Meta-Color']) + + error_lines = prosrv.logger.get_lines_for_level('error') + warn_lines = prosrv.logger.get_lines_for_level('warning') + self.assertEqual(len(error_lines), 0) # sanity + self.assertEqual(len(warn_lines), 0) # sanity + + @unpatch_policies + def test_GET_ec_404(self): + prolis = _test_sockets[0] + prosrv = _test_servers[0] + + ec_policy = POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('GET /v1/a/%s/yes-we-have-no-bananas HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'X-Storage-Token: t\r\n' + '\r\n' % ec_policy.name) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 404' + self.assertEqual(headers[:len(exp)], exp) + + error_lines = prosrv.logger.get_lines_for_level('error') + warn_lines = prosrv.logger.get_lines_for_level('warning') + self.assertEqual(len(error_lines), 0) # sanity + self.assertEqual(len(warn_lines), 0) # sanity + + @unpatch_policies + def test_HEAD_ec_404(self): + prolis = _test_sockets[0] + prosrv = _test_servers[0] + + ec_policy = 
POLICIES[self.ec_policy_index] + self.put_container(ec_policy.name, ec_policy.name) + + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write('HEAD /v1/a/%s/yes-we-have-no-bananas HTTP/1.1\r\n' + 'Host: localhost\r\n' + 'Connection: close\r\n' + 'X-Storage-Token: t\r\n' + '\r\n' % ec_policy.name) + fd.flush() + headers = readuntil2crlfs(fd) + exp = 'HTTP/1.1 404' + self.assertEqual(headers[:len(exp)], exp) + + error_lines = prosrv.logger.get_lines_for_level('error') + warn_lines = prosrv.logger.get_lines_for_level('warning') + self.assertEqual(len(error_lines), 0) # sanity + self.assertEqual(len(warn_lines), 0) # sanity + + @unpatch_policies + def test_reload_ring_ec(self): + policy = POLICIES[self.ec_policy_index] + self.put_container("ec", "ec-con") + + orig_rtime = policy.object_ring._rtime + orig_replica_count = policy.object_ring.replica_count + # save the original file as a backup + copyfile(policy.object_ring.serialized_path, + policy.object_ring.serialized_path + '.bak') + + try: + # overwrite with a 2-replica, 2-device ring + obj_devs = [] + obj_devs.append( + {'port': _test_sockets[-3].getsockname()[1], + 'device': 'sdg1'}) + obj_devs.append( + {'port': _test_sockets[-2].getsockname()[1], + 'device': 'sdh1'}) + write_fake_ring(policy.object_ring.serialized_path, + *obj_devs) + + def get_ring_reloaded_response(method): + # force a ring reload at the request + policy.object_ring._rtime = 0 + + trans_data = ['%s /v1/a/ec-con/o2 HTTP/1.1\r\n' % method, + 'Host: localhost\r\n', + 'Connection: close\r\n', + 'X-Storage-Token: t\r\n'] + + if method == 'PUT': + # small, so we don't get multiple EC stripes + obj = 'abCD' * 10 + + extra_trans_data = [ + 'Etag: "%s"\r\n' % md5(obj).hexdigest(), + 'Content-Length: %d\r\n' % len(obj), + 'Content-Type: application/octet-stream\r\n', + '\r\n%s' % obj + ] + trans_data.extend(extra_trans_data) + else: + trans_data.append('\r\n') + + prolis = _test_sockets[0] + sock = connect_tcp(('localhost', prolis.getsockname()[1])) + fd = sock.makefile() + fd.write(''.join(trans_data)) + fd.flush() + headers = readuntil2crlfs(fd) + + # the proxy should still be using the older ring + return headers + + for method in ('PUT', 'HEAD', 'GET', 'POST', 'DELETE'): + headers = get_ring_reloaded_response(method) + exp = 'HTTP/1.1 20' + self.assertEqual(headers[:len(exp)], exp) + + # the proxy didn't load the newest ring, it kept using the + # older one + self.assertEqual(orig_replica_count, + policy.object_ring.replica_count) + + if method == 'POST': + # take care of the fast-post case here!
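+ # repeat the POST with object_post_as_copy disabled so the + # fast-post path also exercises the ring reload, restoring + # the attribute afterwards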
+ orig_post_as_copy = getattr( + _test_servers[0], 'object_post_as_copy', None) + try: + _test_servers[0].object_post_as_copy = False + with mock.patch.object( + _test_servers[0], + 'object_post_as_copy', False): + headers = get_ring_reloaded_response(method) + finally: + if orig_post_as_copy is None: + del _test_servers[0].object_post_as_copy + else: + _test_servers[0].object_post_as_copy = \ + orig_post_as_copy + + exp = 'HTTP/1.1 20' + self.assertEqual(headers[:len(exp)], exp) + # sanity + self.assertEqual(orig_replica_count, + policy.object_ring.replica_count) + + finally: + policy.object_ring._rtime = orig_rtime + os.rename(policy.object_ring.serialized_path + '.bak', + policy.object_ring.serialized_path) + + +@patch_policies([StoragePolicy(0, 'zero', True, + object_ring=FakeRing(base_port=3000))]) +class TestObjectController(ECTestMixin, unittest.TestCase): + ec_policy_index = 3 + + def setUp(self): + self.app = proxy_server.Application( + None, FakeMemcache(), + logger=debug_logger('proxy-ut'), + account_ring=FakeRing(), + container_ring=FakeRing()) + # clear proxy logger result for each test + _test_servers[0].logger._clear() + + def tearDown(self): + self.app.account_ring.set_replicas(3) + self.app.container_ring.set_replicas(3) + for policy in POLICIES: + policy.object_ring = FakeRing(base_port=3000) + def assert_status_map(self, method, statuses, expected, raise_exc=False): with save_globals(): kwargs = {} @@ -1783,812 +2702,12 @@ class TestObjectController(unittest.TestCase): self.assertEqual(len(got_byteranges[2]), 501) self.assertEqual(got_byteranges[2], obj[2000:2501]) - @unpatch_policies - def test_PUT_ec(self): - policy = POLICIES[3] - self.put_container("ec", "ec-con") - - obj = 'abCD' * 10 # small, so we don't get multiple EC stripes - prolis = _test_sockets[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/o1 HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Etag: "%s"\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - ecd = policy.pyeclib_driver - expected_pieces = set(ecd.encode(obj)) - - # go to disk to make sure it's there and all erasure-coded - partition, nodes = policy.object_ring.get_nodes('a', 'ec-con', 'o1') - conf = {'devices': _testdir, 'mount_check': 'false'} - df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[policy] - - got_pieces = set() - got_indices = set() - got_durable = [] - for node_index, node in enumerate(nodes): - df = df_mgr.get_diskfile(node['device'], partition, - 'a', 'ec-con', 'o1', - policy=policy) - with df.open(): - meta = df.get_metadata() - contents = ''.join(df.reader()) - got_pieces.add(contents) - - lmeta = dict((k.lower(), v) for k, v in meta.items()) - got_indices.add( - lmeta['x-object-sysmeta-ec-frag-index']) - - self.assertEqual( - lmeta['x-object-sysmeta-ec-etag'], - md5(obj).hexdigest()) - self.assertEqual( - lmeta['x-object-sysmeta-ec-content-length'], - str(len(obj))) - self.assertEqual( - lmeta['x-object-sysmeta-ec-segment-size'], - '4096') - self.assertEqual( - lmeta['x-object-sysmeta-ec-scheme'], - '%s 2+1' % DEFAULT_TEST_EC_TYPE) - self.assertEqual( - lmeta['etag'], - md5(contents).hexdigest()) - - # check presence for a durable data file for the timestamp - durable_file = ( - utils.Timestamp(df.timestamp).internal + 
- '#%s' % lmeta['x-object-sysmeta-ec-frag-index'] + - '#d.data') - durable_file = os.path.join( - _testdir, node['device'], storage_directory( - diskfile.get_data_dir(policy), - partition, hash_path('a', 'ec-con', 'o1')), - durable_file) - if os.path.isfile(durable_file): - got_durable.append(True) - - self.assertEqual(expected_pieces, got_pieces) - self.assertEqual(set(('0', '1', '2')), got_indices) - - # verify at least 2 puts made it all the way to the end of 2nd - # phase, ie at least 2 durable statuses were written - num_durable_puts = sum(d is True for d in got_durable) - self.assertGreaterEqual(num_durable_puts, 2) - - @unpatch_policies - def test_PUT_ec_multiple_segments(self): - ec_policy = POLICIES[3] - self.put_container("ec", "ec-con") - - pyeclib_header_size = len(ec_policy.pyeclib_driver.encode("")[0]) - segment_size = ec_policy.ec_segment_size - - # Big enough to have multiple segments. Also a multiple of the - # segment size to get coverage of that path too. - obj = 'ABC' * segment_size - - prolis = _test_sockets[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/o2 HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - # it's a 2+1 erasure code, so each fragment archive should be half - # the length of the object, plus three inline pyeclib metadata - # things (one per segment) - expected_length = (len(obj) / 2 + pyeclib_header_size * 3) - - partition, nodes = ec_policy.object_ring.get_nodes( - 'a', 'ec-con', 'o2') - - conf = {'devices': _testdir, 'mount_check': 'false'} - df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] - - got_durable = [] - fragment_archives = [] - for node in nodes: - df = df_mgr.get_diskfile( - node['device'], partition, 'a', - 'ec-con', 'o2', policy=ec_policy) - with df.open(): - meta = df.get_metadata() - contents = ''.join(df.reader()) - fragment_archives.append(contents) - self.assertEqual(len(contents), expected_length) - - # check presence for a durable data file for the timestamp - durable_file = ( - utils.Timestamp(df.timestamp).internal + - '#%s' % meta['X-Object-Sysmeta-Ec-Frag-Index'] + - '#d.data') - durable_file = os.path.join( - _testdir, node['device'], storage_directory( - diskfile.get_data_dir(ec_policy), - partition, hash_path('a', 'ec-con', 'o2')), - durable_file) - - if os.path.isfile(durable_file): - got_durable.append(True) - - # Verify that we can decode each individual fragment and that they - # are all the correct size - fragment_size = ec_policy.fragment_size - nfragments = int( - math.ceil(float(len(fragment_archives[0])) / fragment_size)) - - for fragment_index in range(nfragments): - fragment_start = fragment_index * fragment_size - fragment_end = (fragment_index + 1) * fragment_size - - try: - frags = [fa[fragment_start:fragment_end] - for fa in fragment_archives] - seg = ec_policy.pyeclib_driver.decode(frags) - except ECDriverError: - self.fail("Failed to decode fragments %d; this probably " - "means the fragments are not the sizes they " - "should be" % fragment_index) - - segment_start = fragment_index * segment_size - segment_end = (fragment_index + 1) * segment_size - - self.assertEqual(seg, obj[segment_start:segment_end]) - - # verify at least 2 puts made it all the way to the end of 2nd 
- # phase, ie at least 2 durable statuses were written - num_durable_puts = sum(d is True for d in got_durable) - self.assertGreaterEqual(num_durable_puts, 2) - - @unpatch_policies - def test_PUT_ec_object_etag_mismatch(self): - ec_policy = POLICIES[3] - self.put_container("ec", "ec-con") - - obj = '90:6A:02:60:B1:08-96da3e706025537fc42464916427727e' - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/o3 HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Etag: %s\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (md5('something else').hexdigest(), len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 422' - self.assertEqual(headers[:len(exp)], exp) - - # nothing should have made it to disk on the object servers - partition, nodes = prosrv.get_object_ring(3).get_nodes( - 'a', 'ec-con', 'o3') - conf = {'devices': _testdir, 'mount_check': 'false'} - - df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] - - for node in nodes: - df = df_mgr.get_diskfile(node['device'], partition, - 'a', 'ec-con', 'o3', policy=POLICIES[3]) - self.assertRaises(DiskFileNotExist, df.open) - - @unpatch_policies - def test_PUT_ec_fragment_archive_etag_mismatch(self): - ec_policy = POLICIES[3] - self.put_container("ec", "ec-con") - - # Cause a hash mismatch by feeding one particular MD5 hasher some - # extra data. The goal here is to get exactly one of the hashers in - # an object server. - countdown = [1] - - def busted_md5_constructor(initial_str=""): - hasher = md5(initial_str) - if countdown[0] == 0: - hasher.update('wrong') - countdown[0] -= 1 - return hasher - - obj = 'uvarovite-esurience-cerated-symphysic' - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - with mock.patch('swift.obj.server.md5', busted_md5_constructor): - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/pimento HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Etag: %s\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 503' # no quorum - self.assertEqual(headers[:len(exp)], exp) - - # 2/3 of the fragment archives should have landed on disk - partition, nodes = prosrv.get_object_ring(3).get_nodes( - 'a', 'ec-con', 'pimento') - conf = {'devices': _testdir, 'mount_check': 'false'} - - df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] - - found = 0 - for node in nodes: - df = df_mgr.get_diskfile(node['device'], partition, - 'a', 'ec-con', 'pimento', - policy=POLICIES[3]) - try: - # diskfile open won't succeed because no durable was written, - # so look under the hood for data files. 
- files = os.listdir(df._datadir) - if len(files) > 0: - # Although the third fragment archive hasn't landed on - # disk, the directory df._datadir is pre-maturely created - # and is empty when we use O_TMPFILE + linkat() - num_data_files = \ - len([f for f in files if f.endswith('.data')]) - self.assertEqual(1, num_data_files) - found += 1 - except OSError: - pass - self.assertEqual(found, 2) - - @unpatch_policies - def test_PUT_ec_fragment_quorum_archive_etag_mismatch(self): - ec_policy = POLICIES[3] - self.put_container("ec", "ec-con") - - def busted_md5_constructor(initial_str=""): - hasher = md5(initial_str) - hasher.update('wrong') - return hasher - - obj = 'uvarovite-esurience-cerated-symphysic' - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - - call_count = [0] - - def mock_committer(self): - call_count[0] += 1 - - commit_confirmation = \ - 'swift.proxy.controllers.obj.MIMEPutter.send_commit_confirmation' - - with mock.patch('swift.obj.server.md5', busted_md5_constructor), \ - mock.patch(commit_confirmation, mock_committer): - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/quorum HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Etag: %s\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 503' # no quorum - self.assertEqual(headers[:len(exp)], exp) - # Don't send commit to object-server if quorum responses consist of 4xx - self.assertEqual(0, call_count[0]) - - # no fragment archives should have landed on disk - partition, nodes = prosrv.get_object_ring(3).get_nodes( - 'a', 'ec-con', 'quorum') - conf = {'devices': _testdir, 'mount_check': 'false'} - - df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] - - for node in nodes: - df = df_mgr.get_diskfile(node['device'], partition, - 'a', 'ec-con', 'quorum', - policy=POLICIES[3]) - if os.path.exists(df._datadir): - self.assertFalse(os.listdir(df._datadir)) # should be empty - - @unpatch_policies - def test_PUT_ec_fragment_quorum_bad_request(self): - ec_policy = POLICIES[3] - self.put_container("ec", "ec-con") - - obj = 'uvarovite-esurience-cerated-symphysic' - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - - call_count = [0] - - def mock_committer(self): - call_count[0] += 1 - - read_footer = \ - 'swift.obj.server.ObjectController._read_metadata_footer' - commit_confirmation = \ - 'swift.proxy.controllers.obj.MIMEPutter.send_commit_confirmation' - - with mock.patch(read_footer) as read_footer_call, \ - mock.patch(commit_confirmation, mock_committer): - # Emulate missing footer MIME doc in all object-servers - read_footer_call.side_effect = HTTPBadRequest( - body="couldn't find footer MIME doc") - - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/quorum HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Etag: %s\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - - # Don't show a result of the bad conversation between proxy-server - # and object-server - exp = 'HTTP/1.1 503' - self.assertEqual(headers[:len(exp)], exp) - # Don't send commit to object-server if quorum responses consist of 4xx - self.assertEqual(0, call_count[0]) - - # 
no fragment archives should have landed on disk - partition, nodes = prosrv.get_object_ring(3).get_nodes( - 'a', 'ec-con', 'quorum') - conf = {'devices': _testdir, 'mount_check': 'false'} - - df_mgr = diskfile.DiskFileRouter(conf, FakeLogger())[ec_policy] - - for node in nodes: - df = df_mgr.get_diskfile(node['device'], partition, - 'a', 'ec-con', 'quorum', - policy=POLICIES[3]) - if os.path.exists(df._datadir): - self.assertFalse(os.listdir(df._datadir)) # should be empty - - @unpatch_policies - def test_PUT_ec_if_none_match(self): - self.put_container("ec", "ec-con") - - obj = 'ananepionic-lepidophyllous-ropewalker-neglectful' - prolis = _test_sockets[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/inm HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Etag: "%s"\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/inm HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'If-None-Match: *\r\n' - 'Etag: "%s"\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (md5(obj).hexdigest(), len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 412' - self.assertEqual(headers[:len(exp)], exp) - - @unpatch_policies - def test_GET_ec(self): - self.put_container("ec", "ec-con") - - obj = '0123456' * 11 * 17 - - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/go-get-it HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'X-Object-Meta-Color: chartreuse\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('GET /v1/a/ec-con/go-get-it HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'X-Storage-Token: t\r\n' - '\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 200' - self.assertEqual(headers[:len(exp)], exp) - - headers = parse_headers_string(headers) - self.assertEqual(str(len(obj)), headers['Content-Length']) - self.assertEqual(md5(obj).hexdigest(), headers['Etag']) - self.assertEqual('chartreuse', headers['X-Object-Meta-Color']) - - gotten_obj = '' - while True: - buf = fd.read(64) - if not buf: - break - gotten_obj += buf - self.assertEqual(gotten_obj, obj) - error_lines = prosrv.logger.get_lines_for_level('error') - warn_lines = prosrv.logger.get_lines_for_level('warning') - self.assertEqual(len(error_lines), 0) # sanity - self.assertEqual(len(warn_lines), 0) # sanity - - def _test_conditional_GET(self, policy): - container_name = uuid.uuid4().hex - object_path = '/v1/a/%s/conditionals' % container_name - self.put_container(policy.name, container_name) - - obj = 'this object has an etag and is otherwise unimportant' - etag = md5(obj).hexdigest() - not_etag = md5(obj + "blahblah").hexdigest() - - prolis = _test_sockets[0] - prosrv = 
_test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT %s HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (object_path, len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - for verb, body in (('GET', obj), ('HEAD', '')): - # If-Match - req = Request.blank( - object_path, - environ={'REQUEST_METHOD': verb}, - headers={'If-Match': etag}) - resp = req.get_response(prosrv) - self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body, body) - self.assertEqual(etag, resp.headers.get('etag')) - self.assertEqual('bytes', resp.headers.get('accept-ranges')) - - req = Request.blank( - object_path, - environ={'REQUEST_METHOD': verb}, - headers={'If-Match': not_etag}) - resp = req.get_response(prosrv) - self.assertEqual(resp.status_int, 412) - self.assertEqual(etag, resp.headers.get('etag')) - - req = Request.blank( - object_path, - environ={'REQUEST_METHOD': verb}, - headers={'If-Match': "*"}) - resp = req.get_response(prosrv) - self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body, body) - self.assertEqual(etag, resp.headers.get('etag')) - self.assertEqual('bytes', resp.headers.get('accept-ranges')) - - # If-None-Match - req = Request.blank( - object_path, - environ={'REQUEST_METHOD': verb}, - headers={'If-None-Match': etag}) - resp = req.get_response(prosrv) - self.assertEqual(resp.status_int, 304) - self.assertEqual(etag, resp.headers.get('etag')) - self.assertEqual('bytes', resp.headers.get('accept-ranges')) - - req = Request.blank( - object_path, - environ={'REQUEST_METHOD': verb}, - headers={'If-None-Match': not_etag}) - resp = req.get_response(prosrv) - self.assertEqual(resp.status_int, 200) - self.assertEqual(resp.body, body) - self.assertEqual(etag, resp.headers.get('etag')) - self.assertEqual('bytes', resp.headers.get('accept-ranges')) - - req = Request.blank( - object_path, - environ={'REQUEST_METHOD': verb}, - headers={'If-None-Match': "*"}) - resp = req.get_response(prosrv) - self.assertEqual(resp.status_int, 304) - self.assertEqual(etag, resp.headers.get('etag')) - self.assertEqual('bytes', resp.headers.get('accept-ranges')) - - error_lines = prosrv.logger.get_lines_for_level('error') - warn_lines = prosrv.logger.get_lines_for_level('warning') - self.assertEqual(len(error_lines), 0) # sanity - self.assertEqual(len(warn_lines), 0) # sanity - - @unpatch_policies - def test_conditional_GET_ec(self): - policy = POLICIES[3] - self.assertEqual('erasure_coding', policy.policy_type) # sanity - self._test_conditional_GET(policy) - @unpatch_policies def test_conditional_GET_replication(self): policy = POLICIES[0] self.assertEqual('replication', policy.policy_type) # sanity self._test_conditional_GET(policy) - @unpatch_policies - def test_GET_ec_big(self): - self.put_container("ec", "ec-con") - - # our EC segment size is 4 KiB, so this is multiple (3) segments; - # we'll verify that with a sanity check - obj = 'a moose once bit my sister' * 400 - self.assertGreater( - len(obj), POLICIES.get_by_name("ec").ec_segment_size * 2, - "object is too small for proper testing") - - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/big-obj-get HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' 
- 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('GET /v1/a/ec-con/big-obj-get HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'X-Storage-Token: t\r\n' - '\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 200' - self.assertEqual(headers[:len(exp)], exp) - - headers = parse_headers_string(headers) - self.assertEqual(str(len(obj)), headers['Content-Length']) - self.assertEqual(md5(obj).hexdigest(), headers['Etag']) - - gotten_obj = '' - while True: - buf = fd.read(64) - if not buf: - break - gotten_obj += buf - # This may look like a redundant test, but when things fail, this - # has a useful failure message while the subsequent one spews piles - # of garbage and demolishes your terminal's scrollback buffer. - self.assertEqual(len(gotten_obj), len(obj)) - self.assertEqual(gotten_obj, obj) - error_lines = prosrv.logger.get_lines_for_level('error') - warn_lines = prosrv.logger.get_lines_for_level('warning') - self.assertEqual(len(error_lines), 0) # sanity - self.assertEqual(len(warn_lines), 0) # sanity - - @unpatch_policies - def test_GET_ec_failure_handling(self): - self.put_container("ec", "ec-con") - - obj = 'look at this object; it is simply amazing ' * 500 - prolis = _test_sockets[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/crash-test-dummy HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - def explodey_iter(inner_iter): - yield next(inner_iter) - raise Exception("doom ba doom") - - def explodey_doc_parts_iter(inner_iter_iter): - try: - for item in inner_iter_iter: - item = item.copy() # paranoia about mutable data - item['part_iter'] = explodey_iter(item['part_iter']) - yield item - except GeneratorExit: - inner_iter_iter.close() - raise - - real_ec_app_iter = swift.proxy.controllers.obj.ECAppIter - - def explodey_ec_app_iter(path, policy, iterators, *a, **kw): - # Each thing in `iterators` here is a document-parts iterator, - # and we want to fail after getting a little into each part. - # - # That way, we ensure we've started streaming the response to - # the client when things go wrong. 
- return real_ec_app_iter( - path, policy, - [explodey_doc_parts_iter(i) for i in iterators], - *a, **kw) - - with mock.patch("swift.proxy.controllers.obj.ECAppIter", - explodey_ec_app_iter): - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('GET /v1/a/ec-con/crash-test-dummy HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'X-Storage-Token: t\r\n' - '\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 200' - self.assertEqual(headers[:len(exp)], exp) - - headers = parse_headers_string(headers) - self.assertEqual(str(len(obj)), headers['Content-Length']) - self.assertEqual(md5(obj).hexdigest(), headers['Etag']) - - gotten_obj = '' - try: - with Timeout(300): # don't hang the testrun when this fails - while True: - buf = fd.read(64) - if not buf: - break - gotten_obj += buf - except Timeout: - self.fail("GET hung when connection failed") - - # Ensure we failed partway through, otherwise the mocks could - # get out of date without anyone noticing - self.assertTrue(0 < len(gotten_obj) < len(obj)) - - @unpatch_policies - def test_HEAD_ec(self): - self.put_container("ec", "ec-con") - - obj = '0123456' * 11 * 17 - - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('PUT /v1/a/ec-con/go-head-it HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'Content-Length: %d\r\n' - 'X-Storage-Token: t\r\n' - 'X-Object-Meta-Color: chartreuse\r\n' - 'Content-Type: application/octet-stream\r\n' - '\r\n%s' % (len(obj), obj)) - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 201' - self.assertEqual(headers[:len(exp)], exp) - - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('HEAD /v1/a/ec-con/go-head-it HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'X-Storage-Token: t\r\n' - '\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 200' - self.assertEqual(headers[:len(exp)], exp) - - headers = parse_headers_string(headers) - self.assertEqual(str(len(obj)), headers['Content-Length']) - self.assertEqual(md5(obj).hexdigest(), headers['Etag']) - self.assertEqual('chartreuse', headers['X-Object-Meta-Color']) - error_lines = prosrv.logger.get_lines_for_level('error') - warn_lines = prosrv.logger.get_lines_for_level('warning') - self.assertEqual(len(error_lines), 0) # sanity - self.assertEqual(len(warn_lines), 0) # sanity - - @unpatch_policies - def test_GET_ec_404(self): - self.put_container("ec", "ec-con") - - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('GET /v1/a/ec-con/yes-we-have-no-bananas HTTP/1.1\r\n' - 'Host: localhost\r\n' - 'Connection: close\r\n' - 'X-Storage-Token: t\r\n' - '\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 404' - self.assertEqual(headers[:len(exp)], exp) - error_lines = prosrv.logger.get_lines_for_level('error') - warn_lines = prosrv.logger.get_lines_for_level('warning') - self.assertEqual(len(error_lines), 0) # sanity - self.assertEqual(len(warn_lines), 0) # sanity - - @unpatch_policies - def test_HEAD_ec_404(self): - self.put_container("ec", "ec-con") - - prolis = _test_sockets[0] - prosrv = _test_servers[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write('HEAD /v1/a/ec-con/yes-we-have-no-bananas HTTP/1.1\r\n' - 'Host: localhost\r\n' - 
'Connection: close\r\n' - 'X-Storage-Token: t\r\n' - '\r\n') - fd.flush() - headers = readuntil2crlfs(fd) - exp = 'HTTP/1.1 404' - self.assertEqual(headers[:len(exp)], exp) - error_lines = prosrv.logger.get_lines_for_level('error') - warn_lines = prosrv.logger.get_lines_for_level('warning') - self.assertEqual(len(error_lines), 0) # sanity - self.assertEqual(len(warn_lines), 0) # sanity - def test_PUT_expect_header_zero_content_length(self): test_errors = [] @@ -3038,96 +3157,6 @@ class TestObjectController(unittest.TestCase): test_content_type('test.css', iter(['', '', 'text/css', 'text/css', 'text/css'])) - @unpatch_policies - def test_reload_ring_ec(self): - policy = POLICIES[3] - self.put_container("ec", "ec-con") - - orig_rtime = policy.object_ring._rtime - # save original file as back up - copyfile(policy.object_ring.serialized_path, - policy.object_ring.serialized_path + '.bak') - - try: - # overwrite with 2 replica, 2 devices ring - obj_devs = [] - obj_devs.append( - {'port': _test_sockets[-3].getsockname()[1], - 'device': 'sdg1'}) - obj_devs.append( - {'port': _test_sockets[-2].getsockname()[1], - 'device': 'sdh1'}) - write_fake_ring(policy.object_ring.serialized_path, - *obj_devs) - - def get_ring_reloaded_response(method): - # force to reload at the request - policy.object_ring._rtime = 0 - - trans_data = ['%s /v1/a/ec-con/o2 HTTP/1.1\r\n' % method, - 'Host: localhost\r\n', - 'Connection: close\r\n', - 'X-Storage-Token: t\r\n'] - - if method == 'PUT': - # small, so we don't get multiple EC stripes - obj = 'abCD' * 10 - - extra_trans_data = [ - 'Etag: "%s"\r\n' % md5(obj).hexdigest(), - 'Content-Length: %d\r\n' % len(obj), - 'Content-Type: application/octet-stream\r\n', - '\r\n%s' % obj - ] - trans_data.extend(extra_trans_data) - else: - trans_data.append('\r\n') - - prolis = _test_sockets[0] - sock = connect_tcp(('localhost', prolis.getsockname()[1])) - fd = sock.makefile() - fd.write(''.join(trans_data)) - fd.flush() - headers = readuntil2crlfs(fd) - - # use older ring with rollbacking - return headers - - for method in ('PUT', 'HEAD', 'GET', 'POST', 'DELETE'): - headers = get_ring_reloaded_response(method) - exp = 'HTTP/1.1 20' - self.assertEqual(headers[:len(exp)], exp) - - # proxy didn't load newest ring, use older one - self.assertEqual(3, policy.object_ring.replica_count) - - if method == 'POST': - # Take care fast post here! 
- orig_post_as_copy = getattr( - _test_servers[0], 'object_post_as_copy', None) - try: - _test_servers[0].object_post_as_copy = False - with mock.patch.object( - _test_servers[0], - 'object_post_as_copy', False): - headers = get_ring_reloaded_response(method) - finally: - if orig_post_as_copy is None: - del _test_servers[0].object_post_as_copy - else: - _test_servers[0].object_post_as_copy = \ - orig_post_as_copy - - exp = 'HTTP/1.1 20' - self.assertEqual(headers[:len(exp)], exp) - # sanity - self.assertEqual(3, policy.object_ring.replica_count) - - finally: - policy.object_ring._rtime = orig_rtime - os.rename(policy.object_ring.serialized_path + '.bak', - policy.object_ring.serialized_path) - def test_custom_mime_types_files(self): swift_dir = mkdtemp() try: @@ -4434,7 +4463,7 @@ class TestObjectController(unittest.TestCase): def test_chunked_put_bad_version(self): # Check bad version (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() fd.write('GET /v0 HTTP/1.1\r\nHost: localhost\r\n' @@ -4448,7 +4477,7 @@ class TestObjectController(unittest.TestCase): def test_chunked_put_bad_path(self): # Check bad path (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() fd.write('GET invalid HTTP/1.1\r\nHost: localhost\r\n' @@ -4462,7 +4491,7 @@ class TestObjectController(unittest.TestCase): def test_chunked_put_bad_utf8(self): # Check invalid utf-8 (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() fd.write('GET /v1/a%80 HTTP/1.1\r\nHost: localhost\r\n' @@ -4477,7 +4506,7 @@ class TestObjectController(unittest.TestCase): def test_chunked_put_bad_path_no_controller(self): # Check bad path, no controller (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() fd.write('GET /v1 HTTP/1.1\r\nHost: localhost\r\n' @@ -4492,7 +4521,7 @@ class TestObjectController(unittest.TestCase): def test_chunked_put_bad_method(self): # Check bad method (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() fd.write('LICK /v1/a HTTP/1.1\r\nHost: localhost\r\n' @@ -4507,9 +4536,9 @@ class TestObjectController(unittest.TestCase): def test_chunked_put_unhandled_exception(self): # Check unhandled exception (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, - obj2srv, obj3srv) = _test_servers + obj2srv, obj3srv, obj4srv, obj5srv, obj6srv) = _test_servers (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets orig_update_request = prosrv.update_request def broken_update_request(*args, **kwargs): @@ -4533,7 +4562,7 @@ class TestObjectController(unittest.TestCase): # the part Application.log_request that 'enforces' a # content_length on the 
response. (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() fd.write('HEAD /v1/a HTTP/1.1\r\nHost: localhost\r\n' @@ -4557,7 +4586,7 @@ class TestObjectController(unittest.TestCase): ustr_short = '\xe1\xbc\xb8\xce\xbf\xe1\xbd\xbatest' # Create ustr container (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() fd.write('PUT /v1/a/%s HTTP/1.1\r\nHost: localhost\r\n' @@ -4669,7 +4698,7 @@ class TestObjectController(unittest.TestCase): def test_chunked_put_chunked_put(self): # Do chunked object put (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) fd = sock.makefile() # Also happens to assert that x-storage-token is taken as a @@ -4698,7 +4727,7 @@ class TestObjectController(unittest.TestCase): @unpatch_policies def test_conditional_range_get(self): (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, obj2lis, - obj3lis) = _test_sockets + obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets sock = connect_tcp(('localhost', prolis.getsockname()[1])) # make a container @@ -5520,6 +5549,19 @@ class TestObjectController(unittest.TestCase): ]) +class TestObjectControllerECDuplication(ECTestMixin, unittest.TestCase): + ec_policy_index = 4 + + def setUp(self): + self.app = proxy_server.Application( + None, FakeMemcache(), + logger=debug_logger('proxy-ut'), + account_ring=FakeRing(), + container_ring=FakeRing()) + # clear proxy logger result for each test + _test_servers[0].logger._clear() + + class TestECMismatchedFA(unittest.TestCase): def tearDown(self): prosrv = _test_servers[0] @@ -5528,7 +5570,8 @@ class TestECMismatchedFA(unittest.TestCase): def test_mixing_different_objects_fragment_archives(self): (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, - obj2srv, obj3srv) = _test_servers + obj2srv, obj3srv, obj4srv, obj5srv, obj6srv) = _test_servers + ec_policy = POLICIES[3] @public @@ -5607,6 +5650,101 @@ class TestECMismatchedFA(unittest.TestCase): resp = get_req.get_response(prosrv) self.assertEqual(resp.status_int, 503) + def test_mixing_different_objects_fragment_archives_with_dup_factor(self): + (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, + obj2srv, obj3srv, obj4srv, obj5srv, obj6srv) = _test_servers + + ec_policy = POLICIES[4] + + @public + def bad_disk(req): + return Response(status=507, body="borken") + + ensure_container = Request.blank( + "/v1/a/ec-dup-crazytown", + environ={"REQUEST_METHOD": "PUT"}, + headers={"X-Storage-Policy": "ec-dup", "X-Auth-Token": "t"}) + resp = ensure_container.get_response(prosrv) + self.assertTrue(resp.status_int in (201, 202)) + + obj1 = "first version..." 
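+
+        # A sketch of the expected layout, assuming the ec-dup policy
+        # mirrors the 2+1, ec_duplication_factor=2 example from the
+        # overview doc: the six object servers carry fragment indexes
+        # 0 1 2 0 1 2, so obj1-3srv and obj4-6srv each hold a complete
+        # set of unique fragments. We write a different object version
+        # to each subset, then probe GETs that see too few, matching,
+        # and mismatching fragment archives.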
+        put_req1 = Request.blank(
+            "/v1/a/ec-dup-crazytown/obj",
+            environ={"REQUEST_METHOD": "PUT"},
+            headers={"X-Auth-Token": "t"})
+        put_req1.body = obj1
+
+        obj2 = u"versión segundo".encode("utf-8")
+        put_req2 = Request.blank(
+            "/v1/a/ec-dup-crazytown/obj",
+            environ={"REQUEST_METHOD": "PUT"},
+            headers={"X-Auth-Token": "t"})
+        put_req2.body = obj2
+
+        # pyeclib has checks for unequal-length; we don't want to trip those
+        self.assertEqual(len(obj1), len(obj2))
+
+        # The first subset of object servers will have the first version
+        # of the object
+        prosrv._error_limiting = {}
+        with mock.patch.object(obj4srv, 'PUT', bad_disk), \
+                mock.patch.object(obj5srv, 'PUT', bad_disk), \
+                mock.patch.object(obj6srv, 'PUT', bad_disk), \
+                mock.patch(
+                    'swift.common.storage_policy.ECStoragePolicy.quorum'):
+            type(ec_policy).quorum = mock.PropertyMock(return_value=3)
+            resp = put_req1.get_response(prosrv)
+            self.assertEqual(resp.status_int, 201)
+
+        # The second subset will have the second version of the object.
+        prosrv._error_limiting = {}
+        with mock.patch.object(obj1srv, 'PUT', bad_disk), \
+                mock.patch.object(obj2srv, 'PUT', bad_disk), \
+                mock.patch.object(obj3srv, 'PUT', bad_disk), \
+                mock.patch(
+                    'swift.common.storage_policy.ECStoragePolicy.quorum'):
+            type(ec_policy).quorum = mock.PropertyMock(return_value=3)
+            resp = put_req2.get_response(prosrv)
+            self.assertEqual(resp.status_int, 201)
+
+        # A GET that only sees 1 fragment archive should fail
+        get_req = Request.blank("/v1/a/ec-dup-crazytown/obj",
+                                environ={"REQUEST_METHOD": "GET"},
+                                headers={"X-Auth-Token": "t"})
+        prosrv._error_limiting = {}
+        with mock.patch.object(obj2srv, 'GET', bad_disk), \
+                mock.patch.object(obj3srv, 'GET', bad_disk), \
+                mock.patch.object(obj4srv, 'GET', bad_disk), \
+                mock.patch.object(obj5srv, 'GET', bad_disk), \
+                mock.patch.object(obj6srv, 'GET', bad_disk):
+            resp = get_req.get_response(prosrv)
+            self.assertEqual(resp.status_int, 503)
+
+        # A GET that sees 2 matching FAs will work
+        get_req = Request.blank("/v1/a/ec-dup-crazytown/obj",
+                                environ={"REQUEST_METHOD": "GET"},
+                                headers={"X-Auth-Token": "t"})
+        prosrv._error_limiting = {}
+        with mock.patch.object(obj1srv, 'GET', bad_disk), \
+                mock.patch.object(obj2srv, 'GET', bad_disk), \
+                mock.patch.object(obj3srv, 'GET', bad_disk), \
+                mock.patch.object(obj6srv, 'GET', bad_disk):
+            resp = get_req.get_response(prosrv)
+            self.assertEqual(resp.status_int, 200)
+            self.assertEqual(resp.body, obj2)
+
+        # A GET that sees 2 mismatching FAs will fail
+        get_req = Request.blank("/v1/a/ec-dup-crazytown/obj",
+                                environ={"REQUEST_METHOD": "GET"},
+                                headers={"X-Auth-Token": "t"})
+        prosrv._error_limiting = {}
+        with mock.patch.object(obj2srv, 'GET', bad_disk), \
+                mock.patch.object(obj3srv, 'GET', bad_disk), \
+                mock.patch.object(obj4srv, 'GET', bad_disk), \
+                mock.patch.object(obj6srv, 'GET', bad_disk):
+            resp = get_req.get_response(prosrv)
+            self.assertEqual(resp.status_int, 503)
+

 class TestECGets(unittest.TestCase):

     def tearDown(self):
@@ -5630,7 +5768,7 @@ class TestECGets(unittest.TestCase):
             hash_dir should be marked as durable or not.
""" (prosrv, acc1srv, acc2srv, con1srv, con2srv, obj1srv, - obj2srv, obj3srv) = _test_servers + obj2srv, obj3srv, _obj4srv, _obj5srv, _obj6srv) = _test_servers ec_policy = POLICIES[3] container_name = uuid.uuid4().hex obj_name = uuid.uuid4().hex @@ -8612,7 +8750,7 @@ class TestSocketObjectVersions(unittest.TestCase): versions_to_create = 3 # Create a container for our versioned object testing (prolis, acc1lis, acc2lis, con1lis, con2lis, obj1lis, - obj2lis, obj3lis) = _test_sockets + obj2lis, obj3lis, obj4lis, obj5lis, obj6lis) = _test_sockets pre = quote('%03x' % len(o)) osub = '%s/sub' % o presub = quote('%03x' % len(osub))