#!/usr/bin/python -u # Copyright (c) 2010-2012 OpenStack Foundation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. from unittest import main from uuid import uuid4 import random from hashlib import md5 from collections import defaultdict import os from swiftclient import client from swift.common import direct_client from swift.common.exceptions import ClientException from swift.common.manager import Manager from test.probe.common import (kill_server, start_server, ReplProbeTest, ECProbeTest, Body) class TestObjectHandoff(ReplProbeTest): def test_main(self): # Create container container = 'container-%s' % uuid4() client.put_container(self.url, self.token, container, headers={'X-Storage-Policy': self.policy.name}) # Kill one container/obj primary server cpart, cnodes = self.container_ring.get_nodes(self.account, container) cnode = cnodes[0] obj = 'object-%s' % uuid4() opart, onodes = self.object_ring.get_nodes( self.account, container, obj) onode = onodes[0] kill_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Create container/obj (goes to two primary servers and one handoff) client.put_object(self.url, self.token, container, obj, 'VERIFY') odata = client.get_object(self.url, self.token, container, obj)[-1] if odata != 'VERIFY': raise Exception('Object GET did not return VERIFY, instead it ' 'returned: %s' % repr(odata)) # Kill other two container/obj primary servers # to ensure GET handoff works for node in onodes[1:]: kill_server((node['ip'], node['port']), self.ipport2server, self.pids) # Indirectly through proxy assert we can get container/obj odata = client.get_object(self.url, self.token, container, obj)[-1] if odata != 'VERIFY': raise Exception('Object GET did not return VERIFY, instead it ' 'returned: %s' % repr(odata)) # Restart those other two container/obj primary servers for node in onodes[1:]: start_server((node['ip'], node['port']), self.ipport2server, self.pids) # We've indirectly verified the handoff node has the container/object, # but let's directly verify it. another_onode = next(self.object_ring.get_more_nodes(opart)) odata = direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] if odata != 'VERIFY': raise Exception('Direct object GET did not return VERIFY, instead ' 'it returned: %s' % repr(odata)) # drop a tempfile in the handoff's datadir, like it might have # had if there was an rsync failure while it was previously a # primary handoff_device_path = self.device_dir('object', another_onode) data_filename = None for root, dirs, files in os.walk(handoff_device_path): for filename in files: if filename.endswith('.data'): data_filename = filename temp_filename = '.%s.6MbL6r' % data_filename temp_filepath = os.path.join(root, temp_filename) if not data_filename: self.fail('Did not find any data files on %r' % handoff_device_path) open(temp_filepath, 'w') # Assert container listing (via proxy and directly) has container/obj objs = [o['name'] for o in client.get_container(self.url, self.token, container)[1]] if obj not in objs: raise Exception('Container listing did not know about object') for cnode in cnodes: objs = [o['name'] for o in direct_client.direct_get_container( cnode, cpart, self.account, container)[1]] if obj not in objs: raise Exception( 'Container server %s:%s did not know about object' % (cnode['ip'], cnode['port'])) # Bring the first container/obj primary server back up start_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Assert that it doesn't have container/obj yet try: direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) except ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") # Run object replication, ensuring we run the handoff node last so it # will remove its extra handoff partition for node in onodes: try: port_num = node['replication_port'] except KeyError: port_num = node['port'] node_id = (port_num - 6000) / 10 Manager(['object-replicator']).once(number=node_id) try: another_port_num = another_onode['replication_port'] except KeyError: another_port_num = another_onode['port'] another_num = (another_port_num - 6000) / 10 Manager(['object-replicator']).once(number=another_num) # Assert the first container/obj primary server now has container/obj odata = direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] if odata != 'VERIFY': raise Exception('Direct object GET did not return VERIFY, instead ' 'it returned: %s' % repr(odata)) # and that it does *not* have a temporary rsync dropping! found_data_filename = False primary_device_path = self.device_dir('object', onode) for root, dirs, files in os.walk(primary_device_path): for filename in files: if filename.endswith('.6MbL6r'): self.fail('Found unexpected file %s' % os.path.join(root, filename)) if filename == data_filename: found_data_filename = True self.assertTrue(found_data_filename, 'Did not find data file %r on %r' % ( data_filename, primary_device_path)) # Assert the handoff server no longer has container/obj try: direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) except ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") # Kill the first container/obj primary server again (we have two # primaries and the handoff up now) kill_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Delete container/obj try: client.delete_object(self.url, self.token, container, obj) except client.ClientException as err: if self.object_ring.replica_count > 2: raise # Object DELETE returning 503 for (404, 204) # remove this with fix for # https://bugs.launchpad.net/swift/+bug/1318375 self.assertEqual(503, err.http_status) # Assert we can't head container/obj try: client.head_object(self.url, self.token, container, obj) except client.ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") # Assert container/obj is not in the container listing, both indirectly # and directly objs = [o['name'] for o in client.get_container(self.url, self.token, container)[1]] if obj in objs: raise Exception('Container listing still knew about object') for cnode in cnodes: objs = [o['name'] for o in direct_client.direct_get_container( cnode, cpart, self.account, container)[1]] if obj in objs: raise Exception( 'Container server %s:%s still knew about object' % (cnode['ip'], cnode['port'])) # Restart the first container/obj primary server again start_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Assert it still has container/obj direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) # Run object replication, ensuring we run the handoff node last so it # will remove its extra handoff partition for node in onodes: try: port_num = node['replication_port'] except KeyError: port_num = node['port'] node_id = (port_num - 6000) / 10 Manager(['object-replicator']).once(number=node_id) another_node_id = (another_port_num - 6000) / 10 Manager(['object-replicator']).once(number=another_node_id) # Assert primary node no longer has container/obj try: direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) except ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") class TestECObjectHandoffOverwrite(ECProbeTest): def get_object(self, container_name, object_name): headers, body = client.get_object(self.url, self.token, container_name, object_name, resp_chunk_size=64 * 2 ** 10) resp_checksum = md5() for chunk in body: resp_checksum.update(chunk) return resp_checksum.hexdigest() def test_ec_handoff_overwrite(self): container_name = 'container-%s' % uuid4() object_name = 'object-%s' % uuid4() # create EC container headers = {'X-Storage-Policy': self.policy.name} client.put_container(self.url, self.token, container_name, headers=headers) # PUT object old_contents = Body() client.put_object(self.url, self.token, container_name, object_name, contents=old_contents) # get our node lists opart, onodes = self.object_ring.get_nodes( self.account, container_name, object_name) # shutdown one of the primary data nodes failed_primary = random.choice(onodes) failed_primary_device_path = self.device_dir('object', failed_primary) self.kill_drive(failed_primary_device_path) # overwrite our object with some new data new_contents = Body() client.put_object(self.url, self.token, container_name, object_name, contents=new_contents) self.assertNotEqual(new_contents.etag, old_contents.etag) # restore failed primary device self.revive_drive(failed_primary_device_path) # sanity - failed node has old contents req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)} headers = direct_client.direct_head_object( failed_primary, opart, self.account, container_name, object_name, headers=req_headers) self.assertEqual(headers['X-Object-Sysmeta-EC-Etag'], old_contents.etag) # we have 1 primary with wrong old etag, and we should have 5 with # new etag plus a handoff with the new etag, so killing 2 other # primaries forces proxy to try to GET from all primaries plus handoff. other_nodes = [n for n in onodes if n != failed_primary] random.shuffle(other_nodes) for node in other_nodes[:2]: self.kill_drive(self.device_dir('object', node)) # sanity, after taking out two primaries we should be down to # only four primaries, one of which has the old etag - but we # also have a handoff with the new etag out there found_frags = defaultdict(int) req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)} for node in onodes + list(self.object_ring.get_more_nodes(opart)): try: headers = direct_client.direct_head_object( node, opart, self.account, container_name, object_name, headers=req_headers) except Exception: continue found_frags[headers['X-Object-Sysmeta-EC-Etag']] += 1 self.assertEqual(found_frags, { new_contents.etag: 4, # this should be enough to rebuild! old_contents.etag: 1, }) # clear node error limiting Manager(['proxy']).restart() resp_etag = self.get_object(container_name, object_name) self.assertEqual(resp_etag, new_contents.etag) if __name__ == '__main__': main()