#!/usr/bin/python -u # Copyright (c) 2010-2012 OpenStack Foundation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. # See the License for the specific language governing permissions and # limitations under the License. from unittest import main from uuid import uuid4 import random from hashlib import md5 from collections import defaultdict from swiftclient import client from swift.common import direct_client from swift.common.exceptions import ClientException from swift.common.manager import Manager from test.probe.common import (kill_server, start_server, ReplProbeTest, ECProbeTest, Body) class TestObjectHandoff(ReplProbeTest): def test_main(self): # Create container container = 'container-%s' % uuid4() client.put_container(self.url, self.token, container, headers={'X-Storage-Policy': self.policy.name}) # Kill one container/obj primary server cpart, cnodes = self.container_ring.get_nodes(self.account, container) cnode = cnodes[0] obj = 'object-%s' % uuid4() opart, onodes = self.object_ring.get_nodes( self.account, container, obj) onode = onodes[0] kill_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Create container/obj (goes to two primary servers and one handoff) client.put_object(self.url, self.token, container, obj, 'VERIFY') odata = client.get_object(self.url, self.token, container, obj)[-1] if odata != 'VERIFY': raise Exception('Object GET did not return VERIFY, instead it ' 'returned: %s' % repr(odata)) # Kill other two container/obj primary servers # to ensure GET handoff works for node in onodes[1:]: kill_server((node['ip'], node['port']), self.ipport2server, self.pids) # Indirectly through proxy assert we can get container/obj odata = client.get_object(self.url, self.token, container, obj)[-1] if odata != 'VERIFY': raise Exception('Object GET did not return VERIFY, instead it ' 'returned: %s' % repr(odata)) # Restart those other two container/obj primary servers for node in onodes[1:]: start_server((node['ip'], node['port']), self.ipport2server, self.pids) # We've indirectly verified the handoff node has the container/object, # but let's directly verify it. another_onode = next(self.object_ring.get_more_nodes(opart)) odata = direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] if odata != 'VERIFY': raise Exception('Direct object GET did not return VERIFY, instead ' 'it returned: %s' % repr(odata)) # Assert container listing (via proxy and directly) has container/obj objs = [o['name'] for o in client.get_container(self.url, self.token, container)[1]] if obj not in objs: raise Exception('Container listing did not know about object') for cnode in cnodes: objs = [o['name'] for o in direct_client.direct_get_container( cnode, cpart, self.account, container)[1]] if obj not in objs: raise Exception( 'Container server %s:%s did not know about object' % (cnode['ip'], cnode['port'])) # Bring the first container/obj primary server back up start_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Assert that it doesn't have container/obj yet try: direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) except ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") # Run object replication, ensuring we run the handoff node last so it # will remove its extra handoff partition for node in onodes: try: port_num = node['replication_port'] except KeyError: port_num = node['port'] node_id = (port_num - 6000) / 10 Manager(['object-replicator']).once(number=node_id) try: another_port_num = another_onode['replication_port'] except KeyError: another_port_num = another_onode['port'] another_num = (another_port_num - 6000) / 10 Manager(['object-replicator']).once(number=another_num) # Assert the first container/obj primary server now has container/obj odata = direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx})[-1] if odata != 'VERIFY': raise Exception('Direct object GET did not return VERIFY, instead ' 'it returned: %s' % repr(odata)) # Assert the handoff server no longer has container/obj try: direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) except ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") # Kill the first container/obj primary server again (we have two # primaries and the handoff up now) kill_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Delete container/obj try: client.delete_object(self.url, self.token, container, obj) except client.ClientException as err: if self.object_ring.replica_count > 2: raise # Object DELETE returning 503 for (404, 204) # remove this with fix for # https://bugs.launchpad.net/swift/+bug/1318375 self.assertEqual(503, err.http_status) # Assert we can't head container/obj try: client.head_object(self.url, self.token, container, obj) except client.ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") # Assert container/obj is not in the container listing, both indirectly # and directly objs = [o['name'] for o in client.get_container(self.url, self.token, container)[1]] if obj in objs: raise Exception('Container listing still knew about object') for cnode in cnodes: objs = [o['name'] for o in direct_client.direct_get_container( cnode, cpart, self.account, container)[1]] if obj in objs: raise Exception( 'Container server %s:%s still knew about object' % (cnode['ip'], cnode['port'])) # Restart the first container/obj primary server again start_server((onode['ip'], onode['port']), self.ipport2server, self.pids) # Assert it still has container/obj direct_client.direct_get_object( onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) # Run object replication, ensuring we run the handoff node last so it # will remove its extra handoff partition for node in onodes: try: port_num = node['replication_port'] except KeyError: port_num = node['port'] node_id = (port_num - 6000) / 10 Manager(['object-replicator']).once(number=node_id) another_node_id = (another_port_num - 6000) / 10 Manager(['object-replicator']).once(number=another_node_id) # Assert primary node no longer has container/obj try: direct_client.direct_get_object( another_onode, opart, self.account, container, obj, headers={ 'X-Backend-Storage-Policy-Index': self.policy.idx}) except ClientException as err: self.assertEqual(err.http_status, 404) else: self.fail("Expected ClientException but didn't get it") class TestECObjectHandoffOverwrite(ECProbeTest): def get_object(self, container_name, object_name): headers, body = client.get_object(self.url, self.token, container_name, object_name, resp_chunk_size=64 * 2 ** 10) resp_checksum = md5() for chunk in body: resp_checksum.update(chunk) return resp_checksum.hexdigest() def test_ec_handoff_overwrite(self): container_name = 'container-%s' % uuid4() object_name = 'object-%s' % uuid4() # create EC container headers = {'X-Storage-Policy': self.policy.name} client.put_container(self.url, self.token, container_name, headers=headers) # PUT object old_contents = Body() client.put_object(self.url, self.token, container_name, object_name, contents=old_contents) # get our node lists opart, onodes = self.object_ring.get_nodes( self.account, container_name, object_name) # shutdown one of the primary data nodes failed_primary = random.choice(onodes) failed_primary_device_path = self.device_dir('object', failed_primary) self.kill_drive(failed_primary_device_path) # overwrite our object with some new data new_contents = Body() client.put_object(self.url, self.token, container_name, object_name, contents=new_contents) self.assertNotEqual(new_contents.etag, old_contents.etag) # restore failed primary device self.revive_drive(failed_primary_device_path) # sanity - failed node has old contents req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)} headers = direct_client.direct_head_object( failed_primary, opart, self.account, container_name, object_name, headers=req_headers) self.assertEqual(headers['X-Object-Sysmeta-EC-Etag'], old_contents.etag) # we have 1 primary with wrong old etag, and we should have 5 with # new etag plus a handoff with the new etag, so killing 2 other # primaries forces proxy to try to GET from all primaries plus handoff. other_nodes = [n for n in onodes if n != failed_primary] random.shuffle(other_nodes) for node in other_nodes[:2]: self.kill_drive(self.device_dir('object', node)) # sanity, after taking out two primaries we should be down to # only four primaries, one of which has the old etag - but we # also have a handoff with the new etag out there found_frags = defaultdict(int) req_headers = {'X-Backend-Storage-Policy-Index': int(self.policy)} for node in onodes + list(self.object_ring.get_more_nodes(opart)): try: headers = direct_client.direct_head_object( node, opart, self.account, container_name, object_name, headers=req_headers) except Exception: continue found_frags[headers['X-Object-Sysmeta-EC-Etag']] += 1 self.assertEqual(found_frags, { new_contents.etag: 4, # this should be enough to rebuild! old_contents.etag: 1, }) # clear node error limiting Manager(['proxy']).restart() resp_etag = self.get_object(container_name, object_name) self.assertEqual(resp_etag, new_contents.etag) if __name__ == '__main__': main()