diff --git a/bin/swift-ring-builder b/bin/swift-ring-builder index b13fa4b406..fb1649f430 100755 --- a/bin/swift-ring-builder +++ b/bin/swift-ring-builder @@ -610,13 +610,11 @@ swift-ring-builder rebalance print '-' * 79 status = EXIT_WARNING ts = time() - pickle.dump(builder.get_ring().to_dict(), - GzipFile(pathjoin(backup_dir, '%d.' % ts + - basename(ring_file)), 'wb'), protocol=2) + builder.get_ring().save( + pathjoin(backup_dir, '%d.' % ts + basename(ring_file))) pickle.dump(builder.to_dict(), open(pathjoin(backup_dir, '%d.' % ts + basename(argv[1])), 'wb'), protocol=2) - pickle.dump(builder.get_ring().to_dict(), GzipFile(ring_file, 'wb'), - protocol=2) + builder.get_ring().save(ring_file) pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(status) @@ -644,10 +642,9 @@ swift-ring-builder write_ring '"rebalance"?' else: print 'Warning: Writing an empty ring' - pickle.dump(ring_data.to_dict(), - GzipFile(pathjoin(backup_dir, '%d.' % time() + - basename(ring_file)), 'wb'), protocol=2) - pickle.dump(ring_data.to_dict(), GzipFile(ring_file, 'wb'), protocol=2) + ring_data.save( + pathjoin(backup_dir, '%d.' % time() + basename(ring_file))) + ring_data.save(ring_file) exit(EXIT_SUCCESS) def pretend_min_part_hours_passed(): diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py index cd24942ae7..89ae9d2ec1 100644 --- a/swift/common/ring/builder.py +++ b/swift/common/ring/builder.py @@ -29,9 +29,9 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree class RingBuilder(object): """ - Used to build swift.common.RingData instances to be written to disk and - used with swift.common.ring.Ring instances. See bin/ring-builder.py for - example usage. + Used to build swift.common.ring.RingData instances to be written to disk + and used with swift.common.ring.Ring instances. See bin/swift-ring-builder + for example usage. The instance variable devs_changed indicates if the device information has changed since the last balancing. This can be used by tools to know whether diff --git a/swift/common/ring/ring.py b/swift/common/ring/ring.py index e7a1ea66fb..c2eccae3f8 100644 --- a/swift/common/ring/ring.py +++ b/swift/common/ring/ring.py @@ -13,11 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import array import cPickle as pickle from collections import defaultdict from gzip import GzipFile -from os.path import getmtime -from struct import unpack_from +import json +from os.path import getmtime, join as pathjoin +import struct from time import time import os from io import BufferedReader @@ -34,6 +36,70 @@ class RingData(object): self._replica2part2dev_id = replica2part2dev_id self._part_shift = part_shift + @classmethod + def deserialize_v1(cls, gz_file): + json_len, = struct.unpack('!I', gz_file.read(4)) + ring_dict = json.loads(gz_file.read(json_len)) + ring_dict['replica2part2dev_id'] = [] + partition_count = 1 << (32 - ring_dict['part_shift']) + for x in xrange(ring_dict['replica_count']): + ring_dict['replica2part2dev_id'].append( + array.array('H', gz_file.read(2 * partition_count))) + return ring_dict + + @classmethod + def load(cls, filename): + """ + Load ring data from a file. + + :param filename: Path to a file serialized by the save() method. + :returns: A RingData instance containing the loaded data. + """ + gz_file = GzipFile(filename, 'rb') + # Python 2.6 GzipFile doesn't support BufferedIO + if hasattr(gz_file, '_checkReadable'): + gz_file = BufferedReader(gz_file) + + # See if the file is in the new format + magic = gz_file.read(4) + if magic == 'R1NG': + version, = struct.unpack('!H', gz_file.read(2)) + if version == 1: + ring_data = cls.deserialize_v1(gz_file) + else: + raise Exception('Unknown ring format version %d' % version) + else: + # Assume old-style pickled ring + gz_file.seek(0) + ring_data = pickle.load(gz_file) + if not hasattr(ring_data, 'devs'): + ring_data = RingData(ring_data['replica2part2dev_id'], + ring_data['devs'], ring_data['part_shift']) + return ring_data + + def serialize_v1(self, file_obj): + # Write out new-style serialization magic and version: + file_obj.write(struct.pack('!4sH', 'R1NG', 1)) + ring = self.to_dict() + json_text = json.dumps( + {'devs': ring['devs'], 'part_shift': ring['part_shift'], + 'replica_count': len(ring['replica2part2dev_id'])}) + json_len = len(json_text) + file_obj.write(struct.pack('!I', json_len)) + file_obj.write(json_text) + for part2dev_id in ring['replica2part2dev_id']: + file_obj.write(part2dev_id.tostring()) + + def save(self, filename): + """ + Serialize this RingData instance to disk. + + :param filename: File into which this instance should be serialized. + """ + gz_file = GzipFile(filename, 'wb') + self.serialize_v1(gz_file) + gz_file.close() + def to_dict(self): return {'devs': self.devs, 'replica2part2dev_id': self._replica2part2dev_id, @@ -44,43 +110,32 @@ class Ring(object): """ Partitioned consistent hashing ring. - :param pickle_gz_path: path to ring file + :param serialized_path: path to serialized RingData instance :param reload_time: time interval in seconds to check for a ring change """ - def __init__(self, pickle_gz_path, reload_time=15, ring_name=None): + def __init__(self, serialized_path, reload_time=15, ring_name=None): # can't use the ring unless HASH_PATH_SUFFIX is set validate_configuration() if ring_name: - self.pickle_gz_path = os.path.join(pickle_gz_path, + self.serialized_path = os.path.join(serialized_path, ring_name + '.ring.gz') else: - self.pickle_gz_path = os.path.join(pickle_gz_path) + self.serialized_path = os.path.join(serialized_path) self.reload_time = reload_time self._reload(force=True) def _reload(self, force=False): self._rtime = time() + self.reload_time if force or self.has_changed(): - ring_data = pickle.load(self._get_gz_file()) - if not hasattr(ring_data, 'devs'): - ring_data = RingData(ring_data['replica2part2dev_id'], - ring_data['devs'], ring_data['part_shift']) - self._mtime = getmtime(self.pickle_gz_path) + ring_data = RingData.load(self.serialized_path) + self._mtime = getmtime(self.serialized_path) self._devs = ring_data.devs self._replica2part2dev_id = ring_data._replica2part2dev_id self._part_shift = ring_data._part_shift self._rebuild_tier_data() - def _get_gz_file(self): - gz_file = GzipFile(self.pickle_gz_path, 'rb') - if hasattr(gz_file, '_checkReadable'): - return BufferedReader(gz_file) - else: - # Python 2.6 doesn't support BufferedIO - return gz_file - def _rebuild_tier_data(self): self.tier2devs = defaultdict(list) for dev in self._devs: @@ -121,7 +176,7 @@ class Ring(object): :returns: True if the ring on disk has changed, False otherwise """ - return getmtime(self.pickle_gz_path) != self._mtime + return getmtime(self.serialized_path) != self._mtime def get_part_nodes(self, part): """ @@ -172,7 +227,7 @@ class Ring(object): key = hash_path(account, container, obj, raw_digest=True) if time() > self._rtime: self._reload() - part = unpack_from('>I', key)[0] >> self._part_shift + part = struct.unpack_from('>I', key)[0] >> self._part_shift seen_ids = set() return part, [self._devs[r[part]] for r in self._replica2part2dev_id if not (r[part] in seen_ids or seen_ids.add(r[part]))] diff --git a/test/unit/common/ring/test_ring.py b/test/unit/common/ring/test_ring.py index efa1837d4a..9a37cc31b7 100644 --- a/test/unit/common/ring/test_ring.py +++ b/test/unit/common/ring/test_ring.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import array import cPickle as pickle import os import unittest @@ -25,6 +26,20 @@ from swift.common import ring, utils class TestRingData(unittest.TestCase): + def setUp(self): + self.testdir = os.path.join(os.path.dirname(__file__), 'ring_data') + rmtree(self.testdir, ignore_errors=1) + os.mkdir(self.testdir) + + def tearDown(self): + rmtree(self.testdir, ignore_errors=1) + + def assert_ring_data_equal(self, rd_expected, rd_got): + self.assertEquals(rd_expected._replica2part2dev_id, + rd_got._replica2part2dev_id) + self.assertEquals(rd_expected.devs, rd_got.devs) + self.assertEquals(rd_expected._part_shift, rd_got._part_shift) + def test_attrs(self): r2p2d = [[0, 1, 0, 1], [0, 1, 0, 1]] d = [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}] @@ -34,11 +49,23 @@ class TestRingData(unittest.TestCase): self.assertEquals(rd.devs, d) self.assertEquals(rd._part_shift, s) - def test_pickleable(self): + def test_can_load_pickled_ring_data(self): rd = ring.RingData([[0, 1, 0, 1], [0, 1, 0, 1]], [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30) + ring_fname = os.path.join(self.testdir, 'foo.ring.gz') for p in xrange(pickle.HIGHEST_PROTOCOL): - pickle.loads(pickle.dumps(rd, protocol=p)) + pickle.dump(rd, GzipFile(ring_fname, 'wb'), protocol=p) + ring_data = ring.RingData.load(ring_fname) + self.assert_ring_data_equal(rd, ring_data) + + def test_roundtrip_serialization(self): + ring_fname = os.path.join(self.testdir, 'foo.ring.gz') + rd = ring.RingData( + [array.array('H', [0, 1, 0, 1]), array.array('H',[0, 1, 0, 1])], + [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30) + rd.save(ring_fname) + rd2 = ring.RingData.load(ring_fname) + self.assert_ring_data_equal(rd, rd2) class TestRing(unittest.TestCase): @@ -49,9 +76,10 @@ class TestRing(unittest.TestCase): rmtree(self.testdir, ignore_errors=1) os.mkdir(self.testdir) self.testgz = os.path.join(self.testdir, 'whatever.ring.gz') - self.intended_replica2part2dev_id = [[0, 1, 0, 1], - [0, 1, 0, 1], - [3, 4, 3, 4]] + self.intended_replica2part2dev_id = [ + array.array('H', [0, 1, 0, 1]), + array.array('H', [0, 1, 0, 1]), + array.array('H', [3, 4, 3, 4])] self.intended_devs = [{'id': 0, 'zone': 0, 'weight': 1.0, 'ip': '10.1.1.1', 'port': 6000}, {'id': 1, 'zone': 0, 'weight': 1.0, @@ -63,9 +91,8 @@ class TestRing(unittest.TestCase): 'ip': '10.1.2.2', 'port': 6000}] self.intended_part_shift = 30 self.intended_reload_time = 15 - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) self.ring = ring.Ring(self.testdir, reload_time=self.intended_reload_time, ring_name='whatever') @@ -78,7 +105,7 @@ class TestRing(unittest.TestCase): self.assertEquals(self.ring._part_shift, self.intended_part_shift) self.assertEquals(self.ring.devs, self.intended_devs) self.assertEquals(self.ring.reload_time, self.intended_reload_time) - self.assertEquals(self.ring.pickle_gz_path, self.testgz) + self.assertEquals(self.ring.serialized_path, self.testgz) # test invalid endcap _orig_hash_path_suffix = utils.HASH_PATH_SUFFIX try: @@ -99,9 +126,8 @@ class TestRing(unittest.TestCase): orig_mtime = self.ring._mtime self.assertEquals(len(self.ring.devs), 5) self.intended_devs.append({'id': 3, 'zone': 3, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.ring.get_nodes('a') self.assertEquals(len(self.ring.devs), 6) @@ -113,9 +139,8 @@ class TestRing(unittest.TestCase): orig_mtime = self.ring._mtime self.assertEquals(len(self.ring.devs), 6) self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.ring.get_part_nodes(0) self.assertEquals(len(self.ring.devs), 7) @@ -128,9 +153,8 @@ class TestRing(unittest.TestCase): part, nodes = self.ring.get_nodes('a') self.assertEquals(len(self.ring.devs), 7) self.intended_devs.append({'id': 6, 'zone': 5, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.ring.get_more_nodes(part).next() self.assertEquals(len(self.ring.devs), 8) @@ -142,9 +166,8 @@ class TestRing(unittest.TestCase): orig_mtime = self.ring._mtime self.assertEquals(len(self.ring.devs), 8) self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.assertEquals(len(self.ring.devs), 9) self.assertNotEquals(self.ring._mtime, orig_mtime)