From f8ce43a21891ae2cc00d0770895b556eea9c7845 Mon Sep 17 00:00:00 2001 From: Darrell Bishop Date: Sun, 5 Aug 2012 00:51:49 -0700 Subject: [PATCH] Use custom encoding for RingData, not pickle. Serialize RingData in a versioned, custom format which is a combination of a JSON-encoded header and .tostring() dumps of the replica2part2dev_id arrays. This format deserializes hundreds of times faster than rings serialized with Python 2.7's pickle (a significant performance regression for ring loading between Python 2.6 and Python 2.7). Fixes bug 1031954. swift.common.ring.ring.RingData is now responsible for serialization and deserialization of its data via a new load() class method and save() object method. The new implementation is backward-compatible; if a ring does not begin with a new-style magic string, it is assumed to be an old-style pickle-dumped ring and is handled as before. So new Swift code can read old rings, but old Swift code will not be able to read newly-serialized rings. THIS SHOULD BE MENTIONED PROMINENTLY IN THE RELEASE NOTES. I didn't want to bite of more than necessary, so I didn't mess with builder file serialization. Change-Id: I799b9a4c894d54fb16592443904ac055b2638e2d --- bin/swift-ring-builder | 15 ++--- swift/common/ring/builder.py | 6 +- swift/common/ring/ring.py | 97 +++++++++++++++++++++++------- test/unit/common/ring/test_ring.py | 65 +++++++++++++------- 4 files changed, 129 insertions(+), 54 deletions(-) diff --git a/bin/swift-ring-builder b/bin/swift-ring-builder index b13fa4b406..fb1649f430 100755 --- a/bin/swift-ring-builder +++ b/bin/swift-ring-builder @@ -610,13 +610,11 @@ swift-ring-builder rebalance print '-' * 79 status = EXIT_WARNING ts = time() - pickle.dump(builder.get_ring().to_dict(), - GzipFile(pathjoin(backup_dir, '%d.' % ts + - basename(ring_file)), 'wb'), protocol=2) + builder.get_ring().save( + pathjoin(backup_dir, '%d.' % ts + basename(ring_file))) pickle.dump(builder.to_dict(), open(pathjoin(backup_dir, '%d.' % ts + basename(argv[1])), 'wb'), protocol=2) - pickle.dump(builder.get_ring().to_dict(), GzipFile(ring_file, 'wb'), - protocol=2) + builder.get_ring().save(ring_file) pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) exit(status) @@ -644,10 +642,9 @@ swift-ring-builder write_ring '"rebalance"?' else: print 'Warning: Writing an empty ring' - pickle.dump(ring_data.to_dict(), - GzipFile(pathjoin(backup_dir, '%d.' % time() + - basename(ring_file)), 'wb'), protocol=2) - pickle.dump(ring_data.to_dict(), GzipFile(ring_file, 'wb'), protocol=2) + ring_data.save( + pathjoin(backup_dir, '%d.' % time() + basename(ring_file))) + ring_data.save(ring_file) exit(EXIT_SUCCESS) def pretend_min_part_hours_passed(): diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py index cd24942ae7..89ae9d2ec1 100644 --- a/swift/common/ring/builder.py +++ b/swift/common/ring/builder.py @@ -29,9 +29,9 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree class RingBuilder(object): """ - Used to build swift.common.RingData instances to be written to disk and - used with swift.common.ring.Ring instances. See bin/ring-builder.py for - example usage. + Used to build swift.common.ring.RingData instances to be written to disk + and used with swift.common.ring.Ring instances. See bin/swift-ring-builder + for example usage. The instance variable devs_changed indicates if the device information has changed since the last balancing. This can be used by tools to know whether diff --git a/swift/common/ring/ring.py b/swift/common/ring/ring.py index e7a1ea66fb..c2eccae3f8 100644 --- a/swift/common/ring/ring.py +++ b/swift/common/ring/ring.py @@ -13,11 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import array import cPickle as pickle from collections import defaultdict from gzip import GzipFile -from os.path import getmtime -from struct import unpack_from +import json +from os.path import getmtime, join as pathjoin +import struct from time import time import os from io import BufferedReader @@ -34,6 +36,70 @@ class RingData(object): self._replica2part2dev_id = replica2part2dev_id self._part_shift = part_shift + @classmethod + def deserialize_v1(cls, gz_file): + json_len, = struct.unpack('!I', gz_file.read(4)) + ring_dict = json.loads(gz_file.read(json_len)) + ring_dict['replica2part2dev_id'] = [] + partition_count = 1 << (32 - ring_dict['part_shift']) + for x in xrange(ring_dict['replica_count']): + ring_dict['replica2part2dev_id'].append( + array.array('H', gz_file.read(2 * partition_count))) + return ring_dict + + @classmethod + def load(cls, filename): + """ + Load ring data from a file. + + :param filename: Path to a file serialized by the save() method. + :returns: A RingData instance containing the loaded data. + """ + gz_file = GzipFile(filename, 'rb') + # Python 2.6 GzipFile doesn't support BufferedIO + if hasattr(gz_file, '_checkReadable'): + gz_file = BufferedReader(gz_file) + + # See if the file is in the new format + magic = gz_file.read(4) + if magic == 'R1NG': + version, = struct.unpack('!H', gz_file.read(2)) + if version == 1: + ring_data = cls.deserialize_v1(gz_file) + else: + raise Exception('Unknown ring format version %d' % version) + else: + # Assume old-style pickled ring + gz_file.seek(0) + ring_data = pickle.load(gz_file) + if not hasattr(ring_data, 'devs'): + ring_data = RingData(ring_data['replica2part2dev_id'], + ring_data['devs'], ring_data['part_shift']) + return ring_data + + def serialize_v1(self, file_obj): + # Write out new-style serialization magic and version: + file_obj.write(struct.pack('!4sH', 'R1NG', 1)) + ring = self.to_dict() + json_text = json.dumps( + {'devs': ring['devs'], 'part_shift': ring['part_shift'], + 'replica_count': len(ring['replica2part2dev_id'])}) + json_len = len(json_text) + file_obj.write(struct.pack('!I', json_len)) + file_obj.write(json_text) + for part2dev_id in ring['replica2part2dev_id']: + file_obj.write(part2dev_id.tostring()) + + def save(self, filename): + """ + Serialize this RingData instance to disk. + + :param filename: File into which this instance should be serialized. + """ + gz_file = GzipFile(filename, 'wb') + self.serialize_v1(gz_file) + gz_file.close() + def to_dict(self): return {'devs': self.devs, 'replica2part2dev_id': self._replica2part2dev_id, @@ -44,43 +110,32 @@ class Ring(object): """ Partitioned consistent hashing ring. - :param pickle_gz_path: path to ring file + :param serialized_path: path to serialized RingData instance :param reload_time: time interval in seconds to check for a ring change """ - def __init__(self, pickle_gz_path, reload_time=15, ring_name=None): + def __init__(self, serialized_path, reload_time=15, ring_name=None): # can't use the ring unless HASH_PATH_SUFFIX is set validate_configuration() if ring_name: - self.pickle_gz_path = os.path.join(pickle_gz_path, + self.serialized_path = os.path.join(serialized_path, ring_name + '.ring.gz') else: - self.pickle_gz_path = os.path.join(pickle_gz_path) + self.serialized_path = os.path.join(serialized_path) self.reload_time = reload_time self._reload(force=True) def _reload(self, force=False): self._rtime = time() + self.reload_time if force or self.has_changed(): - ring_data = pickle.load(self._get_gz_file()) - if not hasattr(ring_data, 'devs'): - ring_data = RingData(ring_data['replica2part2dev_id'], - ring_data['devs'], ring_data['part_shift']) - self._mtime = getmtime(self.pickle_gz_path) + ring_data = RingData.load(self.serialized_path) + self._mtime = getmtime(self.serialized_path) self._devs = ring_data.devs self._replica2part2dev_id = ring_data._replica2part2dev_id self._part_shift = ring_data._part_shift self._rebuild_tier_data() - def _get_gz_file(self): - gz_file = GzipFile(self.pickle_gz_path, 'rb') - if hasattr(gz_file, '_checkReadable'): - return BufferedReader(gz_file) - else: - # Python 2.6 doesn't support BufferedIO - return gz_file - def _rebuild_tier_data(self): self.tier2devs = defaultdict(list) for dev in self._devs: @@ -121,7 +176,7 @@ class Ring(object): :returns: True if the ring on disk has changed, False otherwise """ - return getmtime(self.pickle_gz_path) != self._mtime + return getmtime(self.serialized_path) != self._mtime def get_part_nodes(self, part): """ @@ -172,7 +227,7 @@ class Ring(object): key = hash_path(account, container, obj, raw_digest=True) if time() > self._rtime: self._reload() - part = unpack_from('>I', key)[0] >> self._part_shift + part = struct.unpack_from('>I', key)[0] >> self._part_shift seen_ids = set() return part, [self._devs[r[part]] for r in self._replica2part2dev_id if not (r[part] in seen_ids or seen_ids.add(r[part]))] diff --git a/test/unit/common/ring/test_ring.py b/test/unit/common/ring/test_ring.py index efa1837d4a..9a37cc31b7 100644 --- a/test/unit/common/ring/test_ring.py +++ b/test/unit/common/ring/test_ring.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import array import cPickle as pickle import os import unittest @@ -25,6 +26,20 @@ from swift.common import ring, utils class TestRingData(unittest.TestCase): + def setUp(self): + self.testdir = os.path.join(os.path.dirname(__file__), 'ring_data') + rmtree(self.testdir, ignore_errors=1) + os.mkdir(self.testdir) + + def tearDown(self): + rmtree(self.testdir, ignore_errors=1) + + def assert_ring_data_equal(self, rd_expected, rd_got): + self.assertEquals(rd_expected._replica2part2dev_id, + rd_got._replica2part2dev_id) + self.assertEquals(rd_expected.devs, rd_got.devs) + self.assertEquals(rd_expected._part_shift, rd_got._part_shift) + def test_attrs(self): r2p2d = [[0, 1, 0, 1], [0, 1, 0, 1]] d = [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}] @@ -34,11 +49,23 @@ class TestRingData(unittest.TestCase): self.assertEquals(rd.devs, d) self.assertEquals(rd._part_shift, s) - def test_pickleable(self): + def test_can_load_pickled_ring_data(self): rd = ring.RingData([[0, 1, 0, 1], [0, 1, 0, 1]], [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30) + ring_fname = os.path.join(self.testdir, 'foo.ring.gz') for p in xrange(pickle.HIGHEST_PROTOCOL): - pickle.loads(pickle.dumps(rd, protocol=p)) + pickle.dump(rd, GzipFile(ring_fname, 'wb'), protocol=p) + ring_data = ring.RingData.load(ring_fname) + self.assert_ring_data_equal(rd, ring_data) + + def test_roundtrip_serialization(self): + ring_fname = os.path.join(self.testdir, 'foo.ring.gz') + rd = ring.RingData( + [array.array('H', [0, 1, 0, 1]), array.array('H',[0, 1, 0, 1])], + [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30) + rd.save(ring_fname) + rd2 = ring.RingData.load(ring_fname) + self.assert_ring_data_equal(rd, rd2) class TestRing(unittest.TestCase): @@ -49,9 +76,10 @@ class TestRing(unittest.TestCase): rmtree(self.testdir, ignore_errors=1) os.mkdir(self.testdir) self.testgz = os.path.join(self.testdir, 'whatever.ring.gz') - self.intended_replica2part2dev_id = [[0, 1, 0, 1], - [0, 1, 0, 1], - [3, 4, 3, 4]] + self.intended_replica2part2dev_id = [ + array.array('H', [0, 1, 0, 1]), + array.array('H', [0, 1, 0, 1]), + array.array('H', [3, 4, 3, 4])] self.intended_devs = [{'id': 0, 'zone': 0, 'weight': 1.0, 'ip': '10.1.1.1', 'port': 6000}, {'id': 1, 'zone': 0, 'weight': 1.0, @@ -63,9 +91,8 @@ class TestRing(unittest.TestCase): 'ip': '10.1.2.2', 'port': 6000}] self.intended_part_shift = 30 self.intended_reload_time = 15 - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) self.ring = ring.Ring(self.testdir, reload_time=self.intended_reload_time, ring_name='whatever') @@ -78,7 +105,7 @@ class TestRing(unittest.TestCase): self.assertEquals(self.ring._part_shift, self.intended_part_shift) self.assertEquals(self.ring.devs, self.intended_devs) self.assertEquals(self.ring.reload_time, self.intended_reload_time) - self.assertEquals(self.ring.pickle_gz_path, self.testgz) + self.assertEquals(self.ring.serialized_path, self.testgz) # test invalid endcap _orig_hash_path_suffix = utils.HASH_PATH_SUFFIX try: @@ -99,9 +126,8 @@ class TestRing(unittest.TestCase): orig_mtime = self.ring._mtime self.assertEquals(len(self.ring.devs), 5) self.intended_devs.append({'id': 3, 'zone': 3, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.ring.get_nodes('a') self.assertEquals(len(self.ring.devs), 6) @@ -113,9 +139,8 @@ class TestRing(unittest.TestCase): orig_mtime = self.ring._mtime self.assertEquals(len(self.ring.devs), 6) self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.ring.get_part_nodes(0) self.assertEquals(len(self.ring.devs), 7) @@ -128,9 +153,8 @@ class TestRing(unittest.TestCase): part, nodes = self.ring.get_nodes('a') self.assertEquals(len(self.ring.devs), 7) self.intended_devs.append({'id': 6, 'zone': 5, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.ring.get_more_nodes(part).next() self.assertEquals(len(self.ring.devs), 8) @@ -142,9 +166,8 @@ class TestRing(unittest.TestCase): orig_mtime = self.ring._mtime self.assertEquals(len(self.ring.devs), 8) self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0}) - pickle.dump(ring.RingData(self.intended_replica2part2dev_id, - self.intended_devs, self.intended_part_shift), - GzipFile(self.testgz, 'wb')) + ring.RingData(self.intended_replica2part2dev_id, + self.intended_devs, self.intended_part_shift).save(self.testgz) sleep(0.1) self.assertEquals(len(self.ring.devs), 9) self.assertNotEquals(self.ring._mtime, orig_mtime)