Use custom encoding for RingData, not pickle.

Serialize RingData in a versioned, custom format which is a combination
of a JSON-encoded header and .tostring() dumps of the
replica2part2dev_id arrays.  This format deserializes hundreds of times
faster than rings serialized with Python 2.7's pickle (a significant
performance regression for ring loading between Python 2.6 and Python
2.7).  Fixes bug 1031954.

swift.common.ring.ring.RingData is now responsible for serialization and
deserialization of its data via a new load() class method and save()
object method.  The new implementation is backward-compatible; if a ring
does not begin with a new-style magic string, it is assumed to be an
old-style pickle-dumped ring and is handled as before.  So new Swift
code can read old rings, but old Swift code will not be able to read
newly-serialized rings. THIS SHOULD BE MENTIONED PROMINENTLY IN THE
RELEASE NOTES.

I didn't want to bite of more than necessary, so I didn't mess with
builder file serialization.

Change-Id: I799b9a4c894d54fb16592443904ac055b2638e2d
This commit is contained in:
Darrell Bishop 2012-08-05 00:51:49 -07:00
parent ef3f8bb335
commit f8ce43a218
4 changed files with 129 additions and 54 deletions

View File

@ -610,13 +610,11 @@ swift-ring-builder <builder_file> rebalance
print '-' * 79 print '-' * 79
status = EXIT_WARNING status = EXIT_WARNING
ts = time() ts = time()
pickle.dump(builder.get_ring().to_dict(), builder.get_ring().save(
GzipFile(pathjoin(backup_dir, '%d.' % ts + pathjoin(backup_dir, '%d.' % ts + basename(ring_file)))
basename(ring_file)), 'wb'), protocol=2)
pickle.dump(builder.to_dict(), open(pathjoin(backup_dir, pickle.dump(builder.to_dict(), open(pathjoin(backup_dir,
'%d.' % ts + basename(argv[1])), 'wb'), protocol=2) '%d.' % ts + basename(argv[1])), 'wb'), protocol=2)
pickle.dump(builder.get_ring().to_dict(), GzipFile(ring_file, 'wb'), builder.get_ring().save(ring_file)
protocol=2)
pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2) pickle.dump(builder.to_dict(), open(argv[1], 'wb'), protocol=2)
exit(status) exit(status)
@ -644,10 +642,9 @@ swift-ring-builder <builder_file> write_ring
'"rebalance"?' '"rebalance"?'
else: else:
print 'Warning: Writing an empty ring' print 'Warning: Writing an empty ring'
pickle.dump(ring_data.to_dict(), ring_data.save(
GzipFile(pathjoin(backup_dir, '%d.' % time() + pathjoin(backup_dir, '%d.' % time() + basename(ring_file)))
basename(ring_file)), 'wb'), protocol=2) ring_data.save(ring_file)
pickle.dump(ring_data.to_dict(), GzipFile(ring_file, 'wb'), protocol=2)
exit(EXIT_SUCCESS) exit(EXIT_SUCCESS)
def pretend_min_part_hours_passed(): def pretend_min_part_hours_passed():

View File

@ -29,9 +29,9 @@ from swift.common.ring.utils import tiers_for_dev, build_tier_tree
class RingBuilder(object): class RingBuilder(object):
""" """
Used to build swift.common.RingData instances to be written to disk and Used to build swift.common.ring.RingData instances to be written to disk
used with swift.common.ring.Ring instances. See bin/ring-builder.py for and used with swift.common.ring.Ring instances. See bin/swift-ring-builder
example usage. for example usage.
The instance variable devs_changed indicates if the device information has The instance variable devs_changed indicates if the device information has
changed since the last balancing. This can be used by tools to know whether changed since the last balancing. This can be used by tools to know whether

View File

@ -13,11 +13,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import array
import cPickle as pickle import cPickle as pickle
from collections import defaultdict from collections import defaultdict
from gzip import GzipFile from gzip import GzipFile
from os.path import getmtime import json
from struct import unpack_from from os.path import getmtime, join as pathjoin
import struct
from time import time from time import time
import os import os
from io import BufferedReader from io import BufferedReader
@ -34,6 +36,70 @@ class RingData(object):
self._replica2part2dev_id = replica2part2dev_id self._replica2part2dev_id = replica2part2dev_id
self._part_shift = part_shift self._part_shift = part_shift
@classmethod
def deserialize_v1(cls, gz_file):
json_len, = struct.unpack('!I', gz_file.read(4))
ring_dict = json.loads(gz_file.read(json_len))
ring_dict['replica2part2dev_id'] = []
partition_count = 1 << (32 - ring_dict['part_shift'])
for x in xrange(ring_dict['replica_count']):
ring_dict['replica2part2dev_id'].append(
array.array('H', gz_file.read(2 * partition_count)))
return ring_dict
@classmethod
def load(cls, filename):
"""
Load ring data from a file.
:param filename: Path to a file serialized by the save() method.
:returns: A RingData instance containing the loaded data.
"""
gz_file = GzipFile(filename, 'rb')
# Python 2.6 GzipFile doesn't support BufferedIO
if hasattr(gz_file, '_checkReadable'):
gz_file = BufferedReader(gz_file)
# See if the file is in the new format
magic = gz_file.read(4)
if magic == 'R1NG':
version, = struct.unpack('!H', gz_file.read(2))
if version == 1:
ring_data = cls.deserialize_v1(gz_file)
else:
raise Exception('Unknown ring format version %d' % version)
else:
# Assume old-style pickled ring
gz_file.seek(0)
ring_data = pickle.load(gz_file)
if not hasattr(ring_data, 'devs'):
ring_data = RingData(ring_data['replica2part2dev_id'],
ring_data['devs'], ring_data['part_shift'])
return ring_data
def serialize_v1(self, file_obj):
# Write out new-style serialization magic and version:
file_obj.write(struct.pack('!4sH', 'R1NG', 1))
ring = self.to_dict()
json_text = json.dumps(
{'devs': ring['devs'], 'part_shift': ring['part_shift'],
'replica_count': len(ring['replica2part2dev_id'])})
json_len = len(json_text)
file_obj.write(struct.pack('!I', json_len))
file_obj.write(json_text)
for part2dev_id in ring['replica2part2dev_id']:
file_obj.write(part2dev_id.tostring())
def save(self, filename):
"""
Serialize this RingData instance to disk.
:param filename: File into which this instance should be serialized.
"""
gz_file = GzipFile(filename, 'wb')
self.serialize_v1(gz_file)
gz_file.close()
def to_dict(self): def to_dict(self):
return {'devs': self.devs, return {'devs': self.devs,
'replica2part2dev_id': self._replica2part2dev_id, 'replica2part2dev_id': self._replica2part2dev_id,
@ -44,43 +110,32 @@ class Ring(object):
""" """
Partitioned consistent hashing ring. Partitioned consistent hashing ring.
:param pickle_gz_path: path to ring file :param serialized_path: path to serialized RingData instance
:param reload_time: time interval in seconds to check for a ring change :param reload_time: time interval in seconds to check for a ring change
""" """
def __init__(self, pickle_gz_path, reload_time=15, ring_name=None): def __init__(self, serialized_path, reload_time=15, ring_name=None):
# can't use the ring unless HASH_PATH_SUFFIX is set # can't use the ring unless HASH_PATH_SUFFIX is set
validate_configuration() validate_configuration()
if ring_name: if ring_name:
self.pickle_gz_path = os.path.join(pickle_gz_path, self.serialized_path = os.path.join(serialized_path,
ring_name + '.ring.gz') ring_name + '.ring.gz')
else: else:
self.pickle_gz_path = os.path.join(pickle_gz_path) self.serialized_path = os.path.join(serialized_path)
self.reload_time = reload_time self.reload_time = reload_time
self._reload(force=True) self._reload(force=True)
def _reload(self, force=False): def _reload(self, force=False):
self._rtime = time() + self.reload_time self._rtime = time() + self.reload_time
if force or self.has_changed(): if force or self.has_changed():
ring_data = pickle.load(self._get_gz_file()) ring_data = RingData.load(self.serialized_path)
if not hasattr(ring_data, 'devs'): self._mtime = getmtime(self.serialized_path)
ring_data = RingData(ring_data['replica2part2dev_id'],
ring_data['devs'], ring_data['part_shift'])
self._mtime = getmtime(self.pickle_gz_path)
self._devs = ring_data.devs self._devs = ring_data.devs
self._replica2part2dev_id = ring_data._replica2part2dev_id self._replica2part2dev_id = ring_data._replica2part2dev_id
self._part_shift = ring_data._part_shift self._part_shift = ring_data._part_shift
self._rebuild_tier_data() self._rebuild_tier_data()
def _get_gz_file(self):
gz_file = GzipFile(self.pickle_gz_path, 'rb')
if hasattr(gz_file, '_checkReadable'):
return BufferedReader(gz_file)
else:
# Python 2.6 doesn't support BufferedIO
return gz_file
def _rebuild_tier_data(self): def _rebuild_tier_data(self):
self.tier2devs = defaultdict(list) self.tier2devs = defaultdict(list)
for dev in self._devs: for dev in self._devs:
@ -121,7 +176,7 @@ class Ring(object):
:returns: True if the ring on disk has changed, False otherwise :returns: True if the ring on disk has changed, False otherwise
""" """
return getmtime(self.pickle_gz_path) != self._mtime return getmtime(self.serialized_path) != self._mtime
def get_part_nodes(self, part): def get_part_nodes(self, part):
""" """
@ -172,7 +227,7 @@ class Ring(object):
key = hash_path(account, container, obj, raw_digest=True) key = hash_path(account, container, obj, raw_digest=True)
if time() > self._rtime: if time() > self._rtime:
self._reload() self._reload()
part = unpack_from('>I', key)[0] >> self._part_shift part = struct.unpack_from('>I', key)[0] >> self._part_shift
seen_ids = set() seen_ids = set()
return part, [self._devs[r[part]] for r in self._replica2part2dev_id return part, [self._devs[r[part]] for r in self._replica2part2dev_id
if not (r[part] in seen_ids or seen_ids.add(r[part]))] if not (r[part] in seen_ids or seen_ids.add(r[part]))]

View File

@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import array
import cPickle as pickle import cPickle as pickle
import os import os
import unittest import unittest
@ -25,6 +26,20 @@ from swift.common import ring, utils
class TestRingData(unittest.TestCase): class TestRingData(unittest.TestCase):
def setUp(self):
self.testdir = os.path.join(os.path.dirname(__file__), 'ring_data')
rmtree(self.testdir, ignore_errors=1)
os.mkdir(self.testdir)
def tearDown(self):
rmtree(self.testdir, ignore_errors=1)
def assert_ring_data_equal(self, rd_expected, rd_got):
self.assertEquals(rd_expected._replica2part2dev_id,
rd_got._replica2part2dev_id)
self.assertEquals(rd_expected.devs, rd_got.devs)
self.assertEquals(rd_expected._part_shift, rd_got._part_shift)
def test_attrs(self): def test_attrs(self):
r2p2d = [[0, 1, 0, 1], [0, 1, 0, 1]] r2p2d = [[0, 1, 0, 1], [0, 1, 0, 1]]
d = [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}] d = [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}]
@ -34,11 +49,23 @@ class TestRingData(unittest.TestCase):
self.assertEquals(rd.devs, d) self.assertEquals(rd.devs, d)
self.assertEquals(rd._part_shift, s) self.assertEquals(rd._part_shift, s)
def test_pickleable(self): def test_can_load_pickled_ring_data(self):
rd = ring.RingData([[0, 1, 0, 1], [0, 1, 0, 1]], rd = ring.RingData([[0, 1, 0, 1], [0, 1, 0, 1]],
[{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30) [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30)
ring_fname = os.path.join(self.testdir, 'foo.ring.gz')
for p in xrange(pickle.HIGHEST_PROTOCOL): for p in xrange(pickle.HIGHEST_PROTOCOL):
pickle.loads(pickle.dumps(rd, protocol=p)) pickle.dump(rd, GzipFile(ring_fname, 'wb'), protocol=p)
ring_data = ring.RingData.load(ring_fname)
self.assert_ring_data_equal(rd, ring_data)
def test_roundtrip_serialization(self):
ring_fname = os.path.join(self.testdir, 'foo.ring.gz')
rd = ring.RingData(
[array.array('H', [0, 1, 0, 1]), array.array('H',[0, 1, 0, 1])],
[{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30)
rd.save(ring_fname)
rd2 = ring.RingData.load(ring_fname)
self.assert_ring_data_equal(rd, rd2)
class TestRing(unittest.TestCase): class TestRing(unittest.TestCase):
@ -49,9 +76,10 @@ class TestRing(unittest.TestCase):
rmtree(self.testdir, ignore_errors=1) rmtree(self.testdir, ignore_errors=1)
os.mkdir(self.testdir) os.mkdir(self.testdir)
self.testgz = os.path.join(self.testdir, 'whatever.ring.gz') self.testgz = os.path.join(self.testdir, 'whatever.ring.gz')
self.intended_replica2part2dev_id = [[0, 1, 0, 1], self.intended_replica2part2dev_id = [
[0, 1, 0, 1], array.array('H', [0, 1, 0, 1]),
[3, 4, 3, 4]] array.array('H', [0, 1, 0, 1]),
array.array('H', [3, 4, 3, 4])]
self.intended_devs = [{'id': 0, 'zone': 0, 'weight': 1.0, self.intended_devs = [{'id': 0, 'zone': 0, 'weight': 1.0,
'ip': '10.1.1.1', 'port': 6000}, 'ip': '10.1.1.1', 'port': 6000},
{'id': 1, 'zone': 0, 'weight': 1.0, {'id': 1, 'zone': 0, 'weight': 1.0,
@ -63,9 +91,8 @@ class TestRing(unittest.TestCase):
'ip': '10.1.2.2', 'port': 6000}] 'ip': '10.1.2.2', 'port': 6000}]
self.intended_part_shift = 30 self.intended_part_shift = 30
self.intended_reload_time = 15 self.intended_reload_time = 15
pickle.dump(ring.RingData(self.intended_replica2part2dev_id, ring.RingData(self.intended_replica2part2dev_id,
self.intended_devs, self.intended_part_shift), self.intended_devs, self.intended_part_shift).save(self.testgz)
GzipFile(self.testgz, 'wb'))
self.ring = ring.Ring(self.testdir, self.ring = ring.Ring(self.testdir,
reload_time=self.intended_reload_time, ring_name='whatever') reload_time=self.intended_reload_time, ring_name='whatever')
@ -78,7 +105,7 @@ class TestRing(unittest.TestCase):
self.assertEquals(self.ring._part_shift, self.intended_part_shift) self.assertEquals(self.ring._part_shift, self.intended_part_shift)
self.assertEquals(self.ring.devs, self.intended_devs) self.assertEquals(self.ring.devs, self.intended_devs)
self.assertEquals(self.ring.reload_time, self.intended_reload_time) self.assertEquals(self.ring.reload_time, self.intended_reload_time)
self.assertEquals(self.ring.pickle_gz_path, self.testgz) self.assertEquals(self.ring.serialized_path, self.testgz)
# test invalid endcap # test invalid endcap
_orig_hash_path_suffix = utils.HASH_PATH_SUFFIX _orig_hash_path_suffix = utils.HASH_PATH_SUFFIX
try: try:
@ -99,9 +126,8 @@ class TestRing(unittest.TestCase):
orig_mtime = self.ring._mtime orig_mtime = self.ring._mtime
self.assertEquals(len(self.ring.devs), 5) self.assertEquals(len(self.ring.devs), 5)
self.intended_devs.append({'id': 3, 'zone': 3, 'weight': 1.0}) self.intended_devs.append({'id': 3, 'zone': 3, 'weight': 1.0})
pickle.dump(ring.RingData(self.intended_replica2part2dev_id, ring.RingData(self.intended_replica2part2dev_id,
self.intended_devs, self.intended_part_shift), self.intended_devs, self.intended_part_shift).save(self.testgz)
GzipFile(self.testgz, 'wb'))
sleep(0.1) sleep(0.1)
self.ring.get_nodes('a') self.ring.get_nodes('a')
self.assertEquals(len(self.ring.devs), 6) self.assertEquals(len(self.ring.devs), 6)
@ -113,9 +139,8 @@ class TestRing(unittest.TestCase):
orig_mtime = self.ring._mtime orig_mtime = self.ring._mtime
self.assertEquals(len(self.ring.devs), 6) self.assertEquals(len(self.ring.devs), 6)
self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0}) self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0})
pickle.dump(ring.RingData(self.intended_replica2part2dev_id, ring.RingData(self.intended_replica2part2dev_id,
self.intended_devs, self.intended_part_shift), self.intended_devs, self.intended_part_shift).save(self.testgz)
GzipFile(self.testgz, 'wb'))
sleep(0.1) sleep(0.1)
self.ring.get_part_nodes(0) self.ring.get_part_nodes(0)
self.assertEquals(len(self.ring.devs), 7) self.assertEquals(len(self.ring.devs), 7)
@ -128,9 +153,8 @@ class TestRing(unittest.TestCase):
part, nodes = self.ring.get_nodes('a') part, nodes = self.ring.get_nodes('a')
self.assertEquals(len(self.ring.devs), 7) self.assertEquals(len(self.ring.devs), 7)
self.intended_devs.append({'id': 6, 'zone': 5, 'weight': 1.0}) self.intended_devs.append({'id': 6, 'zone': 5, 'weight': 1.0})
pickle.dump(ring.RingData(self.intended_replica2part2dev_id, ring.RingData(self.intended_replica2part2dev_id,
self.intended_devs, self.intended_part_shift), self.intended_devs, self.intended_part_shift).save(self.testgz)
GzipFile(self.testgz, 'wb'))
sleep(0.1) sleep(0.1)
self.ring.get_more_nodes(part).next() self.ring.get_more_nodes(part).next()
self.assertEquals(len(self.ring.devs), 8) self.assertEquals(len(self.ring.devs), 8)
@ -142,9 +166,8 @@ class TestRing(unittest.TestCase):
orig_mtime = self.ring._mtime orig_mtime = self.ring._mtime
self.assertEquals(len(self.ring.devs), 8) self.assertEquals(len(self.ring.devs), 8)
self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0}) self.intended_devs.append({'id': 5, 'zone': 4, 'weight': 1.0})
pickle.dump(ring.RingData(self.intended_replica2part2dev_id, ring.RingData(self.intended_replica2part2dev_id,
self.intended_devs, self.intended_part_shift), self.intended_devs, self.intended_part_shift).save(self.testgz)
GzipFile(self.testgz, 'wb'))
sleep(0.1) sleep(0.1)
self.assertEquals(len(self.ring.devs), 9) self.assertEquals(len(self.ring.devs), 9)
self.assertNotEquals(self.ring._mtime, orig_mtime) self.assertNotEquals(self.ring._mtime, orig_mtime)