ShardRange: add is_child_of method

Adds an is_child_of method that infers the parent-child relationship
of two shard ranges from their names. This new method is limited to
use only under the same account.

Co-Authored-By: Jianjian Huo <jhuo@nvidia.com>
Change-Id: Iac3a8ec5d8947989b64aa27f40caa3d8d1423a7c
This commit is contained in:
Alistair Coles 2022-08-23 19:21:43 +01:00
parent b05b27c0b6
commit 73bce63312
2 changed files with 303 additions and 16 deletions

View File

@ -5140,6 +5140,111 @@ class ShardRangeOuterBound(object):
__nonzero__ = __bool__
class ShardName(object):
"""
Encapsulates the components of a shard name.
Instances of this class would typically be constructed via the create() or
parse() class methods.
Shard names have the form:
<account>/<root_container>-<parent_container_hash>-<timestamp>-<index>
Note: not all instances of :class:`~swift.common.utils.ShardRange` have
names that will parse as a :class:`~swift.common.utils.ShardName`; root
container own shard ranges, for example, have a simpler name format of
<account>/<root_container>.
"""
def __init__(self, account, root_container,
parent_container_hash,
timestamp,
index):
self.account = self._validate(account)
self.root_container = self._validate(root_container)
self.parent_container_hash = self._validate(parent_container_hash)
self.timestamp = Timestamp(timestamp)
self.index = int(index)
@classmethod
def _validate(cls, arg):
if arg is None:
raise ValueError('arg must not be None')
return arg
def __str__(self):
return '%s/%s-%s-%s-%s' % (self.account,
self.root_container,
self.parent_container_hash,
self.timestamp.internal,
self.index)
@classmethod
def hash_container_name(cls, container_name):
"""
Calculates the hash of a container name.
:param container_name: name to be hashed.
:return: the hexdigest of the md5 hash of ``container_name``.
:raises ValueError: if ``container_name`` is None.
"""
cls._validate(container_name)
if not isinstance(container_name, bytes):
container_name = container_name.encode('utf-8')
hash = md5(container_name, usedforsecurity=False).hexdigest()
return hash
@classmethod
def create(cls, account, root_container, parent_container,
timestamp, index):
"""
Create an instance of :class:`~swift.common.utils.ShardName`.
:param account: the hidden internal account to which the shard
container belongs.
:param root_container: the name of the root container for the shard.
:param parent_container: the name of the parent container for the
shard.
:param timestamp: an instance of :class:`~swift.common.utils.Timestamp`
:param index: a unique index that will distinguish the path from any
other path generated using the same combination of
``account``, ``root_container``, ``parent_container`` and
``timestamp``.
:return: an instance of :class:`~swift.common.utils.ShardName`.
:raises ValueError: if any argument is None
"""
# we make the shard name unique with respect to other shards names by
# embedding a hash of the parent container name; we use a hash (rather
# than the actual parent container name) to prevent shard names become
# longer with every generation.
parent_container_hash = cls.hash_container_name(parent_container)
return cls(account, root_container, parent_container_hash, timestamp,
index)
@classmethod
def parse(cls, name):
"""
Parse ``name`` to an instance of
:class:`~swift.common.utils.ShardName`.
:param name: a shard name which should have the form:
<account>/
<root_container>-<parent_container_hash>-<timestamp>-<index>
:return: an instance of :class:`~swift.common.utils.ShardName`.
:raises ValueError: if ``name`` is not a valid shard name.
"""
try:
account, container = name.split('/', 1)
root_container, parent_container_hash, timestamp, index = \
container.rsplit('-', 3)
return cls(account, root_container, parent_container_hash,
timestamp, index)
except ValueError:
raise ValueError('invalid name: %s' % name)
class ShardRange(object):
"""
A ShardRange encapsulates sharding state related to a container including
@ -5276,16 +5381,38 @@ class ShardRange(object):
raise TypeError('must be a string type')
return self._encode(bound)
@classmethod
def _make_container_name(cls, root_container, parent_container, timestamp,
index):
if not isinstance(parent_container, bytes):
parent_container = parent_container.encode('utf-8')
return "%s-%s-%s-%s" % (root_container,
md5(parent_container,
usedforsecurity=False).hexdigest(),
cls._to_timestamp(timestamp).internal,
index)
def is_child_of(self, parent):
"""
Test if this shard range is a child of another shard range. The
parent-child relationship is inferred from the names of the shard
ranges. This method is limited to work only within the scope of the
same account.
:param parent: an instance of ``ShardRange``.
:return: True if ``parent`` is the parent of this shard range, False
otherwise, assuming that they are within the same account.
"""
# note: We limit the usages of this method to be within the same
# account, because account shard prefix is configurable and it's hard
# to perform checking without breaking backward-compatibility.
try:
self_parsed_name = ShardName.parse(self.name)
except ValueError:
# self is not a shard and therefore not a child.
return False
try:
parsed_parent_name = ShardName.parse(parent.name)
parent_root_container = parsed_parent_name.root_container
except ValueError:
# parent is a root container.
parent_root_container = parent.container
return (
self_parsed_name.root_container == parent_root_container
and self_parsed_name.parent_container_hash
== ShardName.hash_container_name(parent.container)
)
@classmethod
def make_path(cls, shards_account, root_container, parent_container,
@ -5308,9 +5435,12 @@ class ShardRange(object):
``timestamp``.
:return: a string of the form <account_name>/<container_name>
"""
shard_container = cls._make_container_name(
root_container, parent_container, timestamp, index)
return '%s/%s' % (shards_account, shard_container)
timestamp = cls._to_timestamp(timestamp)
return str(ShardName.create(shards_account,
root_container,
parent_container,
timestamp,
index))
@classmethod
def _to_timestamp(cls, timestamp):

View File

@ -7946,6 +7946,57 @@ class TestDistributeEvenly(unittest.TestCase):
self.assertEqual(out, [[0], [1], [2], [3], [4], [], []])
class TestShardName(unittest.TestCase):
def test(self):
ts = utils.Timestamp.now()
created = utils.ShardName.create('a', 'root', 'parent', ts, 1)
parent_hash = md5(b'parent', usedforsecurity=False).hexdigest()
expected = 'a/root-%s-%s-1' % (parent_hash, ts.internal)
actual = str(created)
self.assertEqual(expected, actual)
parsed = utils.ShardName.parse(actual)
self.assertEqual('a', parsed.account)
self.assertEqual('root', parsed.root_container)
self.assertEqual(parent_hash, parsed.parent_container_hash)
self.assertEqual(ts, parsed.timestamp)
self.assertEqual(1, parsed.index)
self.assertEqual(actual, str(parsed))
def test_root_has_hyphens(self):
parsed = utils.ShardName.parse(
'a/root-has-some-hyphens-hash-1234-99')
self.assertEqual('a', parsed.account)
self.assertEqual('root-has-some-hyphens', parsed.root_container)
self.assertEqual('hash', parsed.parent_container_hash)
self.assertEqual(utils.Timestamp(1234), parsed.timestamp)
self.assertEqual(99, parsed.index)
def test_bad_parse(self):
with self.assertRaises(ValueError) as cm:
utils.ShardName.parse('a')
self.assertEqual('invalid name: a', str(cm.exception))
with self.assertRaises(ValueError) as cm:
utils.ShardName.parse('a/c')
self.assertEqual('invalid name: a/c', str(cm.exception))
with self.assertRaises(ValueError) as cm:
utils.ShardName.parse('a/root-hash-bad')
self.assertEqual('invalid name: a/root-hash-bad', str(cm.exception))
with self.assertRaises(ValueError) as cm:
utils.ShardName.parse('a/root-hash-bad-0')
self.assertEqual('invalid name: a/root-hash-bad-0',
str(cm.exception))
with self.assertRaises(ValueError) as cm:
utils.ShardName.parse('a/root-hash-12345678.12345-bad')
self.assertEqual('invalid name: a/root-hash-12345678.12345-bad',
str(cm.exception))
def test_bad_create(self):
with self.assertRaises(ValueError):
utils.ShardName.create('a', 'root', 'hash', 'bad', '0')
with self.assertRaises(ValueError):
utils.ShardName.create('a', 'root', None, '1235678', 'bad')
class TestShardRange(unittest.TestCase):
def setUp(self):
self.ts_iter = make_timestamp_iter()
@ -8878,9 +8929,115 @@ class TestShardRange(unittest.TestCase):
actual = utils.ShardRange.make_path(
'a', 'root', 'parent', ts.internal, '3')
self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual)
actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 'foo')
self.assertEqual('a/root-%s-%s-foo' % (parent_hash, ts.internal),
actual)
def test_is_child_of(self):
# Set up some shard ranges in relational hierarchy:
# account -> root -> grandparent -> parent -> child
# using abbreviated names a_r_gp_p_c
# account 1
ts = next(self.ts_iter)
a1_r1 = utils.ShardRange('a1/r1', ts)
ts = next(self.ts_iter)
a1_r1_gp1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', 'r1', ts, 1), ts)
ts = next(self.ts_iter)
a1_r1_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', a1_r1_gp1.container, ts, 1), ts)
ts = next(self.ts_iter)
a1_r1_gp1_p1_c1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', a1_r1_gp1_p1.container, ts, 1), ts)
ts = next(self.ts_iter)
a1_r1_gp1_p1_c2 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', a1_r1_gp1_p1.container, ts, 2), ts)
ts = next(self.ts_iter)
a1_r1_gp1_p2 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', a1_r1_gp1.container, ts, 2), ts)
ts = next(self.ts_iter)
a1_r1_gp2 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', 'r1', ts, 2), ts) # different index
ts = next(self.ts_iter)
a1_r1_gp2_p1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', a1_r1_gp2.container, ts, 1), ts)
# drop the index from grandparent name
ts = next(self.ts_iter)
rogue_a1_r1_gp = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r1', 'r1', ts, 1)[:-2], ts)
# account 1, root 2
ts = next(self.ts_iter)
a1_r2 = utils.ShardRange('a1/r2', ts)
ts = next(self.ts_iter)
a1_r2_gp1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r2', a1_r2.container, ts, 1), ts)
ts = next(self.ts_iter)
a1_r2_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a1', 'r2', a1_r2_gp1.container, ts, 3), ts)
# account 2, root1
a2_r1 = utils.ShardRange('a2/r1', ts)
ts = next(self.ts_iter)
a2_r1_gp1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a2', 'r1', a2_r1.container, ts, 1), ts)
ts = next(self.ts_iter)
a2_r1_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path(
'.shards_a2', 'r1', a2_r1_gp1.container, ts, 3), ts)
# verify parent-child within same account.
self.assertTrue(a1_r1_gp1.is_child_of(a1_r1))
self.assertTrue(a1_r1_gp1_p1.is_child_of(a1_r1_gp1))
self.assertTrue(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1))
self.assertTrue(a1_r1_gp1_p1_c2.is_child_of(a1_r1_gp1_p1))
self.assertTrue(a1_r1_gp1_p2.is_child_of(a1_r1_gp1))
self.assertTrue(a1_r1_gp2.is_child_of(a1_r1))
self.assertTrue(a1_r1_gp2_p1.is_child_of(a1_r1_gp2))
self.assertTrue(a1_r2_gp1.is_child_of(a1_r2))
self.assertTrue(a1_r2_gp1_p1.is_child_of(a1_r2_gp1))
self.assertTrue(a2_r1_gp1.is_child_of(a2_r1))
self.assertTrue(a2_r1_gp1_p1.is_child_of(a2_r1_gp1))
# verify not parent-child within same account.
self.assertFalse(a1_r1.is_child_of(a1_r1))
self.assertFalse(a1_r1.is_child_of(a1_r2))
self.assertFalse(a1_r1_gp1.is_child_of(a1_r2))
self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1))
self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1_p1))
self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1_p1_c1))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp2))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2_gp1))
self.assertFalse(a1_r1_gp1_p1.is_child_of(rogue_a1_r1_gp))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p2))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2_gp1_p1))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1_c1))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1_c2))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p2))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp2_p1))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1_c1))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1_c2))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r2_gp1_p1))
self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a2_r1_gp1_p1))
self.assertFalse(a1_r2_gp1.is_child_of(a1_r1))
self.assertFalse(a1_r2_gp1_p1.is_child_of(a1_r1_gp1))
# across different accounts, 'is_child_of' works in some cases but not
# all, so don't use it for shard ranges in different accounts.
self.assertFalse(a1_r1.is_child_of(a2_r1))
self.assertFalse(a2_r1_gp1_p1.is_child_of(a1_r1_gp1))
self.assertFalse(a1_r1_gp1_p1.is_child_of(a2_r1))
self.assertTrue(a1_r1_gp1.is_child_of(a2_r1))
self.assertTrue(a2_r1_gp1.is_child_of(a1_r1))
def test_expand(self):
bounds = (('', 'd'), ('d', 'k'), ('k', 't'), ('t', ''))