diff --git a/swift/common/utils.py b/swift/common/utils.py index b0fef44195..5bd25a1b20 100644 --- a/swift/common/utils.py +++ b/swift/common/utils.py @@ -5140,6 +5140,111 @@ class ShardRangeOuterBound(object): __nonzero__ = __bool__ +class ShardName(object): + """ + Encapsulates the components of a shard name. + + Instances of this class would typically be constructed via the create() or + parse() class methods. + + Shard names have the form: + + /--- + + Note: not all instances of :class:`~swift.common.utils.ShardRange` have + names that will parse as a :class:`~swift.common.utils.ShardName`; root + container own shard ranges, for example, have a simpler name format of + /. + """ + def __init__(self, account, root_container, + parent_container_hash, + timestamp, + index): + self.account = self._validate(account) + self.root_container = self._validate(root_container) + self.parent_container_hash = self._validate(parent_container_hash) + self.timestamp = Timestamp(timestamp) + self.index = int(index) + + @classmethod + def _validate(cls, arg): + if arg is None: + raise ValueError('arg must not be None') + return arg + + def __str__(self): + return '%s/%s-%s-%s-%s' % (self.account, + self.root_container, + self.parent_container_hash, + self.timestamp.internal, + self.index) + + @classmethod + def hash_container_name(cls, container_name): + """ + Calculates the hash of a container name. + + :param container_name: name to be hashed. + :return: the hexdigest of the md5 hash of ``container_name``. + :raises ValueError: if ``container_name`` is None. + """ + cls._validate(container_name) + if not isinstance(container_name, bytes): + container_name = container_name.encode('utf-8') + hash = md5(container_name, usedforsecurity=False).hexdigest() + return hash + + @classmethod + def create(cls, account, root_container, parent_container, + timestamp, index): + """ + Create an instance of :class:`~swift.common.utils.ShardName`. + + :param account: the hidden internal account to which the shard + container belongs. + :param root_container: the name of the root container for the shard. + :param parent_container: the name of the parent container for the + shard. + :param timestamp: an instance of :class:`~swift.common.utils.Timestamp` + :param index: a unique index that will distinguish the path from any + other path generated using the same combination of + ``account``, ``root_container``, ``parent_container`` and + ``timestamp``. + + :return: an instance of :class:`~swift.common.utils.ShardName`. + :raises ValueError: if any argument is None + """ + # we make the shard name unique with respect to other shards names by + # embedding a hash of the parent container name; we use a hash (rather + # than the actual parent container name) to prevent shard names become + # longer with every generation. + parent_container_hash = cls.hash_container_name(parent_container) + return cls(account, root_container, parent_container_hash, timestamp, + index) + + @classmethod + def parse(cls, name): + """ + Parse ``name`` to an instance of + :class:`~swift.common.utils.ShardName`. + + :param name: a shard name which should have the form: + / + --- + + :return: an instance of :class:`~swift.common.utils.ShardName`. + :raises ValueError: if ``name`` is not a valid shard name. + """ + try: + account, container = name.split('/', 1) + root_container, parent_container_hash, timestamp, index = \ + container.rsplit('-', 3) + return cls(account, root_container, parent_container_hash, + timestamp, index) + except ValueError: + raise ValueError('invalid name: %s' % name) + + class ShardRange(object): """ A ShardRange encapsulates sharding state related to a container including @@ -5276,16 +5381,38 @@ class ShardRange(object): raise TypeError('must be a string type') return self._encode(bound) - @classmethod - def _make_container_name(cls, root_container, parent_container, timestamp, - index): - if not isinstance(parent_container, bytes): - parent_container = parent_container.encode('utf-8') - return "%s-%s-%s-%s" % (root_container, - md5(parent_container, - usedforsecurity=False).hexdigest(), - cls._to_timestamp(timestamp).internal, - index) + def is_child_of(self, parent): + """ + Test if this shard range is a child of another shard range. The + parent-child relationship is inferred from the names of the shard + ranges. This method is limited to work only within the scope of the + same account. + + :param parent: an instance of ``ShardRange``. + :return: True if ``parent`` is the parent of this shard range, False + otherwise, assuming that they are within the same account. + """ + # note: We limit the usages of this method to be within the same + # account, because account shard prefix is configurable and it's hard + # to perform checking without breaking backward-compatibility. + try: + self_parsed_name = ShardName.parse(self.name) + except ValueError: + # self is not a shard and therefore not a child. + return False + + try: + parsed_parent_name = ShardName.parse(parent.name) + parent_root_container = parsed_parent_name.root_container + except ValueError: + # parent is a root container. + parent_root_container = parent.container + + return ( + self_parsed_name.root_container == parent_root_container + and self_parsed_name.parent_container_hash + == ShardName.hash_container_name(parent.container) + ) @classmethod def make_path(cls, shards_account, root_container, parent_container, @@ -5308,9 +5435,12 @@ class ShardRange(object): ``timestamp``. :return: a string of the form / """ - shard_container = cls._make_container_name( - root_container, parent_container, timestamp, index) - return '%s/%s' % (shards_account, shard_container) + timestamp = cls._to_timestamp(timestamp) + return str(ShardName.create(shards_account, + root_container, + parent_container, + timestamp, + index)) @classmethod def _to_timestamp(cls, timestamp): diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py index c99c39a9f7..7d10aa35e5 100644 --- a/test/unit/common/test_utils.py +++ b/test/unit/common/test_utils.py @@ -7946,6 +7946,57 @@ class TestDistributeEvenly(unittest.TestCase): self.assertEqual(out, [[0], [1], [2], [3], [4], [], []]) +class TestShardName(unittest.TestCase): + def test(self): + ts = utils.Timestamp.now() + created = utils.ShardName.create('a', 'root', 'parent', ts, 1) + parent_hash = md5(b'parent', usedforsecurity=False).hexdigest() + expected = 'a/root-%s-%s-1' % (parent_hash, ts.internal) + actual = str(created) + self.assertEqual(expected, actual) + parsed = utils.ShardName.parse(actual) + self.assertEqual('a', parsed.account) + self.assertEqual('root', parsed.root_container) + self.assertEqual(parent_hash, parsed.parent_container_hash) + self.assertEqual(ts, parsed.timestamp) + self.assertEqual(1, parsed.index) + self.assertEqual(actual, str(parsed)) + + def test_root_has_hyphens(self): + parsed = utils.ShardName.parse( + 'a/root-has-some-hyphens-hash-1234-99') + self.assertEqual('a', parsed.account) + self.assertEqual('root-has-some-hyphens', parsed.root_container) + self.assertEqual('hash', parsed.parent_container_hash) + self.assertEqual(utils.Timestamp(1234), parsed.timestamp) + self.assertEqual(99, parsed.index) + + def test_bad_parse(self): + with self.assertRaises(ValueError) as cm: + utils.ShardName.parse('a') + self.assertEqual('invalid name: a', str(cm.exception)) + with self.assertRaises(ValueError) as cm: + utils.ShardName.parse('a/c') + self.assertEqual('invalid name: a/c', str(cm.exception)) + with self.assertRaises(ValueError) as cm: + utils.ShardName.parse('a/root-hash-bad') + self.assertEqual('invalid name: a/root-hash-bad', str(cm.exception)) + with self.assertRaises(ValueError) as cm: + utils.ShardName.parse('a/root-hash-bad-0') + self.assertEqual('invalid name: a/root-hash-bad-0', + str(cm.exception)) + with self.assertRaises(ValueError) as cm: + utils.ShardName.parse('a/root-hash-12345678.12345-bad') + self.assertEqual('invalid name: a/root-hash-12345678.12345-bad', + str(cm.exception)) + + def test_bad_create(self): + with self.assertRaises(ValueError): + utils.ShardName.create('a', 'root', 'hash', 'bad', '0') + with self.assertRaises(ValueError): + utils.ShardName.create('a', 'root', None, '1235678', 'bad') + + class TestShardRange(unittest.TestCase): def setUp(self): self.ts_iter = make_timestamp_iter() @@ -8878,9 +8929,115 @@ class TestShardRange(unittest.TestCase): actual = utils.ShardRange.make_path( 'a', 'root', 'parent', ts.internal, '3') self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual) - actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 'foo') - self.assertEqual('a/root-%s-%s-foo' % (parent_hash, ts.internal), - actual) + + def test_is_child_of(self): + # Set up some shard ranges in relational hierarchy: + # account -> root -> grandparent -> parent -> child + # using abbreviated names a_r_gp_p_c + + # account 1 + ts = next(self.ts_iter) + a1_r1 = utils.ShardRange('a1/r1', ts) + ts = next(self.ts_iter) + a1_r1_gp1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', 'r1', ts, 1), ts) + ts = next(self.ts_iter) + a1_r1_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', a1_r1_gp1.container, ts, 1), ts) + ts = next(self.ts_iter) + a1_r1_gp1_p1_c1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', a1_r1_gp1_p1.container, ts, 1), ts) + ts = next(self.ts_iter) + a1_r1_gp1_p1_c2 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', a1_r1_gp1_p1.container, ts, 2), ts) + ts = next(self.ts_iter) + a1_r1_gp1_p2 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', a1_r1_gp1.container, ts, 2), ts) + ts = next(self.ts_iter) + a1_r1_gp2 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', 'r1', ts, 2), ts) # different index + ts = next(self.ts_iter) + a1_r1_gp2_p1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', a1_r1_gp2.container, ts, 1), ts) + # drop the index from grandparent name + ts = next(self.ts_iter) + rogue_a1_r1_gp = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r1', 'r1', ts, 1)[:-2], ts) + + # account 1, root 2 + ts = next(self.ts_iter) + a1_r2 = utils.ShardRange('a1/r2', ts) + ts = next(self.ts_iter) + a1_r2_gp1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r2', a1_r2.container, ts, 1), ts) + ts = next(self.ts_iter) + a1_r2_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a1', 'r2', a1_r2_gp1.container, ts, 3), ts) + + # account 2, root1 + a2_r1 = utils.ShardRange('a2/r1', ts) + ts = next(self.ts_iter) + a2_r1_gp1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a2', 'r1', a2_r1.container, ts, 1), ts) + ts = next(self.ts_iter) + a2_r1_gp1_p1 = utils.ShardRange(utils.ShardRange.make_path( + '.shards_a2', 'r1', a2_r1_gp1.container, ts, 3), ts) + + # verify parent-child within same account. + self.assertTrue(a1_r1_gp1.is_child_of(a1_r1)) + self.assertTrue(a1_r1_gp1_p1.is_child_of(a1_r1_gp1)) + self.assertTrue(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1)) + self.assertTrue(a1_r1_gp1_p1_c2.is_child_of(a1_r1_gp1_p1)) + self.assertTrue(a1_r1_gp1_p2.is_child_of(a1_r1_gp1)) + + self.assertTrue(a1_r1_gp2.is_child_of(a1_r1)) + self.assertTrue(a1_r1_gp2_p1.is_child_of(a1_r1_gp2)) + + self.assertTrue(a1_r2_gp1.is_child_of(a1_r2)) + self.assertTrue(a1_r2_gp1_p1.is_child_of(a1_r2_gp1)) + + self.assertTrue(a2_r1_gp1.is_child_of(a2_r1)) + self.assertTrue(a2_r1_gp1_p1.is_child_of(a2_r1_gp1)) + + # verify not parent-child within same account. + self.assertFalse(a1_r1.is_child_of(a1_r1)) + self.assertFalse(a1_r1.is_child_of(a1_r2)) + + self.assertFalse(a1_r1_gp1.is_child_of(a1_r2)) + self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1)) + self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1_p1)) + self.assertFalse(a1_r1_gp1.is_child_of(a1_r1_gp1_p1_c1)) + + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp2)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2_gp1)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(rogue_a1_r1_gp)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p2)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r2_gp1_p1)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1_c1)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a1_r1_gp1_p1_c2)) + + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1)) + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1)) + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p2)) + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp2_p1)) + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1_c1)) + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r1_gp1_p1_c2)) + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a1_r2_gp1_p1)) + self.assertFalse(a1_r1_gp1_p1_c1.is_child_of(a2_r1_gp1_p1)) + + self.assertFalse(a1_r2_gp1.is_child_of(a1_r1)) + self.assertFalse(a1_r2_gp1_p1.is_child_of(a1_r1_gp1)) + + # across different accounts, 'is_child_of' works in some cases but not + # all, so don't use it for shard ranges in different accounts. + self.assertFalse(a1_r1.is_child_of(a2_r1)) + self.assertFalse(a2_r1_gp1_p1.is_child_of(a1_r1_gp1)) + self.assertFalse(a1_r1_gp1_p1.is_child_of(a2_r1)) + self.assertTrue(a1_r1_gp1.is_child_of(a2_r1)) + self.assertTrue(a2_r1_gp1.is_child_of(a1_r1)) def test_expand(self): bounds = (('', 'd'), ('d', 'k'), ('k', 't'), ('t', ''))