From 053b625f42e2d1d90e18ad72e16099a5b62447dd Mon Sep 17 00:00:00 2001 From: Clay Gerrard Date: Tue, 29 Nov 2016 17:53:42 -0800 Subject: [PATCH] Remove ring md5 integration check from recon unittests The actual value computed by md5 isn't that important; even in recon it's only used as an opaque identifier that assumed to be consistent across nodes for the same file. However the way these tests were written with hard coded md5 values makes them brittle to changes in the RingData format and susceptible to the burden of needless unrelated test maintenance churn. e.g. Related-Change: I23b5e0a8082b30ca257aeb1fab03ab74e6f0b2d4 Change-Id: I9623752c3cd2361f57864f3e938e1baf5e9292d7 --- swift/common/middleware/recon.py | 17 +- test/unit/common/middleware/test_recon.py | 232 +++++++++++++--------- 2 files changed, 154 insertions(+), 95 deletions(-) diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py index 0e2dfb4b41..ba4e4fbfd4 100644 --- a/swift/common/middleware/recon.py +++ b/swift/common/middleware/recon.py @@ -29,6 +29,16 @@ from resource import getpagesize from hashlib import md5 +MD5_BLOCK_READ_BYTES = 4096 + + +def _hash_for_ringfile(f): + md5sum = md5() + for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''): + md5sum.update(block) + return md5sum.hexdigest() + + class ReconMiddleware(object): """ Recon middleware used for monitoring. @@ -250,15 +260,10 @@ class ReconMiddleware(object): """get all ring md5sum's""" sums = {} for ringfile in self.rings: - md5sum = md5() if os.path.exists(ringfile): try: with openr(ringfile, 'rb') as f: - block = f.read(4096) - while block: - md5sum.update(block) - block = f.read(4096) - sums[ringfile] = md5sum.hexdigest() + sums[ringfile] = _hash_for_ringfile(f) except IOError as err: sums[ringfile] = None if err.errno != errno.ENOENT: diff --git a/test/unit/common/middleware/test_recon.py b/test/unit/common/middleware/test_recon.py index aa4d6499e9..6d9b53c778 100644 --- a/test/unit/common/middleware/test_recon.py +++ b/test/unit/common/middleware/test_recon.py @@ -21,7 +21,7 @@ from posix import stat_result, statvfs_result from shutil import rmtree import unittest from unittest import TestCase -import sys +from six import StringIO from swift import __version__ as swiftver from swift.common import ring, utils @@ -217,8 +217,7 @@ class TestReconSuccess(TestCase): # which will cause ring md5 checks to fail self.tempdir = '/tmp/swift_recon_md5_test' utils.mkdirs(self.tempdir) - self.app = recon.ReconMiddleware(FakeApp(), - {'swift_dir': self.tempdir}) + self.app = self._get_app() self.mockos = MockOS() self.fakecache = FakeFromCache() self.real_listdir = os.listdir @@ -233,6 +232,13 @@ class TestReconSuccess(TestCase): self.app._from_recon_cache = self.fakecache.fake_from_recon_cache self.frecon = FakeRecon() + # replace hash md5 implementation of the _hash_for_ringfile function + mock_hash_for_ringfile = mock.patch( + 'swift.common.middleware.recon._hash_for_ringfile', + lambda f: 'hash-' + os.path.basename(f.name)) + self.addCleanup(mock_hash_for_ringfile.stop) + mock_hash_for_ringfile.start() + self.ring_part_shift = 5 self.ring_devs = [{'id': 0, 'zone': 0, 'weight': 1.0, 'ip': '10.1.1.1', 'port': 6200, @@ -259,6 +265,10 @@ class TestReconSuccess(TestCase): del self.fakecache rmtree(self.tempdir) + def _get_app(self): + app = recon.ReconMiddleware(FakeApp(), {'swift_dir': self.tempdir}) + return app + def _create_ring(self, ringpath, replica_map, devs, part_shift): def fake_time(): return 0 @@ -314,33 +324,23 @@ class TestReconSuccess(TestCase): def test_get_ring_md5(self): # We should only see configured and present rings, so to handle the # "normal" case just patch the policies to match the existing rings. - expt_out = {'little': {'%s/account.ring.gz' % self.tempdir: - '672c6c50dfcb77e04f5a2124aef87596', - '%s/container.ring.gz' % self.tempdir: - '4c4392f8bf816596990ca7cd4d4b6e50', - '%s/object.ring.gz' % self.tempdir: - 'a34178f7399706e41395eb3ac8b2c4f3', - '%s/object-1.ring.gz' % self.tempdir: - 'cd54c473676ce5e3103e68f0e9f2326d', - '%s/object-2.ring.gz' % self.tempdir: - '8783ec76f29bbcfd3f51acc63b2fc337'}, - 'big': {'%s/account.ring.gz' % self.tempdir: - '70d2dde8144c09e5b42858e0fa17ab7e', - '%s/container.ring.gz' % self.tempdir: - '0bd14f4327cbea88bde7ab7850d4d77d', - '%s/object.ring.gz' % self.tempdir: - '2d17555687af36fd8c7ee5b0c492c582', - '%s/object-1.ring.gz' % self.tempdir: - 'bc1145d31771d7957d939077fe40e2e8', - '%s/object-2.ring.gz' % self.tempdir: - 'cd95a01ae1ab158f2e9e4c207aeb1769'}} + expt_out = {'%s/account.ring.gz' % self.tempdir: + 'hash-account.ring.gz', + '%s/container.ring.gz' % self.tempdir: + 'hash-container.ring.gz', + '%s/object.ring.gz' % self.tempdir: + 'hash-object.ring.gz', + '%s/object-1.ring.gz' % self.tempdir: + 'hash-object-1.ring.gz', + '%s/object-2.ring.gz' % self.tempdir: + 'hash-object-2.ring.gz'} # We need to instantiate app after overriding the configured policies. + app = self._get_app() # object-{1,2}.ring.gz should both appear as they are present on disk # and were configured as policies. - app = recon.ReconMiddleware(FakeApp(), {'swift_dir': self.tempdir}) self.assertEqual(sorted(app.get_ring_md5().items()), - sorted(expt_out[sys.byteorder].items())) + sorted(expt_out.items())) def test_get_ring_md5_ioerror_produces_none_hash(self): # Ring files that are present but produce an IOError on read should @@ -380,18 +380,14 @@ class TestReconSuccess(TestCase): raise IOError return open(fn, fmode) - expt_out = {'little': {'%s/account.ring.gz' % self.tempdir: None, - '%s/container.ring.gz' % self.tempdir: None, - '%s/object.ring.gz' % self.tempdir: - 'a34178f7399706e41395eb3ac8b2c4f3'}, - 'big': {'%s/account.ring.gz' % self.tempdir: None, - '%s/container.ring.gz' % self.tempdir: None, - '%s/object.ring.gz' % self.tempdir: - '2d17555687af36fd8c7ee5b0c492c582'}} + expt_out = {'%s/account.ring.gz' % self.tempdir: None, + '%s/container.ring.gz' % self.tempdir: None, + '%s/object.ring.gz' % self.tempdir: + 'hash-object.ring.gz'} ringmd5 = self.app.get_ring_md5(openr=fake_open_objonly) self.assertEqual(sorted(ringmd5.items()), - sorted(expt_out[sys.byteorder].items())) + sorted(expt_out.items())) @patch_policies([ StoragePolicy(0, 'stagecoach'), @@ -402,30 +398,22 @@ class TestReconSuccess(TestCase): # If a configured ring is missing when the app is instantiated, but is # later moved into place, we shouldn't need to restart object-server # for it to appear in recon. - expt_out = {'little': {'%s/account.ring.gz' % self.tempdir: - '672c6c50dfcb77e04f5a2124aef87596', - '%s/container.ring.gz' % self.tempdir: - '4c4392f8bf816596990ca7cd4d4b6e50', - '%s/object.ring.gz' % self.tempdir: - 'a34178f7399706e41395eb3ac8b2c4f3', - '%s/object-2.ring.gz' % self.tempdir: - '8783ec76f29bbcfd3f51acc63b2fc337'}, - 'big': {'%s/account.ring.gz' % self.tempdir: - '70d2dde8144c09e5b42858e0fa17ab7e', - '%s/container.ring.gz' % self.tempdir: - '0bd14f4327cbea88bde7ab7850d4d77d', - '%s/object.ring.gz' % self.tempdir: - '2d17555687af36fd8c7ee5b0c492c582', - '%s/object-2.ring.gz' % self.tempdir: - 'cd95a01ae1ab158f2e9e4c207aeb1769'}} + expt_out = {'%s/account.ring.gz' % self.tempdir: + 'hash-account.ring.gz', + '%s/container.ring.gz' % self.tempdir: + 'hash-container.ring.gz', + '%s/object.ring.gz' % self.tempdir: + 'hash-object.ring.gz', + '%s/object-2.ring.gz' % self.tempdir: + 'hash-object-2.ring.gz'} # We need to instantiate app after overriding the configured policies. + app = self._get_app() # object-1.ring.gz should not appear as it's present but unconfigured. # object-3502.ring.gz should not appear as it's configured but not - # present. - app = recon.ReconMiddleware(FakeApp(), {'swift_dir': self.tempdir}) + # (yet) present. self.assertEqual(sorted(app.get_ring_md5().items()), - sorted(expt_out[sys.byteorder].items())) + sorted(expt_out.items())) # Simulate the configured policy's missing ringfile being moved into # place during runtime @@ -436,14 +424,12 @@ class TestReconSuccess(TestCase): array.array('H', [1, 1, 0, 3])] self._create_ring(os.path.join(self.tempdir, ringfn), ringmap, self.ring_devs, self.ring_part_shift) - expt_out[sys.byteorder][ringpath] = \ - '77f752964f3bd4719e1b9b6cee47659f' if sys.byteorder == 'little' \ - else '3e189e6c668a4d159707438dfb87589a' + expt_out[ringpath] = 'hash-' + ringfn # We should now see it in the ringmd5 response, without a restart # (using the same app instance) self.assertEqual(sorted(app.get_ring_md5().items()), - sorted(expt_out[sys.byteorder].items())) + sorted(expt_out.items())) @patch_policies([ StoragePolicy(0, 'stagecoach', is_default=True), @@ -453,30 +439,22 @@ class TestReconSuccess(TestCase): def test_get_ring_md5_excludes_configured_missing_obj_rings(self): # Object rings that are configured but missing aren't meant to appear # in the ringmd5 response. - expt_out = {'little': {'%s/account.ring.gz' % self.tempdir: - '672c6c50dfcb77e04f5a2124aef87596', - '%s/container.ring.gz' % self.tempdir: - '4c4392f8bf816596990ca7cd4d4b6e50', - '%s/object.ring.gz' % self.tempdir: - 'a34178f7399706e41395eb3ac8b2c4f3', - '%s/object-2.ring.gz' % self.tempdir: - '8783ec76f29bbcfd3f51acc63b2fc337'}, - 'big': {'%s/account.ring.gz' % self.tempdir: - '70d2dde8144c09e5b42858e0fa17ab7e', - '%s/container.ring.gz' % self.tempdir: - '0bd14f4327cbea88bde7ab7850d4d77d', - '%s/object.ring.gz' % self.tempdir: - '2d17555687af36fd8c7ee5b0c492c582', - '%s/object-2.ring.gz' % self.tempdir: - 'cd95a01ae1ab158f2e9e4c207aeb1769'}} + expt_out = {'%s/account.ring.gz' % self.tempdir: + 'hash-account.ring.gz', + '%s/container.ring.gz' % self.tempdir: + 'hash-container.ring.gz', + '%s/object.ring.gz' % self.tempdir: + 'hash-object.ring.gz', + '%s/object-2.ring.gz' % self.tempdir: + 'hash-object-2.ring.gz'} # We need to instantiate app after overriding the configured policies. + app = self._get_app() # object-1.ring.gz should not appear as it's present but unconfigured. # object-2305.ring.gz should not appear as it's configured but not # present. - app = recon.ReconMiddleware(FakeApp(), {'swift_dir': self.tempdir}) self.assertEqual(sorted(app.get_ring_md5().items()), - sorted(expt_out[sys.byteorder].items())) + sorted(expt_out.items())) @patch_policies([ StoragePolicy(0, 'zero', is_default=True), @@ -484,25 +462,19 @@ class TestReconSuccess(TestCase): def test_get_ring_md5_excludes_unconfigured_present_obj_rings(self): # Object rings that are present but not configured in swift.conf # aren't meant to appear in the ringmd5 response. - expt_out = {'little': {'%s/account.ring.gz' % self.tempdir: - '672c6c50dfcb77e04f5a2124aef87596', - '%s/container.ring.gz' % self.tempdir: - '4c4392f8bf816596990ca7cd4d4b6e50', - '%s/object.ring.gz' % self.tempdir: - 'a34178f7399706e41395eb3ac8b2c4f3'}, - 'big': {'%s/account.ring.gz' % self.tempdir: - '70d2dde8144c09e5b42858e0fa17ab7e', - '%s/container.ring.gz' % self.tempdir: - '0bd14f4327cbea88bde7ab7850d4d77d', - '%s/object.ring.gz' % self.tempdir: - '2d17555687af36fd8c7ee5b0c492c582'}} + expt_out = {'%s/account.ring.gz' % self.tempdir: + 'hash-account.ring.gz', + '%s/container.ring.gz' % self.tempdir: + 'hash-container.ring.gz', + '%s/object.ring.gz' % self.tempdir: + 'hash-object.ring.gz'} # We need to instantiate app after overriding the configured policies. + app = self._get_app() # object-{1,2}.ring.gz should not appear as they are present on disk # but were not configured as policies. - app = recon.ReconMiddleware(FakeApp(), {'swift_dir': self.tempdir}) self.assertEqual(sorted(app.get_ring_md5().items()), - sorted(expt_out[sys.byteorder].items())) + sorted(expt_out.items())) def test_from_recon_cache(self): oart = OpenAndReadTester(['{"notneeded": 5, "testkey1": "canhazio"}']) @@ -1445,5 +1417,87 @@ class TestReconMiddleware(unittest.TestCase): resp = self.real_app_get_swift_conf_md5(fail_io_open) self.assertIsNone(resp['/etc/swift/swift.conf']) + +class TestReconUtilityFunctions(unittest.TestCase): + + def test_hash_for_ringfile_on_filelike_smallish(self): + stub_data = 'some data' + stub_filelike = StringIO(stub_data) + with mock.patch('swift.common.middleware.recon.md5') as mock_md5: + mock_hasher = mock_md5.return_value + rv = recon._hash_for_ringfile(stub_filelike) + self.assertTrue(mock_hasher.hexdigest.called) + self.assertEqual(rv, mock_hasher.hexdigest.return_value) + self.assertEqual([mock.call(stub_data)], + mock_hasher.update.call_args_list) + + def test_hash_for_ringfile_on_filelike_big(self): + num_blocks = 10 + block_size = recon.MD5_BLOCK_READ_BYTES + truncate = 523 + start_char = ord('a') + expected_blocks = [chr(i) * block_size + for i in range(start_char, start_char + num_blocks)] + full_data = ''.join(expected_blocks) + trimmed_data = full_data[:-truncate] + # sanity + self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate) + stub_filelike = StringIO(trimmed_data) + with mock.patch('swift.common.middleware.recon.md5') as mock_md5: + mock_hasher = mock_md5.return_value + rv = recon._hash_for_ringfile(stub_filelike) + self.assertTrue(mock_hasher.hexdigest.called) + self.assertEqual(rv, mock_hasher.hexdigest.return_value) + self.assertEqual(num_blocks, len(mock_hasher.update.call_args_list)) + expected_block = 'a' * block_size + found_blocks = [] + for i, (expected_block, call) in enumerate(zip( + expected_blocks, mock_hasher.update.call_args_list)): + args, kwargs = call + self.assertEqual(kwargs, {}) + self.assertEqual(1, len(args)) + block = args[0] + if i < num_blocks - 1: + self.assertEqual(block, expected_block) + else: + self.assertEqual(block, expected_block[:-truncate]) + found_blocks.append(block) + self.assertEqual(''.join(found_blocks), trimmed_data) + + def test_hash_for_ringfile_on_filelike_empty(self): + stub_filelike = StringIO('') + with mock.patch('swift.common.middleware.recon.md5') as mock_md5: + mock_hasher = mock_md5.return_value + rv = recon._hash_for_ringfile(stub_filelike) + self.assertTrue(mock_hasher.hexdigest.called) + self.assertEqual(rv, mock_hasher.hexdigest.return_value) + self.assertEqual([], mock_hasher.update.call_args_list) + + def test_hash_for_ringfile_on_filelike_brittle(self): + data_to_expected_hash = { + '': 'd41d8cd98f00b204e9800998ecf8427e', + 'some data': '1e50210a0202497fb79bc38b6ade6c34', + ('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3', + } + # unlike some other places where the concrete implementation really + # matters for backwards compatibility these brittle tests are probably + # not needed or justified, if a future maintainer rips them out later + # they're probably doing the right thing + failures = [] + for stub_data, expected_hash in data_to_expected_hash.items(): + rv = recon._hash_for_ringfile(StringIO(stub_data)) + try: + self.assertEqual(expected_hash, rv) + except AssertionError: + trim_cap = 80 + if len(stub_data) > trim_cap: + stub_data = '%s...' % stub_data[:trim_cap] + failures.append('hash for %r was %s instead of expected %s' % ( + stub_data, rv, expected_hash)) + if failures: + self.fail('Some data did not compute expected hash:\n' + + '\n'.join(failures)) + + if __name__ == '__main__': unittest.main()