Merge "Fixed regression in consolidate_hashes"

Jenkins 2016-11-25 15:09:18 +00:00, committed by Gerrit Code Review
commit 8a353ab1b9
2 changed files with 128 additions and 39 deletions

swift/obj/diskfile.py

@@ -276,7 +276,8 @@ def consolidate_hashes(partition_dir):
             with open(invalidations_file, 'rb') as inv_fh:
                 for line in inv_fh:
                     suffix = line.strip()
-                    if hashes is not None and hashes.get(suffix) is not None:
+                    if hashes is not None and \
+                            hashes.get(suffix, '') is not None:
                         hashes[suffix] = None
                         modified = True
     except (IOError, OSError) as e:
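
The whole fix is the default argument: dict.get(suffix) returns None when the suffix has never made it into hashes.pkl, so the old condition skipped exactly the suffixes that most needed invalidating, and their entries were consumed from the invalidations file without ever marking the suffix stale. With '' as the default, the condition also holds for missing suffixes. A minimal standalone sketch of the two behaviors, with illustrative values (not Swift code):

    hashes = {'abc': 'd41d8cd98f00b204e9800998ecf8427e'}
    missing = '123'  # invalidated suffix not yet present in hashes.pkl

    # old check: get() returns None for a missing key, so the
    # invalidation entry was silently discarded
    if hashes is not None and hashes.get(missing) is not None:
        hashes[missing] = None  # never reached for a brand-new suffix

    # fixed check: the '' default is not None, so a missing suffix
    # is still marked as needing a rehash
    if hashes is not None and hashes.get(missing, '') is not None:
        hashes[missing] = None  # reached

    assert hashes[missing] is None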

test/unit/obj/test_diskfile.py

@@ -6030,9 +6030,11 @@ class TestSuffixHashes(unittest.TestCase):
         self.assertFalse(os.path.exists(hashes_file))
         self.assertFalse(os.path.exists(inv_file))

-    def test_invalidate_hash_file_exists(self):
+    def test_invalidate_hash_empty_file_exists(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertEqual(hashes, {})
             # create something to hash
             df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
                                      policy=policy)
@@ -6041,6 +6043,34 @@ class TestSuffixHashes(unittest.TestCase):
             suffix = os.path.basename(suffix_dir)
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertIn(suffix, hashes)  # sanity

+    def test_invalidate_hash_consolidation(self):
+        def assert_consolidation(suffixes):
+            # verify that suffixes are invalidated after consolidation
+            with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
+                hashes = df_mgr.consolidate_hashes(part_path)
+            self.assertTrue(mock_lock.called)
+            for suffix in suffixes:
+                self.assertIn(suffix, hashes)
+                self.assertIsNone(hashes[suffix])
+            with open(hashes_file, 'rb') as f:
+                self.assertEqual(hashes, pickle.load(f))
+            with open(invalidations_file, 'rb') as f:
+                self.assertEqual("", f.read())
+            return hashes
+
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            # create something to hash
+            df = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o',
+                                     policy=policy)
+            df.delete(self.ts())
+            suffix_dir = os.path.dirname(df._datadir)
+            suffix = os.path.basename(suffix_dir)
+            original_hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertIn(suffix, original_hashes)  # sanity
+            self.assertIsNotNone(original_hashes[suffix])
+
             # sanity check hashes file
             part_path = os.path.join(self.devices, 'sda1',
                                      diskfile.get_data_dir(policy), '0')
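
For orientation, these are the partition-level paths the test builds (device 'sda1', partition '0'; the file names come from diskfile.HASH_FILE and diskfile.HASH_INVALIDATIONS_FILE, and get_data_dir(policy) is 'objects' for policy 0, 'objects-N' otherwise):

    <devices>/sda1/objects/0/hashes.pkl      # pickled {suffix: hash} map
    <devices>/sda1/objects/0/hashes.invalid  # one invalidated suffix per line
    <devices>/sda1/objects/0/<suffix>/<object hash>/<timestamp>.ts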
@@ -6048,24 +6078,57 @@ class TestSuffixHashes(unittest.TestCase):
             invalidations_file = os.path.join(
                 part_path, diskfile.HASH_INVALIDATIONS_FILE)
             with open(hashes_file, 'rb') as f:
-                self.assertEqual(hashes, pickle.load(f))
+                self.assertEqual(original_hashes, pickle.load(f))
             self.assertFalse(os.path.exists(invalidations_file))

             # invalidate the hash
             with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
                 df_mgr.invalidate_hash(suffix_dir)
             self.assertTrue(mock_lock.called)

             # suffix should be in invalidations file
             with open(invalidations_file, 'rb') as f:
                 self.assertEqual(suffix + "\n", f.read())
+            # hashes file is unchanged
+            with open(hashes_file, 'rb') as f:
+                self.assertEqual(original_hashes, pickle.load(f))

             # consolidate the hash and the invalidations
-            with mock.patch('swift.obj.diskfile.lock_path') as mock_lock:
-                hashes = df_mgr.consolidate_hashes(part_path)
-            self.assertIsNone(hashes.get(suffix))
-            with open(invalidations_file, 'rb') as f:
-                self.assertEqual("", f.read())
+            hashes = assert_consolidation([suffix])

+            # invalidate a different suffix hash in the same partition,
+            # one not yet present in the existing hashes.pkl
+            i = 0
+            while True:
+                df2 = df_mgr.get_diskfile('sda1', '0', 'a', 'c', 'o%d' % i,
+                                          policy=policy)
+                i += 1
+                suffix_dir2 = os.path.dirname(df2._datadir)
+                if suffix_dir != suffix_dir2:
+                    break
+            df2.delete(self.ts())
+            suffix2 = os.path.basename(suffix_dir2)
+            # suffix2 should be in invalidations file
+            with open(invalidations_file, 'rb') as f:
+                self.assertEqual(suffix2 + "\n", f.read())
+            # hashes file is not yet changed
+            with open(hashes_file, 'rb') as f:
+                self.assertEqual(hashes, pickle.load(f))

+            # consolidate hashes
+            hashes = assert_consolidation([suffix, suffix2])

+            # invalidating suffix2 multiple times is ok
+            df2.delete(self.ts())
+            df2.delete(self.ts())
+            # suffix2 should be in invalidations file
+            with open(invalidations_file, 'rb') as f:
+                self.assertEqual("%s\n%s\n" % (suffix2, suffix2), f.read())
+            # hashes file is not yet changed
+            with open(hashes_file, 'rb') as f:
+                self.assertEqual(hashes, pickle.load(f))
+            # consolidate hashes
+            assert_consolidation([suffix, suffix2])

     # invalidate_hash tests - error handling
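
Taken together, assert_consolidation pins down the contract the fix restores: invalidate_hash appends one suffix per line to the invalidations file under the partition lock, and consolidate_hashes folds those lines into hashes.pkl, setting each listed suffix to None so the next get_hashes recomputes it, then truncates the invalidations file (hence the assertEqual("", f.read())). A simplified standalone sketch of that flow, assuming the hashes.pkl/hashes.invalid file names and ignoring locking and most error handling (the real implementation lives in swift.obj.diskfile):

    import os
    import pickle

    def invalidate_hash(part_path, suffix):
        # cheap append; rewriting the pickle on every update would be costly
        with open(os.path.join(part_path, 'hashes.invalid'), 'a') as f:
            f.write(suffix + '\n')

    def consolidate_hashes(part_path):
        pkl_path = os.path.join(part_path, 'hashes.pkl')
        inv_path = os.path.join(part_path, 'hashes.invalid')
        try:
            with open(pkl_path, 'rb') as f:
                hashes = pickle.load(f)
        except (IOError, OSError):
            hashes = {}  # simplification: Swift distinguishes a missing pickle
        if os.path.exists(inv_path):
            with open(inv_path) as f:
                for line in f:
                    suffix = line.strip()
                    # the '' default is the fix: a suffix absent from the
                    # pickle must still be marked for rehashing
                    if suffix and hashes.get(suffix, '') is not None:
                        hashes[suffix] = None
            with open(pkl_path, 'wb') as f:
                pickle.dump(hashes, f)
            open(inv_path, 'w').close()  # truncate; the tests read back ""
        return hashes

    # scratch-directory usage:
    #   part = tempfile.mkdtemp()
    #   invalidate_hash(part, 'abc')
    #   consolidate_hashes(part)  => {'abc': None}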
@@ -6208,63 +6271,88 @@ class TestSuffixHashes(unittest.TestCase):
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
             self.assertEqual(hashes, {})

-    def test_hash_suffix_one_reclaim_tombstone_with_hash_pkl(self):
+    def test_hash_suffix_ts_cleanup_after_recalc(self):
         for policy in self.iter_policies():
             df_mgr = self.df_router[policy]
             df = df_mgr.get_diskfile(
                 'sda1', '0', 'a', 'c', 'o', policy=policy)
             suffix_dir = os.path.dirname(df._datadir)
-            part_dir = os.path.dirname(suffix_dir)
-            hash_file = os.path.join(part_dir, diskfile.HASH_FILE)
+            suffix = os.path.basename(suffix_dir)

             # scale back reclaim age a bit
             df_mgr.reclaim_age = 1000
-            # write a tombstone that's just a *little* older
-            old_time = time() - 1001
+            # write a valid tombstone
+            old_time = time() - 500
             timestamp = Timestamp(old_time)
             df.delete(timestamp.internal)
             hashes = df_mgr.get_hashes('sda1', '0', [], policy)
-            # sanity
-            self.assertEqual(hashes, {})
-            self.assertFalse(os.path.exists(df._datadir))
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])

-            hash_timestamp = os.stat(hash_file).st_mtime
-            # if hash.pkl exists, that .ts file is not reclaimed
-            df = df_mgr.get_diskfile(
-                'sda1', '0', 'a', 'c', 'o', policy=policy)
-            df.delete(timestamp.internal)
-            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
-            # this was a cached value, so the result looks empty
-            self.assertEqual(hashes, {})
-            # and the hash.pkl is not touched
-            self.assertEqual(hash_timestamp, os.stat(hash_file).st_mtime)
-            # and we still have a tombstone entry
+            # we have a tombstone entry
             tombstone = '%s.ts' % timestamp.internal
             self.assertTrue(os.path.exists(df._datadir))
             self.assertIn(tombstone, os.listdir(df._datadir))

+            # lower reclaim age to force tombstone reclaiming
+            df_mgr.reclaim_age = 200
+            # no cleanup yet, because the suffix was not invalidated
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertTrue(os.path.exists(df._datadir))
+            self.assertIn(tombstone, os.listdir(df._datadir))
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])

+            # recalculating the suffix hash causes cleanup
+            hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
+            self.assertEqual(hashes, {})
+            self.assertFalse(os.path.exists(df._datadir))

+    def test_hash_suffix_ts_cleanup_after_invalidate_hash(self):
+        for policy in self.iter_policies():
+            df_mgr = self.df_router[policy]
+            df = df_mgr.get_diskfile(
+                'sda1', '0', 'a', 'c', 'o', policy=policy)
+            suffix_dir = os.path.dirname(df._datadir)
+            suffix = os.path.basename(suffix_dir)

+            # scale back reclaim age a bit
+            df_mgr.reclaim_age = 1000
+            # write a valid tombstone
+            old_time = time() - 500
+            timestamp = Timestamp(old_time)
+            df.delete(timestamp.internal)
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])

+            # we have a tombstone entry
+            tombstone = '%s.ts' % timestamp.internal
+            self.assertTrue(os.path.exists(df._datadir))
+            self.assertIn(tombstone, os.listdir(df._datadir))

+            # lower reclaim age to force tombstone reclaiming
+            df_mgr.reclaim_age = 200
+            # no cleanup yet, because the suffix was not invalidated
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertTrue(os.path.exists(df._datadir))
+            self.assertIn(tombstone, os.listdir(df._datadir))
+            self.assertIn(suffix, hashes)
+            self.assertIsNotNone(hashes[suffix])

+            # However, if we call invalidate_hash for the suffix dir,
+            # get_hashes can reclaim the tombstone
+            with mock.patch('swift.obj.diskfile.lock_path'):
+                df_mgr.invalidate_hash(suffix_dir)

+            # updating the invalidated hashes causes cleanup
+            hashes = df_mgr.get_hashes('sda1', '0', [], policy)
+            self.assertEqual(hashes, {})
-            # If we have no other objects in the suffix, get_hashes
-            # doesn't reclaim anything
-            self.assertTrue(os.path.exists(df._datadir))
-            self.assertIn(tombstone, os.listdir(df._datadir))
-            self.assertEqual(hash_timestamp, os.stat(hash_file).st_mtime)

-            # *BUT* if a suffix value is given to recalc, it can force a reclaim!
-            suffix = os.path.dirname(suffix_dir)
-            hashes = df_mgr.get_hashes('sda1', '0', [suffix], policy)
             self.assertFalse(os.path.exists(df._datadir))

-            # hash.pkl was updated
-            self.assertGreater(os.stat(hash_file).st_mtime, hash_timestamp)

     def test_hash_suffix_one_reclaim_and_one_valid_tombstone(self):
         for policy in self.iter_policies():
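
The net behavior the two cleanup tests lock in: tombstones older than reclaim_age are only unlinked when their suffix hash is actually recomputed, either because the suffix is passed to get_hashes as a recalculate argument or because invalidate_hash flagged it; merely lowering reclaim_age leaves cached suffix hashes, and the on-disk .ts files, untouched. A toy sketch of that reclaim decision (an assumed helper shape, not Swift's actual hash_suffix):

    import os
    import time

    def rehash_suffix(suffix_dir, reclaim_age):
        # only runs for invalidated/recalculated suffixes; a tombstone
        # ('.ts') older than reclaim_age is deleted during the rehash
        cutoff = time.time() - reclaim_age
        for hash_dir, _, files in os.walk(suffix_dir):
            for name in files:
                if name.endswith('.ts'):
                    # Timestamp.internal prefix, e.g. '1480088958.12345'
                    if float(name[:-3].split('_')[0]) < cutoff:
                        os.unlink(os.path.join(hash_dir, name))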