Db reclamation should remove empty suffix dirs

When a db is reclaimed it removes the hash dir the db files are in,
but it does not try to remove the parent suffix dir though it might
be empty now. This eventually leads to a bunch of empty suffix dirs
lying around. This patch fixes that by attempting to remove the
parent suffix dir after a hash dir reclamation.

Here's a quick script to see how bad a given drive might be:

import os, os.path, sys
if len(sys.argv) != 2:
    sys.exit('%s <mount-point>' % sys.argv[0])
in_use = 0
empty = 0
containers = os.path.join(sys.argv[1], 'containers')
for p in os.listdir(containers):
    partition = os.path.join(containers, p)
    for s in os.listdir(partition):
        suffix = os.path.join(partition, s)
        if os.listdir(suffix):
            in_use += 1
        else:
            empty += 1
print in_use, 'in use,', empty, 'empty,', '%.02f%%' % (
    100.0 * empty / (in_use + empty)), 'empty'

And here's a quick script to clean up a drive:
NOTE THAT I HAVEN'T ACTUALLY RUN THIS ON A LIVE NODE YET!

import errno, os, os.path, sys
if len(sys.argv) != 2:
    sys.exit('%s <mount-point>' % sys.argv[0])
containers = os.path.join(sys.argv[1], 'containers')
for p in os.listdir(containers):
    partition = os.path.join(containers, p)
    for s in os.listdir(partition):
        suffix = os.path.join(partition, s)
        try:
            os.rmdir(suffix)
        except OSError, err:
            if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
                print err

Change-Id: I2e6463a4cd40597fc236ebe3e73b4b31347f2309
This commit is contained in:
gholt 2012-10-25 19:17:57 +00:00
parent f9b49a8052
commit b5509b1bee
2 changed files with 53 additions and 12 deletions

View File

@ -423,8 +423,16 @@ class Replicator(Daemon):
self.logger.timing_since('timing', start_time)
def delete_db(self, object_file):
hash_dir = os.path.dirname(object_file)
suf_dir = os.path.dirname(hash_dir)
with lock_parent_directory(object_file):
shutil.rmtree(os.path.dirname(object_file), True)
shutil.rmtree(hash_dir, True)
try:
os.rmdir(suf_dir)
except OSError, err:
if err.errno not in (errno.ENOENT, errno.ENOTEMPTY):
self.logger.exception(
_('ERROR while trying to clean up %s') % suf_dir)
self.stats['remove'] += 1
device_name = self.extract_device(object_file)
self.logger.increment('removes.' + device_name)

View File

@ -18,6 +18,7 @@ from contextlib import contextmanager
import os
import logging
import errno
from shutil import rmtree
from tempfile import mkdtemp, NamedTemporaryFile
from swift.common import db_replicator
@ -332,20 +333,52 @@ class TestDBReplicator(unittest.TestCase):
replicator.logger = FakeLogger()
temp_dir = mkdtemp()
temp_file = NamedTemporaryFile(dir=temp_dir, delete=False)
try:
temp_suf_dir = os.path.join(temp_dir, '16e')
os.mkdir(temp_suf_dir)
temp_hash_dir = os.path.join(temp_suf_dir,
'166e33924a08ede4204871468c11e16e')
os.mkdir(temp_hash_dir)
temp_file = NamedTemporaryFile(dir=temp_hash_dir, delete=False)
temp_hash_dir2 = os.path.join(temp_suf_dir,
'266e33924a08ede4204871468c11e16e')
os.mkdir(temp_hash_dir2)
temp_file2 = NamedTemporaryFile(dir=temp_hash_dir2, delete=False)
# sanity-checks
self.assertTrue(os.path.exists(temp_dir))
self.assertTrue(os.path.exists(temp_file.name))
self.assertEqual(0, replicator.stats['remove'])
# sanity-checks
self.assertTrue(os.path.exists(temp_dir))
self.assertTrue(os.path.exists(temp_suf_dir))
self.assertTrue(os.path.exists(temp_hash_dir))
self.assertTrue(os.path.exists(temp_file.name))
self.assertTrue(os.path.exists(temp_hash_dir2))
self.assertTrue(os.path.exists(temp_file2.name))
self.assertEqual(0, replicator.stats['remove'])
replicator.delete_db(temp_file.name)
replicator.delete_db(temp_file.name)
self.assertFalse(os.path.exists(temp_dir))
self.assertFalse(os.path.exists(temp_file.name))
self.assertEqual([(('removes.some_device',), {})],
replicator.logger.log_dict['increment'])
self.assertEqual(1, replicator.stats['remove'])
self.assertTrue(os.path.exists(temp_dir))
self.assertTrue(os.path.exists(temp_suf_dir))
self.assertFalse(os.path.exists(temp_hash_dir))
self.assertFalse(os.path.exists(temp_file.name))
self.assertTrue(os.path.exists(temp_hash_dir2))
self.assertTrue(os.path.exists(temp_file2.name))
self.assertEqual([(('removes.some_device',), {})],
replicator.logger.log_dict['increment'])
self.assertEqual(1, replicator.stats['remove'])
replicator.delete_db(temp_file2.name)
self.assertTrue(os.path.exists(temp_dir))
self.assertFalse(os.path.exists(temp_suf_dir))
self.assertFalse(os.path.exists(temp_hash_dir))
self.assertFalse(os.path.exists(temp_file.name))
self.assertFalse(os.path.exists(temp_hash_dir2))
self.assertFalse(os.path.exists(temp_file2.name))
self.assertEqual([(('removes.some_device',), {})] * 2,
replicator.logger.log_dict['increment'])
self.assertEqual(2, replicator.stats['remove'])
finally:
rmtree(temp_dir)
def test_extract_device(self):
replicator = TestReplicator({'devices': '/some/root'})