Make rsync ignore it's own temporary files

In situations where rsync may inadvertently be unable to cleanup it's
temporary files we shouldn't spread them around the cluster.

By asking our rsync subexec to --exclude patterns that match it's own
convention for temporary naming we'll only ever transfer real replicated
artifacts and never temporary artifacts which should always be ignored
until they are fully transfered.

Cleanup of stale rsync droppings should be performed by the auditor and
will be addressed in a separate change related to lp bug #1554005.

Closes-Bug: #1553995

Change-Id: Ibe598b339af024d05e4d89c34d696e972d8189ff
This commit is contained in:
Clay Gerrard 2016-03-14 16:52:50 -07:00
parent 9e90d30962
commit f87a5487b5
3 changed files with 75 additions and 0 deletions

View File

@ -227,6 +227,7 @@ class ObjectReplicator(Daemon):
'--timeout=%s' % self.rsync_io_timeout,
'--contimeout=%s' % self.rsync_io_timeout,
'--bwlimit=%s' % self.rsync_bwlimit,
'--exclude=.*.%s' % ''.join('[0-9a-zA-Z]' for i in range(6))
]
if self.rsync_compress and \
job['region'] != node['region']:

View File

@ -19,6 +19,7 @@ from uuid import uuid4
import random
from hashlib import md5
from collections import defaultdict
import os
from swiftclient import client
@ -82,6 +83,22 @@ class TestObjectHandoff(ReplProbeTest):
raise Exception('Direct object GET did not return VERIFY, instead '
'it returned: %s' % repr(odata))
# drop a tempfile in the handoff's datadir, like it might have
# had if there was an rsync failure while it was previously a
# primary
handoff_device_path = self.device_dir('object', another_onode)
data_filename = None
for root, dirs, files in os.walk(handoff_device_path):
for filename in files:
if filename.endswith('.data'):
data_filename = filename
temp_filename = '.%s.6MbL6r' % data_filename
temp_filepath = os.path.join(root, temp_filename)
if not data_filename:
self.fail('Did not find any data files on %r' %
handoff_device_path)
open(temp_filepath, 'w')
# Assert container listing (via proxy and directly) has container/obj
objs = [o['name'] for o in
client.get_container(self.url, self.token, container)[1]]
@ -134,6 +151,20 @@ class TestObjectHandoff(ReplProbeTest):
raise Exception('Direct object GET did not return VERIFY, instead '
'it returned: %s' % repr(odata))
# and that it does *not* have a temporary rsync dropping!
found_data_filename = False
primary_device_path = self.device_dir('object', onode)
for root, dirs, files in os.walk(primary_device_path):
for filename in files:
if filename.endswith('.6MbL6r'):
self.fail('Found unexpected file %s' %
os.path.join(root, filename))
if filename == data_filename:
found_data_filename = True
self.assertTrue(found_data_filename,
'Did not find data file %r on %r' % (
data_filename, primary_device_path))
# Assert the handoff server no longer has container/obj
try:
direct_client.direct_get_object(

View File

@ -910,6 +910,49 @@ class TestObjectReplicator(unittest.TestCase):
jobs = self.replicator.collect_jobs()
self.assertEqual(len(jobs), 0)
def test_replicator_skips_rsync_temp_files(self):
# the empty pre-setup dirs aren't that useful to us
device_path = os.path.join(self.devices, 'sda')
rmtree(device_path, ignore_errors=1)
os.mkdir(device_path)
# create a real data file to trigger rsync
df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o',
policy=POLICIES.legacy)
ts = next(self.ts)
with df.create() as w:
w.write('asdf')
w.put({'X-Timestamp': ts.internal})
w.commit(ts)
# pre-flight and post sync request for both other primaries
expected_replicate_requests = 4
process_arg_checker = [
# (return_code, stdout, <each in capture rsync args>)
(0, '', []),
(0, '', []),
]
stub_body = pickle.dumps({})
with _mock_process(process_arg_checker) as rsync_log, \
mock.patch('swift.obj.replicator.whataremyips',
side_effect=_ips), \
mocked_http_conn(*[200] * expected_replicate_requests,
body=stub_body) as conn_log:
self.replicator.replicate()
self.assertEqual(['REPLICATE'] * expected_replicate_requests,
[r['method'] for r in conn_log.requests])
# expect one rsync to each other primary node
self.assertEqual(2, len(rsync_log))
expected = '--exclude=.*.[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]' \
'[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]'
for subprocess_info in rsync_log:
rsync_args = subprocess_info['rsync_args']
for arg in rsync_args:
if arg.startswith('--exclude'):
self.assertEqual(arg, expected)
break
else:
self.fail('Did not find --exclude argument in %r' %
rsync_args)
def test_replicator_removes_zbf(self):
# After running xfs_repair, a partition directory could become a
# zero-byte file. If this happens, the replicator should clean it