From f87a5487b5224f77261d82a1087d31820c29e8f8 Mon Sep 17 00:00:00 2001 From: Clay Gerrard Date: Mon, 14 Mar 2016 16:52:50 -0700 Subject: [PATCH] Make rsync ignore it's own temporary files In situations where rsync may inadvertently be unable to cleanup it's temporary files we shouldn't spread them around the cluster. By asking our rsync subexec to --exclude patterns that match it's own convention for temporary naming we'll only ever transfer real replicated artifacts and never temporary artifacts which should always be ignored until they are fully transfered. Cleanup of stale rsync droppings should be performed by the auditor and will be addressed in a separate change related to lp bug #1554005. Closes-Bug: #1553995 Change-Id: Ibe598b339af024d05e4d89c34d696e972d8189ff --- swift/obj/replicator.py | 1 + test/probe/test_object_handoff.py | 31 ++++++++++++++++++++++ test/unit/obj/test_replicator.py | 43 +++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py index aa38407d35..2d6a38bd3d 100644 --- a/swift/obj/replicator.py +++ b/swift/obj/replicator.py @@ -227,6 +227,7 @@ class ObjectReplicator(Daemon): '--timeout=%s' % self.rsync_io_timeout, '--contimeout=%s' % self.rsync_io_timeout, '--bwlimit=%s' % self.rsync_bwlimit, + '--exclude=.*.%s' % ''.join('[0-9a-zA-Z]' for i in range(6)) ] if self.rsync_compress and \ job['region'] != node['region']: diff --git a/test/probe/test_object_handoff.py b/test/probe/test_object_handoff.py index f3b02c53cd..a360021b7c 100755 --- a/test/probe/test_object_handoff.py +++ b/test/probe/test_object_handoff.py @@ -19,6 +19,7 @@ from uuid import uuid4 import random from hashlib import md5 from collections import defaultdict +import os from swiftclient import client @@ -82,6 +83,22 @@ class TestObjectHandoff(ReplProbeTest): raise Exception('Direct object GET did not return VERIFY, instead ' 'it returned: %s' % repr(odata)) + # drop a tempfile in the handoff's datadir, like it might have + # had if there was an rsync failure while it was previously a + # primary + handoff_device_path = self.device_dir('object', another_onode) + data_filename = None + for root, dirs, files in os.walk(handoff_device_path): + for filename in files: + if filename.endswith('.data'): + data_filename = filename + temp_filename = '.%s.6MbL6r' % data_filename + temp_filepath = os.path.join(root, temp_filename) + if not data_filename: + self.fail('Did not find any data files on %r' % + handoff_device_path) + open(temp_filepath, 'w') + # Assert container listing (via proxy and directly) has container/obj objs = [o['name'] for o in client.get_container(self.url, self.token, container)[1]] @@ -134,6 +151,20 @@ class TestObjectHandoff(ReplProbeTest): raise Exception('Direct object GET did not return VERIFY, instead ' 'it returned: %s' % repr(odata)) + # and that it does *not* have a temporary rsync dropping! + found_data_filename = False + primary_device_path = self.device_dir('object', onode) + for root, dirs, files in os.walk(primary_device_path): + for filename in files: + if filename.endswith('.6MbL6r'): + self.fail('Found unexpected file %s' % + os.path.join(root, filename)) + if filename == data_filename: + found_data_filename = True + self.assertTrue(found_data_filename, + 'Did not find data file %r on %r' % ( + data_filename, primary_device_path)) + # Assert the handoff server no longer has container/obj try: direct_client.direct_get_object( diff --git a/test/unit/obj/test_replicator.py b/test/unit/obj/test_replicator.py index a32fa99806..a3220887ed 100644 --- a/test/unit/obj/test_replicator.py +++ b/test/unit/obj/test_replicator.py @@ -910,6 +910,49 @@ class TestObjectReplicator(unittest.TestCase): jobs = self.replicator.collect_jobs() self.assertEqual(len(jobs), 0) + def test_replicator_skips_rsync_temp_files(self): + # the empty pre-setup dirs aren't that useful to us + device_path = os.path.join(self.devices, 'sda') + rmtree(device_path, ignore_errors=1) + os.mkdir(device_path) + # create a real data file to trigger rsync + df = self.df_mgr.get_diskfile('sda', '0', 'a', 'c', 'o', + policy=POLICIES.legacy) + ts = next(self.ts) + with df.create() as w: + w.write('asdf') + w.put({'X-Timestamp': ts.internal}) + w.commit(ts) + # pre-flight and post sync request for both other primaries + expected_replicate_requests = 4 + process_arg_checker = [ + # (return_code, stdout, ) + (0, '', []), + (0, '', []), + ] + stub_body = pickle.dumps({}) + with _mock_process(process_arg_checker) as rsync_log, \ + mock.patch('swift.obj.replicator.whataremyips', + side_effect=_ips), \ + mocked_http_conn(*[200] * expected_replicate_requests, + body=stub_body) as conn_log: + self.replicator.replicate() + self.assertEqual(['REPLICATE'] * expected_replicate_requests, + [r['method'] for r in conn_log.requests]) + # expect one rsync to each other primary node + self.assertEqual(2, len(rsync_log)) + expected = '--exclude=.*.[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]' \ + '[0-9a-zA-Z][0-9a-zA-Z][0-9a-zA-Z]' + for subprocess_info in rsync_log: + rsync_args = subprocess_info['rsync_args'] + for arg in rsync_args: + if arg.startswith('--exclude'): + self.assertEqual(arg, expected) + break + else: + self.fail('Did not find --exclude argument in %r' % + rsync_args) + def test_replicator_removes_zbf(self): # After running xfs_repair, a partition directory could become a # zero-byte file. If this happens, the replicator should clean it