Ignore auditor status files to prevent replicator report errors

Ignore `auditor_status_*.json` files while collecting jobs, so that the
replicator won't use these wrong paths to find objects. Doing so causes
an exception that increases the failure count in the replicator report.

Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com>
Co-Authored-By: Mark Kirkwood <mark.kirkwood@catalyst.net.nz>

Change-Id: Ib15a0987288d9ee32432c1998aefe638ca3b223b
Closes-Bug: #1583305
This commit is contained in:
Charles Hsu 2016-08-11 00:53:13 +08:00
parent 22184eb882
commit 65b1820407
2 changed files with 42 additions and 4 deletions

View File

@ -357,12 +357,12 @@ class ObjectReplicator(Daemon):
handoff_partition_deleted = True
except (Exception, Timeout):
self.logger.exception(_("Error syncing handoff partition"))
self._add_failure_stats(failure_devs_info)
finally:
target_devs_info = set([(target_dev['replication_ip'],
target_dev['device'])
for target_dev in job['nodes']])
self.stats['success'] += len(target_devs_info - failure_devs_info)
self._add_failure_stats(failure_devs_info)
if not handoff_partition_deleted:
self.handoffs_remaining += 1
self.partition_times.append(time.time() - begin)
@ -491,10 +491,10 @@ class ObjectReplicator(Daemon):
self.suffix_count += len(local_hash)
except (Exception, Timeout):
failure_devs_info.update(target_devs_info)
self._add_failure_stats(failure_devs_info)
self.logger.exception(_("Error syncing partition"))
finally:
self.stats['success'] += len(target_devs_info - failure_devs_info)
self._add_failure_stats(failure_devs_info)
self.partition_times.append(time.time() - begin)
self.logger.timing_since('partition.update.timing', begin)
@ -613,6 +613,11 @@ class ObjectReplicator(Daemon):
and partition not in override_partitions):
continue
if (partition.startswith('auditor_status_') and
partition.endswith('.json')):
# ignore auditor status files
continue
part_nodes = None
try:
job_path = join(obj_path, partition)

View File

@ -235,7 +235,7 @@ class TestObjectReplicator(unittest.TestCase):
config,
))
def _write_disk_data(self, disk_name):
def _write_disk_data(self, disk_name, with_json=False):
os.mkdir(os.path.join(self.devices, disk_name))
objects = os.path.join(self.devices, disk_name,
diskfile.get_data_dir(POLICIES[0]))
@ -251,6 +251,13 @@ class TestObjectReplicator(unittest.TestCase):
parts_1[part] = os.path.join(objects_1, part)
os.mkdir(parts_1[part])
if with_json:
for json_file in ['auditor_status_ZBF.json',
'auditor_status_ALL.json']:
for obj_dir in [objects, objects_1]:
with open(os.path.join(obj_dir, json_file), 'w'):
pass
return objects, objects_1, parts, parts_1
def _create_replicator(self):
@ -418,6 +425,32 @@ class TestObjectReplicator(unittest.TestCase):
self.assertEqual(jobs_by_pol_part[part]['path'],
os.path.join(self.objects_1, part[1:]))
def test_collect_jobs_failure_report_with_auditor_stats_json(self):
    """collect_jobs() reports no failures when auditor status files exist.

    Disk data is written with ``with_json=True`` so that
    ``auditor_status_*.json`` files sit alongside the partition
    directories; after collecting jobs the replicator's failure
    counter must still be zero.
    """
    # (device name, replication ip) per device; id and zone both follow
    # the position in this list.
    dev_layout = [
        ('sda', '127.0.0.0'),
        ('sdb', '127.0.0.0'),
        ('sdc', '127.0.0.1'),
        ('sdd', '127.0.0.1'),
    ]
    ring_devs = [
        {'id': dev_id, 'device': dev_name, 'zone': dev_id,
         'region': 1, 'ip': '1.1.1.1', 'port': 1111,
         'replication_ip': repl_ip, 'replication_port': 6200}
        for dev_id, (dev_name, repl_ip) in enumerate(dev_layout)
    ]
    # Only sdb, sdc and sdd are populated by this test; the returned
    # paths are not needed for the assertion below.
    for disk_name in ('sdb', 'sdc', 'sdd'):
        self._write_disk_data(disk_name, with_json=True)
    _create_test_rings(self.testdir, ring_devs)
    self.replicator.collect_jobs()
    self.assertEqual(self.replicator.stats['failure'], 0)
@mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l)
def test_collect_jobs_multi_disk(self, mock_shuffle):
devs = [
@ -1599,7 +1632,7 @@ class TestObjectReplicator(unittest.TestCase):
return 2, {'abc': 'def'}
def fake_exc(tester, *args, **kwargs):
if 'Error syncing partition' in args[0]:
if 'Error syncing partition timeout' in args[0]:
tester.i_failed = True
self.i_failed = False