Merge "Fix SSYNC concurrency on partition"
This commit is contained in:
commit
b9d2c08e8d
@ -2585,7 +2585,7 @@ def _get_any_lock(fds):
|
|||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def lock_path(directory, timeout=10, timeout_class=None, limit=1):
|
def lock_path(directory, timeout=10, timeout_class=None, limit=1, name=None):
|
||||||
"""
|
"""
|
||||||
Context manager that acquires a lock on a directory. This will block until
|
Context manager that acquires a lock on a directory. This will block until
|
||||||
the lock can be acquired, or the timeout time has expired (whichever occurs
|
the lock can be acquired, or the timeout time has expired (whichever occurs
|
||||||
@ -2605,6 +2605,8 @@ def lock_path(directory, timeout=10, timeout_class=None, limit=1):
|
|||||||
the same directory at the time this method is called. Note that this
|
the same directory at the time this method is called. Note that this
|
||||||
limit is only applied during the current call to this method and does
|
limit is only applied during the current call to this method and does
|
||||||
not prevent subsequent calls giving a larger limit. Defaults to 1.
|
not prevent subsequent calls giving a larger limit. Defaults to 1.
|
||||||
|
:param name: A string to distinguishes different type of locks in a
|
||||||
|
directory
|
||||||
:raises TypeError: if limit is not an int.
|
:raises TypeError: if limit is not an int.
|
||||||
:raises ValueError: if limit is less than 1.
|
:raises ValueError: if limit is less than 1.
|
||||||
"""
|
"""
|
||||||
@ -2614,6 +2616,8 @@ def lock_path(directory, timeout=10, timeout_class=None, limit=1):
|
|||||||
timeout_class = swift.common.exceptions.LockTimeout
|
timeout_class = swift.common.exceptions.LockTimeout
|
||||||
mkdirs(directory)
|
mkdirs(directory)
|
||||||
lockpath = '%s/.lock' % directory
|
lockpath = '%s/.lock' % directory
|
||||||
|
if name:
|
||||||
|
lockpath += '-%s' % str(name)
|
||||||
fds = [os.open(get_zero_indexed_base_string(lockpath, i),
|
fds = [os.open(get_zero_indexed_base_string(lockpath, i),
|
||||||
os.O_WRONLY | os.O_CREAT)
|
os.O_WRONLY | os.O_CREAT)
|
||||||
for i in range(limit)]
|
for i in range(limit)]
|
||||||
|
@ -1267,23 +1267,34 @@ class BaseDiskFileManager(object):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def replication_lock(self, device):
|
def replication_lock(self, device, policy, partition):
|
||||||
"""
|
"""
|
||||||
A context manager that will lock on the device given, if
|
A context manager that will lock on the device given, if
|
||||||
configured to do so.
|
configured to do so.
|
||||||
|
|
||||||
:param device: name of target device
|
:param device: name of target device
|
||||||
|
:param policy: policy targeted by the replication request
|
||||||
|
:param partition: partition targeted by the replication request
|
||||||
:raises ReplicationLockTimeout: If the lock on the device
|
:raises ReplicationLockTimeout: If the lock on the device
|
||||||
cannot be granted within the configured timeout.
|
cannot be granted within the configured timeout.
|
||||||
"""
|
"""
|
||||||
if self.replication_concurrency_per_device:
|
if self.replication_concurrency_per_device:
|
||||||
dev_path = self.get_dev_path(device)
|
dev_path = self.get_dev_path(device)
|
||||||
|
part_path = os.path.join(dev_path, get_data_dir(policy),
|
||||||
|
str(partition))
|
||||||
|
limit_time = time.time() + self.replication_lock_timeout
|
||||||
with lock_path(
|
with lock_path(
|
||||||
dev_path,
|
dev_path,
|
||||||
timeout=self.replication_lock_timeout,
|
timeout=self.replication_lock_timeout,
|
||||||
timeout_class=ReplicationLockTimeout,
|
timeout_class=ReplicationLockTimeout,
|
||||||
limit=self.replication_concurrency_per_device):
|
limit=self.replication_concurrency_per_device):
|
||||||
yield True
|
with lock_path(
|
||||||
|
part_path,
|
||||||
|
timeout=limit_time - time.time(),
|
||||||
|
timeout_class=ReplicationLockTimeout,
|
||||||
|
limit=1,
|
||||||
|
name='replication'):
|
||||||
|
yield True
|
||||||
else:
|
else:
|
||||||
yield True
|
yield True
|
||||||
|
|
||||||
@ -1574,6 +1585,8 @@ class BaseDiskFileManager(object):
|
|||||||
# This lock is only held by people dealing with the hashes
|
# This lock is only held by people dealing with the hashes
|
||||||
# or the hash invalidations, and we've just removed those.
|
# or the hash invalidations, and we've just removed those.
|
||||||
_unlink_if_present(os.path.join(partition_path, ".lock"))
|
_unlink_if_present(os.path.join(partition_path, ".lock"))
|
||||||
|
_unlink_if_present(os.path.join(partition_path,
|
||||||
|
".lock-replication"))
|
||||||
os.rmdir(partition_path)
|
os.rmdir(partition_path)
|
||||||
except OSError as err:
|
except OSError as err:
|
||||||
self.logger.debug("Error cleaning up empty partition: %s", err)
|
self.logger.debug("Error cleaning up empty partition: %s", err)
|
||||||
|
@ -158,7 +158,9 @@ class Receiver(object):
|
|||||||
if not self.app.replication_semaphore.acquire(False):
|
if not self.app.replication_semaphore.acquire(False):
|
||||||
raise swob.HTTPServiceUnavailable()
|
raise swob.HTTPServiceUnavailable()
|
||||||
try:
|
try:
|
||||||
with self.diskfile_mgr.replication_lock(self.device):
|
with self.diskfile_mgr.replication_lock(self.device,
|
||||||
|
self.policy,
|
||||||
|
self.partition):
|
||||||
for data in self.missing_check():
|
for data in self.missing_check():
|
||||||
yield data
|
yield data
|
||||||
for data in self.updates():
|
for data in self.updates():
|
||||||
|
@ -1072,6 +1072,28 @@ class TestUtils(unittest.TestCase):
|
|||||||
self.assertTrue(exc2 is not None)
|
self.assertTrue(exc2 is not None)
|
||||||
self.assertTrue(not success)
|
self.assertTrue(not success)
|
||||||
|
|
||||||
|
@with_tempdir
|
||||||
|
def test_lock_path_name(self, tmpdir):
|
||||||
|
# With default limit (1), can't take the same named lock twice
|
||||||
|
success = False
|
||||||
|
with utils.lock_path(tmpdir, 0.1, name='foo'):
|
||||||
|
with self.assertRaises(LockTimeout):
|
||||||
|
with utils.lock_path(tmpdir, 0.1, name='foo'):
|
||||||
|
success = True
|
||||||
|
self.assertFalse(success)
|
||||||
|
# With default limit (1), can take two differently named locks
|
||||||
|
success = False
|
||||||
|
with utils.lock_path(tmpdir, 0.1, name='foo'):
|
||||||
|
with utils.lock_path(tmpdir, 0.1, name='bar'):
|
||||||
|
success = True
|
||||||
|
self.assertTrue(success)
|
||||||
|
# With default limit (1), can take a named lock and the default lock
|
||||||
|
success = False
|
||||||
|
with utils.lock_path(tmpdir, 0.1, name='foo'):
|
||||||
|
with utils.lock_path(tmpdir, 0.1):
|
||||||
|
success = True
|
||||||
|
self.assertTrue(success)
|
||||||
|
|
||||||
def test_normalize_timestamp(self):
|
def test_normalize_timestamp(self):
|
||||||
# Test swift.common.utils.normalize_timestamp
|
# Test swift.common.utils.normalize_timestamp
|
||||||
self.assertEqual(utils.normalize_timestamp('1253327593.48174'),
|
self.assertEqual(utils.normalize_timestamp('1253327593.48174'),
|
||||||
|
@ -1080,9 +1080,11 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
|||||||
self.df_mgr.replication_concurrency_per_device = 1
|
self.df_mgr.replication_concurrency_per_device = 1
|
||||||
self.df_mgr.replication_lock_timeout = 0.1
|
self.df_mgr.replication_lock_timeout = 0.1
|
||||||
success = False
|
success = False
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '1'):
|
||||||
with self.assertRaises(ReplicationLockTimeout):
|
with self.assertRaises(ReplicationLockTimeout):
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '2'):
|
||||||
success = True
|
success = True
|
||||||
self.assertFalse(success)
|
self.assertFalse(success)
|
||||||
|
|
||||||
@ -1093,9 +1095,11 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
|||||||
|
|
||||||
# 2 locks must succeed
|
# 2 locks must succeed
|
||||||
success = False
|
success = False
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '1'):
|
||||||
try:
|
try:
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '2'):
|
||||||
success = True
|
success = True
|
||||||
except ReplicationLockTimeout as err:
|
except ReplicationLockTimeout as err:
|
||||||
self.fail('Unexpected exception: %s' % err)
|
self.fail('Unexpected exception: %s' % err)
|
||||||
@ -1103,10 +1107,13 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
|||||||
|
|
||||||
# 3 locks must succeed
|
# 3 locks must succeed
|
||||||
success = False
|
success = False
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
POLICIES.legacy, '1'):
|
||||||
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '2'):
|
||||||
try:
|
try:
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '3'):
|
||||||
success = True
|
success = True
|
||||||
except ReplicationLockTimeout as err:
|
except ReplicationLockTimeout as err:
|
||||||
self.fail('Unexpected exception: %s' % err)
|
self.fail('Unexpected exception: %s' % err)
|
||||||
@ -1119,9 +1126,11 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
|||||||
|
|
||||||
# 2 locks with replication_concurrency_per_device=2 must succeed
|
# 2 locks with replication_concurrency_per_device=2 must succeed
|
||||||
success = False
|
success = False
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '1'):
|
||||||
try:
|
try:
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '2'):
|
||||||
success = True
|
success = True
|
||||||
except ReplicationLockTimeout as err:
|
except ReplicationLockTimeout as err:
|
||||||
self.fail('Unexpected exception: %s' % err)
|
self.fail('Unexpected exception: %s' % err)
|
||||||
@ -1129,10 +1138,13 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
|||||||
|
|
||||||
# 3 locks with replication_concurrency_per_device=2 must fail
|
# 3 locks with replication_concurrency_per_device=2 must fail
|
||||||
success = False
|
success = False
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
POLICIES.legacy, '1'):
|
||||||
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '2'):
|
||||||
with self.assertRaises(ReplicationLockTimeout):
|
with self.assertRaises(ReplicationLockTimeout):
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '3'):
|
||||||
success = True
|
success = True
|
||||||
self.assertFalse(success)
|
self.assertFalse(success)
|
||||||
|
|
||||||
@ -1141,14 +1153,29 @@ class DiskFileManagerMixin(BaseDiskFileTestMixin):
|
|||||||
self.df_mgr.replication_concurrency_per_device = 1
|
self.df_mgr.replication_concurrency_per_device = 1
|
||||||
self.df_mgr.replication_lock_timeout = 0.1
|
self.df_mgr.replication_lock_timeout = 0.1
|
||||||
success = False
|
success = False
|
||||||
with self.df_mgr.replication_lock(self.existing_device):
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '1'):
|
||||||
try:
|
try:
|
||||||
with self.df_mgr.replication_lock(self.existing_device2):
|
with self.df_mgr.replication_lock(self.existing_device2,
|
||||||
|
POLICIES.legacy, '2'):
|
||||||
success = True
|
success = True
|
||||||
except ReplicationLockTimeout as err:
|
except ReplicationLockTimeout as err:
|
||||||
self.fail('Unexpected exception: %s' % err)
|
self.fail('Unexpected exception: %s' % err)
|
||||||
self.assertTrue(success)
|
self.assertTrue(success)
|
||||||
|
|
||||||
|
def test_replication_lock_same_partition(self):
|
||||||
|
# Double check settings
|
||||||
|
self.df_mgr.replication_concurrency_per_device = 2
|
||||||
|
self.df_mgr.replication_lock_timeout = 0.1
|
||||||
|
success = False
|
||||||
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '1'):
|
||||||
|
with self.assertRaises(ReplicationLockTimeout):
|
||||||
|
with self.df_mgr.replication_lock(self.existing_device,
|
||||||
|
POLICIES.legacy, '1'):
|
||||||
|
success = True
|
||||||
|
self.assertFalse(success)
|
||||||
|
|
||||||
def test_missing_splice_warning(self):
|
def test_missing_splice_warning(self):
|
||||||
with mock.patch('swift.common.splice.splice._c_splice', None):
|
with mock.patch('swift.common.splice.splice._c_splice', None):
|
||||||
self.conf['splice'] = 'yes'
|
self.conf['splice'] = 'yes'
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import itertools
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
@ -131,7 +132,9 @@ class TestReceiver(unittest.TestCase):
|
|||||||
[':MISSING_CHECK: START', ':MISSING_CHECK: END',
|
[':MISSING_CHECK: START', ':MISSING_CHECK: END',
|
||||||
':UPDATES: START', ':UPDATES: END'])
|
':UPDATES: START', ':UPDATES: END'])
|
||||||
self.assertEqual(resp.status_int, 200)
|
self.assertEqual(resp.status_int, 200)
|
||||||
mocked_replication_lock.assert_called_once_with('sda1')
|
mocked_replication_lock.assert_called_once_with('sda1',
|
||||||
|
POLICIES.legacy,
|
||||||
|
'1')
|
||||||
|
|
||||||
def test_Receiver_with_default_storage_policy(self):
|
def test_Receiver_with_default_storage_policy(self):
|
||||||
req = swob.Request.blank(
|
req = swob.Request.blank(
|
||||||
@ -290,7 +293,7 @@ class TestReceiver(unittest.TestCase):
|
|||||||
self.controller, req)
|
self.controller, req)
|
||||||
|
|
||||||
def test_SSYNC_replication_lock_fail(self):
|
def test_SSYNC_replication_lock_fail(self):
|
||||||
def _mock(path):
|
def _mock(path, policy, partition):
|
||||||
with exceptions.ReplicationLockTimeout(0.01, '/somewhere/' + path):
|
with exceptions.ReplicationLockTimeout(0.01, '/somewhere/' + path):
|
||||||
eventlet.sleep(0.05)
|
eventlet.sleep(0.05)
|
||||||
with mock.patch.object(
|
with mock.patch.object(
|
||||||
@ -311,6 +314,54 @@ class TestReceiver(unittest.TestCase):
|
|||||||
'None/sda1/1 SSYNC LOCK TIMEOUT: 0.01 seconds: '
|
'None/sda1/1 SSYNC LOCK TIMEOUT: 0.01 seconds: '
|
||||||
'/somewhere/sda1')
|
'/somewhere/sda1')
|
||||||
|
|
||||||
|
def test_SSYNC_replication_lock_per_partition(self):
|
||||||
|
def _concurrent_ssync(path1, path2):
|
||||||
|
env = {'REQUEST_METHOD': 'SSYNC'}
|
||||||
|
body = ':MISSING_CHECK: START\r\n:MISSING_CHECK: END\r\n' \
|
||||||
|
':UPDATES: START\r\n:UPDATES: END\r\n'
|
||||||
|
req1 = swob.Request.blank(path1, environ=env, body=body)
|
||||||
|
req2 = swob.Request.blank(path2, environ=env, body=body)
|
||||||
|
|
||||||
|
rcvr1 = ssync_receiver.Receiver(self.controller, req1)
|
||||||
|
rcvr2 = ssync_receiver.Receiver(self.controller, req2)
|
||||||
|
|
||||||
|
body_lines1 = []
|
||||||
|
body_lines2 = []
|
||||||
|
|
||||||
|
for chunk1, chunk2 in itertools.izip_longest(rcvr1(), rcvr2()):
|
||||||
|
if chunk1 and chunk1.strip():
|
||||||
|
body_lines1.append(chunk1.strip())
|
||||||
|
if chunk2 and chunk2.strip():
|
||||||
|
body_lines2.append(chunk2.strip())
|
||||||
|
|
||||||
|
return body_lines1, body_lines2
|
||||||
|
|
||||||
|
self.controller._diskfile_router[POLICIES[0]]\
|
||||||
|
.replication_lock_timeout = 0.01
|
||||||
|
self.controller._diskfile_router[POLICIES[0]]\
|
||||||
|
.replication_concurrency_per_device = 2
|
||||||
|
# It should be possible to lock two different partitions
|
||||||
|
body_lines1, body_lines2 = _concurrent_ssync('/sda1/1', '/sda1/2')
|
||||||
|
self.assertEqual(
|
||||||
|
body_lines1,
|
||||||
|
[':MISSING_CHECK: START', ':MISSING_CHECK: END',
|
||||||
|
':UPDATES: START', ':UPDATES: END'])
|
||||||
|
self.assertEqual(
|
||||||
|
body_lines2,
|
||||||
|
[':MISSING_CHECK: START', ':MISSING_CHECK: END',
|
||||||
|
':UPDATES: START', ':UPDATES: END'])
|
||||||
|
|
||||||
|
# It should not be possible to lock the same partition twice
|
||||||
|
body_lines1, body_lines2 = _concurrent_ssync('/sda1/1', '/sda1/1')
|
||||||
|
self.assertEqual(
|
||||||
|
body_lines1,
|
||||||
|
[':MISSING_CHECK: START', ':MISSING_CHECK: END',
|
||||||
|
':UPDATES: START', ':UPDATES: END'])
|
||||||
|
self.assertRegexpMatches(
|
||||||
|
''.join(body_lines2),
|
||||||
|
"^:ERROR: 0 '0\.0[0-9]+ seconds: "
|
||||||
|
"/.+/sda1/objects/1/.lock-replication'$")
|
||||||
|
|
||||||
def test_SSYNC_initial_path(self):
|
def test_SSYNC_initial_path(self):
|
||||||
with mock.patch.object(
|
with mock.patch.object(
|
||||||
self.controller, 'replication_semaphore') as \
|
self.controller, 'replication_semaphore') as \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user