From 091edb06310ef7210d4a476ad5b4cbbb03de3551 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Thu, 13 Jul 2023 13:45:20 -0700 Subject: [PATCH] Retry SQLite DB write failures due to locks Adds a database retry decorator to capture and retry exceptions rooted in SQLite locking. These locking errors are rooted in the fact that essentially, we can only have one distinct writer at a time. This writer becomes transaction oriented as well. Unfortunately with our green threads and API surface, we run into cases where we have background operations (mainly, periodic tasks...) and API surface transactions which need to operate against the DB as well. Because we can't say one task or another (realistically speaking) can have exclusive control and access, then we run into database locking errors. So when we encounter a lock error, we retry. Adds two additional configuration parameters to the database configuration section, to allow this capability to be further tuned, as file IO performance is *surely* a contributing factor to our locking issues as we mostly see them with a loaded CI system where other issues begin to crop up. The new parameters are as follows: * sqlite_retries, a boolean value allowing the retry logic to be disabled. This can largely be ignored, but is available as it was logical to include. * sqlite_max_wait_for_retry, an integer value, default 30 seconds as to how long to wait for retrying SQLite database operations which are failing due to a "database is locked" error. The retry logic uses the tenacity library, and performs an exponential backoff. Setting the amount of time to a very large number is not advisable, as such the default of 30 seconds was deemed reasonable. 
Change-Id: Ifeb92e9f23a94f2d96bb495fe63a71df9865fef3 --- ironic/conf/database.py | 10 +++- ironic/db/sqlalchemy/api.py | 47 +++++++++++++++++++ ironic/tests/unit/db/base.py | 2 + ironic/tests/unit/db/test_nodes.py | 38 +++++++++++++-- ironic/tests/unit/objects/test_bios.py | 30 +++++++----- ironic/tests/unit/objects/test_node.py | 17 +++---- .../tests/unit/objects/test_node_inventory.py | 10 ++-- ironic/tests/unit/objects/test_port.py | 15 +++--- ...dd-sqlite-db-retries-f493d5d7aa6db78b.yaml | 25 ++++++++++ 9 files changed, 160 insertions(+), 34 deletions(-) create mode 100644 releasenotes/notes/add-sqlite-db-retries-f493d5d7aa6db78b.yaml diff --git a/ironic/conf/database.py b/ironic/conf/database.py index fccf2f437d..37986e1cd3 100644 --- a/ironic/conf/database.py +++ b/ironic/conf/database.py @@ -20,7 +20,15 @@ from ironic.common.i18n import _ opts = [ cfg.StrOpt('mysql_engine', default='InnoDB', - help=_('MySQL engine to use.')) + help=_('MySQL engine to use.')), + cfg.BoolOpt('sqlite_retries', + default=True, + help=_('If SQLite database operation retry logic is enabled ' + 'or not. 
Enabled by default.')), + cfg.IntOpt('sqlite_max_wait_for_retry', + default=30, + help=_('Maximum number of seconds to retry SQLite database ' + 'locks.')), ] diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index cbacf32024..5c976e8152 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -16,7 +16,9 @@ import collections import datetime +import functools import json +import logging import threading from oslo_concurrency import lockutils @@ -37,6 +39,7 @@ from sqlalchemy.exc import NoResultFound, MultipleResultsFound from sqlalchemy.orm import Load from sqlalchemy.orm import selectinload from sqlalchemy import sql +import tenacity from ironic.common import exception from ironic.common.i18n import _ @@ -63,6 +66,33 @@ synchronized = lockutils.synchronized_with_prefix('ironic-') MAX_TRAITS_PER_NODE = 50 +def wrap_sqlite_retry(f): + + @functools.wraps(f) + def wrapper(*args, **kwargs): + if (CONF.database.sqlite_retries + and not utils.is_ironic_using_sqlite()): + return f(*args, **kwargs) + else: + for attempt in tenacity.Retrying( + retry=( + tenacity.retry_if_exception_type( + sa.exc.OperationalError) + & tenacity.retry_if_exception( + lambda e: 'database is locked' in str(e)) + ), + wait=tenacity.wait_full_jitter( + multiplier=0.25, + max=CONF.database.sqlite_max_wait_for_retry), + before_sleep=( + tenacity.before_sleep_log(LOG, logging.debug) + ), + reraise=True): + with attempt: + return f(*args, **kwargs) + return wrapper + + def get_backend(): """The backend is this module itself.""" return Connection() @@ -653,6 +683,7 @@ class Connection(api.Connection): return mapping @synchronized(RESERVATION_SEMAPHORE, fair=True) + @wrap_sqlite_retry def _reserve_node_place_lock(self, tag, node_id, node): try: # NOTE(TheJulia): We explicitly do *not* synch the session @@ -705,6 +736,7 @@ class Connection(api.Connection): # Return a node object as that is the contract for this method. 
return self.get_node_by_id(node.id) + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def release_node(self, tag, node_id): with _session_for_read() as session: @@ -734,6 +766,7 @@ class Connection(api.Connection): except NoResultFound: raise exception.NodeNotFound(node=node_id) + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def create_node(self, values): # ensure defaults are present for new nodes @@ -839,6 +872,7 @@ class Connection(api.Connection): raise exception.InstanceNotFound(instance_uuid=instance) return res + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def destroy_node(self, node_id): with _session_for_write() as session: @@ -914,6 +948,7 @@ class Connection(api.Connection): query.delete() + @wrap_sqlite_retry def update_node(self, node_id, values): # NOTE(dtantsur): this can lead to very strange errors if 'uuid' in values: @@ -1059,6 +1094,7 @@ class Connection(api.Connection): return _paginate_query(models.Port, limit, marker, sort_key, sort_dir, query) + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def create_port(self, values): if not values.get('uuid'): @@ -1076,6 +1112,7 @@ class Connection(api.Connection): raise exception.PortAlreadyExists(uuid=values['uuid']) return port + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def update_port(self, port_id, values): # NOTE(dtantsur): this can lead to very strange errors @@ -1098,6 +1135,7 @@ class Connection(api.Connection): raise exception.MACAlreadyExists(mac=values['address']) return ref + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def destroy_port(self, port_id): with _session_for_write() as session: @@ -1498,6 +1536,7 @@ class Connection(api.Connection): .filter_by(conductor_id=conductor_id)) query.delete() + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def touch_node_provisioning(self, node_id): with _session_for_write() as session: @@ -2049,6 +2088,7 @@ class Connection(api.Connection): models.NodeTrait).filter_by(node_id=node_id, trait=trait) return 
session.query(q.exists()).scalar() + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def create_bios_setting_list(self, node_id, settings, version): bios_settings = [] @@ -2078,6 +2118,7 @@ class Connection(api.Connection): node=node_id, name=setting['name']) return bios_settings + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def update_bios_setting_list(self, node_id, settings, version): bios_settings = [] @@ -2109,6 +2150,7 @@ class Connection(api.Connection): node=node_id, name=setting['name']) return bios_settings + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def delete_bios_setting_list(self, node_id, names): missing_bios_settings = [] @@ -2635,6 +2677,7 @@ class Connection(api.Connection): # ordered ascending originally. return final_set + @wrap_sqlite_retry def bulk_delete_node_history_records(self, entries): with _session_for_write() as session: # Uses input entry list, selects entries matching those ids @@ -2672,6 +2715,7 @@ class Connection(api.Connection): ) return res + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def create_node_inventory(self, values): inventory = models.NodeInventory() @@ -2686,6 +2730,7 @@ class Connection(api.Connection): session.flush() return inventory + @wrap_sqlite_retry @oslo_db_api.retry_on_deadlock def destroy_node_inventory_by_node_id(self, node_id): with _session_for_write() as session: @@ -2748,6 +2793,7 @@ class Connection(api.Connection): return shard_list + @wrap_sqlite_retry def create_firmware_component(self, values): """Create a FirmwareComponent record for a given node. @@ -2779,6 +2825,7 @@ class Connection(api.Connection): name=values['component'], node=values['node_id']) return fw_component + @wrap_sqlite_retry def update_firmware_component(self, node_id, component, values): """Update a FirmwareComponent record. 
diff --git a/ironic/tests/unit/db/base.py b/ironic/tests/unit/db/base.py index 8541e53642..e251b067e7 100644 --- a/ironic/tests/unit/db/base.py +++ b/ironic/tests/unit/db/base.py @@ -43,6 +43,8 @@ class Database(fixtures.Fixture): self.post_migrations() self._DB = "".join(line for line in conn.connection.iterdump()) self.engine.dispose() + # Disable retry logic for unit testing, to minimize overhead. + CONF.set_override('sqlite_retries', False, group='database') def setup_sqlite(self, db_migrate): if db_migrate.version(): diff --git a/ironic/tests/unit/db/test_nodes.py b/ironic/tests/unit/db/test_nodes.py index e8c0fa6e48..ba93ec7122 100644 --- a/ironic/tests/unit/db/test_nodes.py +++ b/ironic/tests/unit/db/test_nodes.py @@ -18,17 +18,27 @@ import datetime from unittest import mock +from oslo_config import cfg from oslo_utils import timeutils from oslo_utils import uuidutils -from sqlalchemy.orm import exc as sa_exc +from sqlalchemy import exc as sa_exc +# NOTE(TheJulia): At some point, we should drop the line below, +# as I understand the ORM exceptions are going to move completely +# after SQLAlchemy 2.0 to just the base exception class. 
+from sqlalchemy.orm import exc as sa_orm_exc from ironic.common import exception from ironic.common import states +from ironic.db.sqlalchemy import api as dbapi +from ironic.db.sqlalchemy.api import Connection as db_conn from ironic.db.sqlalchemy.models import NodeInventory from ironic.tests.unit.db import base from ironic.tests.unit.db import utils +CONF = cfg.CONF + + class DbNodeTestCase(base.DbTestCase): def test_create_node(self): @@ -535,7 +545,7 @@ class DbNodeTestCase(base.DbTestCase): 'deploy_interface', 'boot_interface', 'driver', 'extra']: try: - self.assertRaises(sa_exc.DetachedInstanceError, + self.assertRaises(sa_orm_exc.DetachedInstanceError, _attempt_field_access, r, field) except AttributeError: pass @@ -592,7 +602,7 @@ class DbNodeTestCase(base.DbTestCase): 'driver', 'extra', 'power_state', 'traits']: try: - self.assertRaises(sa_exc.DetachedInstanceError, + self.assertRaises(sa_orm_exc.DetachedInstanceError, _attempt_field_access, r, field) except AttributeError: # We expect an AttributeError, in addition to @@ -847,6 +857,28 @@ class DbNodeTestCase(base.DbTestCase): self.assertRaises(exception.NodeNotFound, self.dbapi.update_node, node_uuid, {'extra': new_extra}) + @mock.patch.object(dbapi, 'LOG', autospec=True) + def test_update_node_retries(self, log_mock): + """Test retry logic to ensure it works.""" + node = utils.create_test_node() + CONF.set_override('sqlite_retries', True, group='database') + # NOTE(TheJulia): Update is an ideal place to test retries + # as the underlying work is done by _do_update_node. 
+ with mock.patch.object(db_conn, '_do_update_node', + autospec=True) as mock_update: + sa_err = sa_exc.OperationalError( + statement=None, + params=None, + orig=Exception('database is locked')) + mock_update.side_effect = [ + sa_err, + sa_err, + node + ] + self.dbapi.update_node(node.id, {'extra': {'foo': 'bar'}}) + self.assertEqual(3, mock_update.call_count) + self.assertEqual(2, log_mock.log.call_count) + def test_update_node_uuid(self): node = utils.create_test_node() self.assertRaises(exception.InvalidParameterValue, diff --git a/ironic/tests/unit/objects/test_bios.py b/ironic/tests/unit/objects/test_bios.py index 5f4d8b9993..9f7f4e7465 100644 --- a/ironic/tests/unit/objects/test_bios.py +++ b/ironic/tests/unit/objects/test_bios.py @@ -15,6 +15,7 @@ from unittest import mock from ironic.common import context from ironic.db import api as dbapi +from ironic.db.sqlalchemy.api import Connection as db_conn from ironic import objects from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -71,7 +72,7 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): self.assertEqual(bios_setting2['name'], bios_obj_list[1].name) self.assertEqual(bios_setting2['value'], bios_obj_list[1].value) - @mock.patch.object(dbapi.IMPL, 'create_bios_setting_list', autospec=True) + @mock.patch.object(db_conn, 'create_bios_setting_list', autospec=True) def test_create(self, mock_create_list): fake_call_args = {'node_id': self.bios_setting['node_id'], 'name': self.bios_setting['name'], @@ -97,7 +98,8 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): mock_create_list.return_value = [self.bios_setting] mock_create_list.call_args bios_obj.create() - mock_create_list.assert_called_once_with(self.bios_setting['node_id'], + mock_create_list.assert_called_once_with(mock.ANY, + self.bios_setting['node_id'], setting, self.bios_setting['version']) self.assertEqual(self.bios_setting['node_id'], 
bios_obj.node_id) @@ -113,7 +115,7 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): bios_obj.reset_required) self.assertEqual(self.bios_setting['unique'], bios_obj.unique) - @mock.patch.object(dbapi.IMPL, 'update_bios_setting_list', autospec=True) + @mock.patch.object(db_conn, 'update_bios_setting_list', autospec=True) def test_save(self, mock_update_list): fake_call_args = {'node_id': self.bios_setting['node_id'], 'name': self.bios_setting['name'], @@ -131,14 +133,15 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): mock_update_list.return_value = [self.bios_setting] mock_update_list.call_args bios_obj.save() - mock_update_list.assert_called_once_with(self.bios_setting['node_id'], + mock_update_list.assert_called_once_with(mock.ANY, + self.bios_setting['node_id'], setting, self.bios_setting['version']) self.assertEqual(self.bios_setting['node_id'], bios_obj.node_id) self.assertEqual(self.bios_setting['name'], bios_obj.name) self.assertEqual(self.bios_setting['value'], bios_obj.value) - @mock.patch.object(dbapi.IMPL, 'create_bios_setting_list', autospec=True) + @mock.patch.object(db_conn, 'create_bios_setting_list', autospec=True) def test_list_create(self, mock_create_list): bios_setting2 = db_utils.get_test_bios_setting(name='hyperthread', value='enabled') @@ -147,7 +150,8 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): bios_obj_list = objects.BIOSSettingList.create( self.context, self.node_id, settings) - mock_create_list.assert_called_once_with(self.node_id, settings, '1.1') + mock_create_list.assert_called_once_with(mock.ANY, self.node_id, + settings, '1.1') self.assertEqual(self.context, bios_obj_list._context) self.assertEqual(2, len(bios_obj_list)) self.assertEqual(self.bios_setting['node_id'], @@ -157,7 +161,7 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): self.assertEqual(bios_setting2['node_id'], bios_obj_list[1].node_id) 
self.assertEqual(bios_setting2['name'], bios_obj_list[1].name) - @mock.patch.object(dbapi.IMPL, 'update_bios_setting_list', autospec=True) + @mock.patch.object(db_conn, 'update_bios_setting_list', autospec=True) def test_list_save(self, mock_update_list): bios_setting2 = db_utils.get_test_bios_setting(name='hyperthread', value='enabled') @@ -166,7 +170,8 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): bios_obj_list = objects.BIOSSettingList.save( self.context, self.node_id, settings) - mock_update_list.assert_called_once_with(self.node_id, settings, '1.1') + mock_update_list.assert_called_once_with(mock.ANY, self.node_id, + settings, '1.1') self.assertEqual(self.context, bios_obj_list._context) self.assertEqual(2, len(bios_obj_list)) self.assertEqual(self.bios_setting['node_id'], @@ -177,19 +182,20 @@ class TestBIOSSettingObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): self.assertEqual(bios_setting2['name'], bios_obj_list[1].name) self.assertEqual(bios_setting2['value'], bios_obj_list[1].value) - @mock.patch.object(dbapi.IMPL, 'delete_bios_setting_list', autospec=True) + @mock.patch.object(db_conn, 'delete_bios_setting_list', autospec=True) def test_delete(self, mock_delete): objects.BIOSSetting.delete(self.context, self.node_id, self.bios_setting['name']) - mock_delete.assert_called_once_with(self.node_id, + mock_delete.assert_called_once_with(mock.ANY, + self.node_id, [self.bios_setting['name']]) - @mock.patch.object(dbapi.IMPL, 'delete_bios_setting_list', autospec=True) + @mock.patch.object(db_conn, 'delete_bios_setting_list', autospec=True) def test_list_delete(self, mock_delete): bios_setting2 = db_utils.get_test_bios_setting(name='hyperthread') name_list = [self.bios_setting['name'], bios_setting2['name']] objects.BIOSSettingList.delete(self.context, self.node_id, name_list) - mock_delete.assert_called_once_with(self.node_id, name_list) + mock_delete.assert_called_once_with(mock.ANY, self.node_id, name_list) 
@mock.patch('ironic.objects.bios.BIOSSettingList.get_by_node_id', spec_set=types.FunctionType) diff --git a/ironic/tests/unit/objects/test_node.py b/ironic/tests/unit/objects/test_node.py index 05793e3a61..b2322a7a4f 100644 --- a/ironic/tests/unit/objects/test_node.py +++ b/ironic/tests/unit/objects/test_node.py @@ -22,6 +22,7 @@ from testtools import matchers from ironic.common import context from ironic.common import exception +from ironic.db.sqlalchemy.api import Connection as db_conn from ironic import objects from ironic.objects import node as node_objects from ironic.tests.unit.db import base as db_base @@ -479,15 +480,15 @@ class TestNodeObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): node_id) def test_release(self): - with mock.patch.object(self.dbapi, 'release_node', + with mock.patch.object(db_conn, 'release_node', autospec=True) as mock_release: node_id = self.fake_node['id'] fake_tag = 'fake-tag' objects.Node.release(self.context, fake_tag, node_id) - mock_release.assert_called_once_with(fake_tag, node_id) + mock_release.assert_called_once_with(mock.ANY, fake_tag, node_id) def test_release_node_not_found(self): - with mock.patch.object(self.dbapi, 'release_node', + with mock.patch.object(db_conn, 'release_node', autospec=True) as mock_release: node_id = 'non-existent' mock_release.side_effect = exception.NodeNotFound(node=node_id) @@ -496,25 +497,25 @@ class TestNodeObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): 'fake-tag', node_id) def test_touch_provisioning(self): - with mock.patch.object(self.dbapi, 'get_node_by_uuid', + with mock.patch.object(db_conn, 'get_node_by_uuid', autospec=True) as mock_get_node: mock_get_node.return_value = self.fake_node - with mock.patch.object(self.dbapi, 'touch_node_provisioning', + with mock.patch.object(db_conn, 'touch_node_provisioning', autospec=True) as mock_touch: node = objects.Node.get(self.context, self.fake_node['uuid']) node.touch_provisioning() - mock_touch.assert_called_once_with(node.id) + 
mock_touch.assert_called_once_with(mock.ANY, node.id) def test_create(self): node = obj_utils.get_test_node(self.ctxt, **self.fake_node) - with mock.patch.object(self.dbapi, 'create_node', + with mock.patch.object(db_conn, 'create_node', autospec=True) as mock_create_node: mock_create_node.return_value = db_utils.get_test_node() node.create() args, _kwargs = mock_create_node.call_args - self.assertEqual(objects.Node.VERSION, args[0]['version']) + self.assertEqual(objects.Node.VERSION, args[1]['version']) self.assertEqual(1, mock_create_node.call_count) def test_create_with_invalid_properties(self): diff --git a/ironic/tests/unit/objects/test_node_inventory.py b/ironic/tests/unit/objects/test_node_inventory.py index 9eee77a160..be16aa159a 100644 --- a/ironic/tests/unit/objects/test_node_inventory.py +++ b/ironic/tests/unit/objects/test_node_inventory.py @@ -12,6 +12,7 @@ from unittest import mock +from ironic.db.sqlalchemy.api import Connection as db_conn from ironic import objects from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils @@ -25,25 +26,26 @@ class TestNodeInventoryObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): self.fake_inventory = db_utils.get_test_inventory() def test_create(self): - with mock.patch.object(self.dbapi, 'create_node_inventory', + with mock.patch.object(db_conn, 'create_node_inventory', autospec=True) as mock_db_create: mock_db_create.return_value = self.fake_inventory new_inventory = objects.NodeInventory( self.context, **self.fake_inventory) new_inventory.create() - mock_db_create.assert_called_once_with(self.fake_inventory) + mock_db_create.assert_called_once_with(mock.ANY, + self.fake_inventory) def test_destroy(self): node_id = self.fake_inventory['node_id'] with mock.patch.object(self.dbapi, 'get_node_inventory_by_node_id', autospec=True) as mock_get: mock_get.return_value = self.fake_inventory - with mock.patch.object(self.dbapi, + with mock.patch.object(db_conn, 
'destroy_node_inventory_by_node_id', autospec=True) as mock_db_destroy: inventory = objects.NodeInventory.get_by_node_id(self.context, node_id) inventory.destroy() - mock_db_destroy.assert_called_once_with(node_id) + mock_db_destroy.assert_called_once_with(mock.ANY, node_id) diff --git a/ironic/tests/unit/objects/test_port.py b/ironic/tests/unit/objects/test_port.py index 4c7280216f..2f43785764 100644 --- a/ironic/tests/unit/objects/test_port.py +++ b/ironic/tests/unit/objects/test_port.py @@ -21,6 +21,7 @@ from oslo_config import cfg from testtools import matchers from ironic.common import exception +from ironic.db.sqlalchemy.api import Connection as db_conn from ironic import objects from ironic.objects import base as obj_base from ironic.tests.unit.db import base as db_base @@ -86,9 +87,9 @@ class TestPortObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): def test_create(self): port = objects.Port(self.context, **self.fake_port) - with mock.patch.object(self.dbapi, 'create_port', + with mock.patch.object(db_conn, 'create_port', autospec=True) as mock_create_port: - with mock.patch.object(self.dbapi, 'get_port_by_id', + with mock.patch.object(db_conn, 'get_port_by_id', autospec=True) as mock_get_port: test_port = db_utils.get_test_port() mock_create_port.return_value = test_port @@ -97,7 +98,7 @@ class TestPortObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): port.create() args, _kwargs = mock_create_port.call_args - self.assertEqual(objects.Port.VERSION, args[0]['version']) + self.assertEqual(objects.Port.VERSION, args[1]['version']) def test_save(self): uuid = self.fake_port['uuid'] @@ -106,7 +107,7 @@ class TestPortObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): with mock.patch.object(self.dbapi, 'get_port_by_uuid', autospec=True) as mock_get_port: mock_get_port.return_value = self.fake_port - with mock.patch.object(self.dbapi, 'update_port', + with mock.patch.object(db_conn, 'update_port', autospec=True) as mock_update_port: 
mock_update_port.return_value = ( db_utils.get_test_port(address=address, @@ -117,8 +118,10 @@ class TestPortObject(db_base.DbTestCase, obj_utils.SchemasTestMixIn): mock_get_port.assert_called_once_with(uuid) mock_update_port.assert_called_once_with( - uuid, {'version': objects.Port.VERSION, - 'address': "b2:54:00:cf:2d:40"}) + mock.ANY, + uuid, + {'version': objects.Port.VERSION, + 'address': "b2:54:00:cf:2d:40"}) self.assertEqual(self.context, p._context) res_updated_at = (p.updated_at).replace(tzinfo=None) self.assertEqual(test_time, res_updated_at) diff --git a/releasenotes/notes/add-sqlite-db-retries-f493d5d7aa6db78b.yaml b/releasenotes/notes/add-sqlite-db-retries-f493d5d7aa6db78b.yaml new file mode 100644 index 0000000000..4c5f116cc9 --- /dev/null +++ b/releasenotes/notes/add-sqlite-db-retries-f493d5d7aa6db78b.yaml @@ -0,0 +1,25 @@ +--- +fixes: + - | + Adds a database write retry decorate for SQLite failures reporting + "database is locked". By default, through the new configuration + parameter ``[database]sqlite_max_wait_for_retry``, retries will + be performed on failing write operations for up to *30* seconds. + + This value can be tuned, but be warned it is an expotential + backoff retry model, and HTTP requests can give up if no + response is received in a reasonable time, thus *30* seconds + was deemed a reasonable default. + + The retry logic can be disabled using the + ``[database]sqlite_retries`` option, which defaults to + *True*. Users of other, mutli-threaded/concurrent-write database + platforms are not impacted by this change, as the retry logic + recognizes if another database is in use and bypasses the retry + logic in that case. A similar retry logic concept already exists + with other databases in the form of a "Database Deadlock" retry + where two writers conflict on the same row or table. The database + abstraction layer already handles such deadlock conditions. 
+ The SQLite file based locking issue is unfortunately more common + with file based write locking because the entire file, in other words + the entire database, must be locked to perform the write operation.