Limit number of records deleted by aodh-expirer

This patch introduces the same functionality that was recently
implemented in panko[1]. It allows us to limit the number of alarm
histories deleted in a single iteration, so that the delete query does
not take a long time when there are many expired records.

[1] Icf83ffe089301b3782273923f18efd4d209131c2

Change-Id: Ie1d1bbb911cf56a56f712291f61ffaabfa97422f
This commit is contained in:
Takashi Kajinami 2020-10-05 08:47:40 +09:00
parent ce0954c8e7
commit 02179e0c32
8 changed files with 48 additions and 21 deletions

View File

@ -33,9 +33,22 @@ def expirer():
if conf.database.alarm_history_time_to_live > 0:
LOG.debug("Clearing expired alarm history data")
storage_conn = storage.get_connection_from_config(conf)
storage_conn.clear_expired_alarm_history_data(
conf.database.alarm_history_time_to_live)
conn = storage.get_connection_from_config(conf)
max_count = conf.database.alarm_histories_delete_batch_size
try:
if max_count > 0:
conn.clear_expired_alarm_history_data(
conf.database.alarm_history_time_to_live,
max_count)
else:
deleted = max_count = 100
while deleted and deleted > 0:
deleted = conn.clear_expired_alarm_history_data(
conf.database.alarm_history_time_to_live,
max_count)
except TypeError:
LOG.warning("Storage driver does not support "
"'alarm_histories_delete_batch_size' config option.")
else:
LOG.info("Nothing to clean, database alarm history time to live "
"is disabled")

View File

@ -34,6 +34,11 @@ OPTS = [
default=-1,
help=("Number of seconds that alarm histories are kept "
"in the database for (<= 0 means forever).")),
cfg.IntOpt('alarm_histories_delete_batch_size',
default=0,
min=0,
help=("Number of alarm histories to be deleted in one "
"iteration from the database (0 means all).")),
]

View File

@ -191,13 +191,13 @@ class Connection(object):
return cls.STORAGE_CAPABILITIES
@staticmethod
def clear_expired_alarm_history_data(alarm_history_ttl):
def clear_expired_alarm_history_data(ttl, max_count=None):
"""Clear expired alarm history data from the backend storage system.
Clearing occurs according to the time-to-live.
:param alarm_history_ttl: Number of seconds to keep alarm history
records for.
:param ttl: Number of seconds to keep alarm history records for.
:param max_count: Number of records to delete.
"""
raise aodh.NotImplementedError('Clearing alarm history '
'not implemented')

View File

@ -56,13 +56,12 @@ class Connection(base.Connection):
"""Delete an alarm and its history data."""
@staticmethod
def clear_expired_alarm_history_data(alarm_history_ttl):
def clear_expired_alarm_history_data(ttl, max_count=None):
"""Clear expired alarm history data from the backend storage system.
Clearing occurs according to the time-to-live.
:param alarm_history_ttl: Number of seconds to keep alarm history
records for.
:param ttl: Number of seconds to keep alarm history records for.
:param max_count: Number of records to delete.
"""
LOG.info('Dropping alarm history data with TTL %d',
alarm_history_ttl)
LOG.info('Dropping alarm history %d data with TTL %d', max_count, ttl)

View File

@ -398,21 +398,23 @@ class Connection(base.Connection):
alarm_change_row.update(alarm_change)
session.add(alarm_change_row)
def clear_expired_alarm_history_data(self, alarm_history_ttl):
def clear_expired_alarm_history_data(self, ttl, max_count=100):
"""Clear expired alarm history data from the backend storage system.
Clearing occurs according to the time-to-live.
:param alarm_history_ttl: Number of seconds to keep alarm history
records for.
:param ttl: Number of seconds to keep alarm history records for.
:param max_count: Number of records to delete.
"""
session = self._engine_facade.get_session()
with session.begin():
valid_start = (timeutils.utcnow() -
datetime.timedelta(seconds=alarm_history_ttl))
deleted_rows = (session.query(models.AlarmChange)
.filter(models.AlarmChange.timestamp < valid_start)
.delete())
end = timeutils.utcnow() - datetime.timedelta(seconds=ttl)
alarm_history_q = (session.query(models.AlarmChange.event_id)
.filter(models.AlarmChange.timestamp < end))
event_ids = [i[0] for i in alarm_history_q.limit(max_count)]
deleted_rows = session.query(models.AlarmChange).filter(
models.AlarmChange.event_id.in_(event_ids)
).delete(synchronize_session="fetch")
LOG.info("%d alarm histories are removed from database",
deleted_rows)

View File

@ -277,7 +277,7 @@ class AlarmHistoryTest(AlarmTestBase):
def _clear_alarm_history(self, utcnow, ttl, count):
self.mock_utcnow.return_value = utcnow
self.alarm_conn.clear_expired_alarm_history_data(ttl)
self.alarm_conn.clear_expired_alarm_history_data(ttl, 100)
history = list(self.alarm_conn.query_alarm_history())
self.assertEqual(count, len(history))

View File

@ -55,6 +55,7 @@ class BinTestCase(base.BaseTestCase):
def test_run_expirer_ttl_enabled(self):
content = ("[database]\n"
"alarm_history_time_to_live=1\n"
"alarm_histories_delete_batch_size=10\n"
"connection=log://localhost\n")
content = content.encode('utf-8')
self.tempfile = fileutils.write_to_tempfile(content=content,
@ -67,7 +68,7 @@ class BinTestCase(base.BaseTestCase):
stderr=subprocess.PIPE)
out, __ = subp.communicate()
self.assertEqual(0, subp.poll())
msg = "Dropping alarm history data with TTL 1"
msg = "Dropping alarm history 10 data with TTL 1"
msg = msg.encode('utf-8')
self.assertIn(msg, out)

View File

@ -0,0 +1,7 @@
---
features:
- |
A new ``alarm_histories_delete_batch_size`` option has been added to limit
the number of alarm histories deleted from the database by aodh-expirer in
a single iteration. This parameter is useful when there are a lot of alarm
histories in the database.