mirror of https://github.com/home-assistant/core.git
Chunk purging attributes and data ids for old SQLite versions (#104296)

parent b41b56e54c
commit 9ed745638d
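In summary, this change threads the Recorder instance into the unused-id selectors so the fast-distinct SQLite path can chunk its IN (...) queries by instance.max_bind_vars, and it introduces a chunked_or_all helper that returns the collection untouched when it already fits in a single chunk. As background, here is a minimal, self-contained sketch of the technique (not Home Assistant code; the states table layout and the helper names below are illustrative): old SQLite builds (before 3.32.0) cap bound parameters at 999, so a large id set has to be queried chunk by chunk.

import sqlite3
from itertools import islice

MAX_BIND_VARS = 999  # default SQLITE_MAX_VARIABLE_NUMBER before SQLite 3.32.0


def chunked(iterable, n):
    """Yield successive lists of at most n items from iterable."""
    it = iter(iterable)
    while chunk := list(islice(it, n)):
        yield chunk


def select_used_attributes_ids(conn, attributes_ids):
    """Return the subset of attributes_ids still referenced by any state."""
    seen_ids = set()
    for chunk in chunked(attributes_ids, MAX_BIND_VARS):
        # One query per chunk keeps each statement under the bind-var cap.
        placeholders = ",".join("?" * len(chunk))
        rows = conn.execute(
            "SELECT DISTINCT attributes_id FROM states"
            f" WHERE attributes_id IN ({placeholders})",
            chunk,
        )
        seen_ids.update(row[0] for row in rows)
    return seen_ids


conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE states (state_id INTEGER PRIMARY KEY, attributes_id INTEGER)"
)
conn.executemany(
    "INSERT INTO states (attributes_id) VALUES (?)", [(i,) for i in range(5)]
)
candidates = set(range(2000))  # more ids than one old-SQLite statement can bind
unused = candidates - select_used_attributes_ids(conn, candidates)
assert unused == set(range(5, 2000))

Chunking trades one round trip per chunk for statements that always fit under the cap, which is why the PR also avoids chunking (and copying) entirely when the set is small enough.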
--- a/homeassistant/components/recorder/purge.py
+++ b/homeassistant/components/recorder/purge.py
@@ -41,7 +41,7 @@ from .queries import (
     find_statistics_runs_to_purge,
 )
 from .repack import repack_database
-from .util import chunked, retryable_database_job, session_scope
+from .util import chunked_or_all, retryable_database_job, session_scope
 
 if TYPE_CHECKING:
     from . import Recorder
@@ -283,12 +283,16 @@ def _select_event_data_ids_to_purge(
 
 
 def _select_unused_attributes_ids(
-    session: Session, attributes_ids: set[int], database_engine: DatabaseEngine
+    instance: Recorder,
+    session: Session,
+    attributes_ids: set[int],
+    database_engine: DatabaseEngine,
 ) -> set[int]:
     """Return a set of attributes ids that are not used by any states in the db."""
     if not attributes_ids:
         return set()
 
+    seen_ids: set[int] = set()
     if not database_engine.optimizer.slow_range_in_select:
         #
         # SQLite has a superior query optimizer for the distinct query below as it uses
@@ -303,12 +307,17 @@ def _select_unused_attributes_ids(
         # (136723);
         # ...Using index
         #
-        seen_ids = {
-            state[0]
-            for state in session.execute(
-                attributes_ids_exist_in_states_with_fast_in_distinct(attributes_ids)
-            ).all()
-        }
+        for attributes_ids_chunk in chunked_or_all(
+            attributes_ids, instance.max_bind_vars
+        ):
+            seen_ids.update(
+                state[0]
+                for state in session.execute(
+                    attributes_ids_exist_in_states_with_fast_in_distinct(
+                        attributes_ids_chunk
+                    )
+                ).all()
+            )
     else:
         #
         # This branch is for DBMS that cannot optimize the distinct query well and has
@@ -334,7 +343,6 @@ def _select_unused_attributes_ids(
         # We now break the query into groups of 100 and use a lambda_stmt to ensure
         # that the query is only cached once.
         #
-        seen_ids = set()
         groups = [iter(attributes_ids)] * 100
         for attr_ids in zip_longest(*groups, fillvalue=None):
             seen_ids |= {
@@ -361,29 +369,33 @@ def _purge_unused_attributes_ids(
     database_engine = instance.database_engine
     assert database_engine is not None
     if unused_attribute_ids_set := _select_unused_attributes_ids(
-        session, attributes_ids_batch, database_engine
+        instance, session, attributes_ids_batch, database_engine
     ):
         _purge_batch_attributes_ids(instance, session, unused_attribute_ids_set)
 
 
 def _select_unused_event_data_ids(
-    session: Session, data_ids: set[int], database_engine: DatabaseEngine
+    instance: Recorder,
+    session: Session,
+    data_ids: set[int],
+    database_engine: DatabaseEngine,
 ) -> set[int]:
     """Return a set of event data ids that are not used by any events in the db."""
     if not data_ids:
         return set()
 
+    seen_ids: set[int] = set()
     # See _select_unused_attributes_ids for why this function
     # branches for non-sqlite databases.
     if not database_engine.optimizer.slow_range_in_select:
-        seen_ids = {
-            state[0]
-            for state in session.execute(
-                data_ids_exist_in_events_with_fast_in_distinct(data_ids)
-            ).all()
-        }
+        for data_ids_chunk in chunked_or_all(data_ids, instance.max_bind_vars):
+            seen_ids.update(
+                state[0]
+                for state in session.execute(
+                    data_ids_exist_in_events_with_fast_in_distinct(data_ids_chunk)
+                ).all()
+            )
     else:
-        seen_ids = set()
         groups = [iter(data_ids)] * 100
         for data_ids_group in zip_longest(*groups, fillvalue=None):
             seen_ids |= {
@@ -404,7 +416,7 @@ def _purge_unused_data_ids(
     database_engine = instance.database_engine
     assert database_engine is not None
     if unused_data_ids_set := _select_unused_event_data_ids(
-        session, data_ids_batch, database_engine
+        instance, session, data_ids_batch, database_engine
     ):
         _purge_batch_data_ids(instance, session, unused_data_ids_set)
 
@@ -519,7 +531,7 @@ def _purge_batch_attributes_ids(
     instance: Recorder, session: Session, attributes_ids: set[int]
 ) -> None:
     """Delete old attributes ids in batches of max_bind_vars."""
-    for attributes_ids_chunk in chunked(attributes_ids, instance.max_bind_vars):
+    for attributes_ids_chunk in chunked_or_all(attributes_ids, instance.max_bind_vars):
         deleted_rows = session.execute(
             delete_states_attributes_rows(attributes_ids_chunk)
         )
@@ -533,7 +545,7 @@ def _purge_batch_data_ids(
     instance: Recorder, session: Session, data_ids: set[int]
 ) -> None:
     """Delete old event data ids in batches of max_bind_vars."""
-    for data_ids_chunk in chunked(data_ids, instance.max_bind_vars):
+    for data_ids_chunk in chunked_or_all(data_ids, instance.max_bind_vars):
         deleted_rows = session.execute(delete_event_data_rows(data_ids_chunk))
         _LOGGER.debug("Deleted %s data events", deleted_rows)
 
@@ -694,7 +706,10 @@ def _purge_filtered_states(
         # we will need to purge them here.
         _purge_event_ids(session, filtered_event_ids)
     unused_attribute_ids_set = _select_unused_attributes_ids(
-        session, {id_ for id_ in attributes_ids if id_ is not None}, database_engine
+        instance,
+        session,
+        {id_ for id_ in attributes_ids if id_ is not None},
+        database_engine,
     )
     _purge_batch_attributes_ids(instance, session, unused_attribute_ids_set)
     return False
@@ -741,7 +756,7 @@ def _purge_filtered_events(
         _purge_state_ids(instance, session, state_ids)
     _purge_event_ids(session, event_ids_set)
     if unused_data_ids_set := _select_unused_event_data_ids(
-        session, set(data_ids), database_engine
+        instance, session, set(data_ids), database_engine
    ):
         _purge_batch_data_ids(instance, session, unused_data_ids_set)
     return False
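A note on why instance is now a parameter: the chunk size comes from instance.max_bind_vars rather than a hard-coded constant, because the safe number of bound parameters depends on the database engine and its version. As a rough sketch of that dependence (illustrative only, not Home Assistant's actual logic; the function name is made up):

import sqlite3


def max_bind_vars_for_sqlite() -> int:
    """Pick a bind-variable budget for the running SQLite library."""
    # SQLITE_MAX_VARIABLE_NUMBER defaults to 999 before SQLite 3.32.0
    # and to 32766 from 3.32.0 onward.
    if sqlite3.sqlite_version_info >= (3, 32, 0):
        return 32766
    return 999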
--- a/homeassistant/components/recorder/util.py
+++ b/homeassistant/components/recorder/util.py
@@ -1,7 +1,7 @@
 """SQLAlchemy util functions."""
 from __future__ import annotations
 
-from collections.abc import Callable, Generator, Iterable, Sequence
+from collections.abc import Callable, Collection, Generator, Iterable, Sequence
 from contextlib import contextmanager
 from datetime import date, datetime, timedelta
 import functools
@@ -857,6 +857,20 @@ def chunked(iterable: Iterable, chunked_num: int) -> Iterable[Any]:
     return iter(partial(take, chunked_num, iter(iterable)), [])
 
 
+def chunked_or_all(iterable: Collection[Any], chunked_num: int) -> Iterable[Any]:
+    """Break *collection* into iterables of length *n*.
+
+    Returns the collection if its length is less than *n*.
+
+    Unlike chunked, this function requires a collection so it can
+    determine the length of the collection and return the collection
+    if it is less than *n*.
+    """
+    if len(iterable) <= chunked_num:
+        return (iterable,)
+    return chunked(iterable, chunked_num)
+
+
 def get_index_by_name(session: Session, table_name: str, index_name: str) -> str | None:
     """Get an index by name."""
     connection = session.connection()
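The new helper's behavior, per its docstring and the test that follows: if the collection fits within the limit, it is yielded back as-is with no copy; otherwise it falls back to chunked. A small usage sketch, using the same import path as the test below:

from homeassistant.components.recorder.util import chunked_or_all

ids = {10, 11, 12}
# Fits within the limit: the original collection itself is the only "chunk".
assert list(chunked_or_all(ids, 100)) == [ids]
# Exceeds the limit: falls back to chunked(), yielding lists of at most 2 items.
assert list(chunked_or_all((1, 2, 3, 4), 2)) == [[1, 2], [3, 4]]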
--- a/tests/components/recorder/test_util.py
+++ b/tests/components/recorder/test_util.py
@@ -25,6 +25,7 @@ from homeassistant.components.recorder.models import (
     process_timestamp,
 )
 from homeassistant.components.recorder.util import (
+    chunked_or_all,
     end_incomplete_runs,
     is_second_sunday,
     resolve_period,
@@ -1023,3 +1024,24 @@ async def test_resolve_period(hass: HomeAssistant) -> None:
             }
         }
     ) == (now - timedelta(hours=1, minutes=25), now - timedelta(minutes=25))
+
+
+def test_chunked_or_all():
+    """Test chunked_or_all can iterate chunk sizes larger than the passed in collection."""
+    all = []
+    incoming = (1, 2, 3, 4)
+    for chunk in chunked_or_all(incoming, 2):
+        assert len(chunk) == 2
+        all.extend(chunk)
+    assert all == [1, 2, 3, 4]
+
+    all = []
+    incoming = (1, 2, 3, 4)
+    for chunk in chunked_or_all(incoming, 5):
+        assert len(chunk) == 4
+        # Verify the chunk is the same object as the incoming
+        # collection since we want to avoid copying the collection
+        # if we don't need to
+        assert chunk is incoming
+        all.extend(chunk)
+    assert all == [1, 2, 3, 4]
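One sanity check the PR's test does not pin down explicitly (a sketch, reusing the import above): an uneven split yields a short final chunk rather than dropping items.

# Five items in chunks of two: the last chunk holds the single leftover.
assert list(chunked_or_all((1, 2, 3, 4, 5), 2)) == [[1, 2], [3, 4], [5]]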