Replace start time state query with single correlated scalar subquery (#133553)

This commit is contained in:
J. Nick Koston 2024-12-19 00:14:32 -10:00 committed by GitHub
parent 1c119518db
commit d35b34f142
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 42 deletions

View File

@@ -28,7 +28,12 @@ from homeassistant.helpers.recorder import get_instance
import homeassistant.util.dt as dt_util
from ..const import LAST_REPORTED_SCHEMA_VERSION
from ..db_schema import SHARED_ATTR_OR_LEGACY_ATTRIBUTES, StateAttributes, States
from ..db_schema import (
SHARED_ATTR_OR_LEGACY_ATTRIBUTES,
StateAttributes,
States,
StatesMeta,
)
from ..filters import Filters
from ..models import (
LazyState,
@@ -558,40 +563,38 @@ def _get_start_time_state_for_entities_stmt(
include_last_changed: bool,
) -> Select:
"""Baked query to get states for specific entities."""
# We got an include-list of entities, accelerate the query by filtering already
# in the inner and the outer query.
# This query is the result of significant research in
# https://github.com/home-assistant/core/issues/132865
# A reverse index scan with a limit 1 is the fastest way to get the
# last state change before a specific point in time for all supported
# databases. Since all databases support this query as a join
# condition, we can use it as a subquery to get the last state change
# before a specific point in time for each entity.
stmt = (
_stmt_and_join_attributes_for_start_state(
no_attributes, include_last_changed, False
)
.select_from(StatesMeta)
.join(
(
most_recent_states_for_entities_by_date := (
select(
States.metadata_id.label("max_metadata_id"),
func.max(States.last_updated_ts).label("max_last_updated"),
)
.filter(
(States.last_updated_ts >= run_start_ts)
& (States.last_updated_ts < epoch_time)
& States.metadata_id.in_(metadata_ids)
)
.group_by(States.metadata_id)
.subquery()
)
),
States,
and_(
States.metadata_id
== most_recent_states_for_entities_by_date.c.max_metadata_id,
States.last_updated_ts
== most_recent_states_for_entities_by_date.c.max_last_updated,
== (
select(States.last_updated_ts)
.where(
(StatesMeta.metadata_id == States.metadata_id)
& (States.last_updated_ts < epoch_time)
& (States.last_updated_ts >= run_start_ts)
)
.order_by(States.last_updated_ts.desc())
.limit(1)
)
.scalar_subquery()
.correlate(StatesMeta),
States.metadata_id == StatesMeta.metadata_id,
),
)
.filter(
(States.last_updated_ts >= run_start_ts)
& (States.last_updated_ts < epoch_time)
& States.metadata_id.in_(metadata_ids)
)
.where(StatesMeta.metadata_id.in_(metadata_ids))
)
if no_attributes:
return stmt

View File

@@ -63,6 +63,7 @@ from .db_schema import (
STATISTICS_TABLES,
Statistics,
StatisticsBase,
StatisticsMeta,
StatisticsRuns,
StatisticsShortTerm,
)
@@ -2034,24 +2035,35 @@ def _generate_statistics_at_time_stmt(
types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]],
) -> StatementLambdaElement:
"""Create the statement for finding the statistics for a given time."""
# This query is the result of significant research in
# https://github.com/home-assistant/core/issues/132865
# A reverse index scan with a limit 1 is the fastest way to get the
# last start_time_ts before a specific point in time for all supported
# databases. Since all databases support this query as a join
# condition, we can use it as a subquery to get the last start_time_ts
# before a specific point in time for each requested metadata id.
stmt = _generate_select_columns_for_types_stmt(table, types)
stmt += lambda q: q.join(
(
most_recent_statistic_ids := (
select(
func.max(table.start_ts).label("max_start_ts"),
table.metadata_id.label("max_metadata_id"),
stmt += (
lambda q: q.select_from(StatisticsMeta)
.join(
table,
and_(
table.start_ts
== (
select(table.start_ts)
.where(
(StatisticsMeta.id == table.metadata_id)
& (table.start_ts < start_time_ts)
)
.order_by(table.start_ts.desc())
.limit(1)
)
.filter(table.start_ts < start_time_ts)
.filter(table.metadata_id.in_(metadata_ids))
.group_by(table.metadata_id)
.subquery()
)
),
and_(
table.start_ts == most_recent_statistic_ids.c.max_start_ts,
table.metadata_id == most_recent_statistic_ids.c.max_metadata_id,
),
.scalar_subquery()
.correlate(StatisticsMeta),
table.metadata_id == StatisticsMeta.id,
),
)
.where(table.metadata_id.in_(metadata_ids))
)
return stmt