Replace start time state query with single correlated scalar subquery (#133553)

This commit is contained in:
J. Nick Koston 2024-12-19 00:14:32 -10:00 committed by GitHub
parent 1c119518db
commit d35b34f142
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 57 additions and 42 deletions

View File

@ -28,7 +28,12 @@ from homeassistant.helpers.recorder import get_instance
import homeassistant.util.dt as dt_util import homeassistant.util.dt as dt_util
from ..const import LAST_REPORTED_SCHEMA_VERSION from ..const import LAST_REPORTED_SCHEMA_VERSION
from ..db_schema import SHARED_ATTR_OR_LEGACY_ATTRIBUTES, StateAttributes, States from ..db_schema import (
SHARED_ATTR_OR_LEGACY_ATTRIBUTES,
StateAttributes,
States,
StatesMeta,
)
from ..filters import Filters from ..filters import Filters
from ..models import ( from ..models import (
LazyState, LazyState,
@ -558,40 +563,38 @@ def _get_start_time_state_for_entities_stmt(
include_last_changed: bool, include_last_changed: bool,
) -> Select: ) -> Select:
"""Baked query to get states for specific entities.""" """Baked query to get states for specific entities."""
# We got an include-list of entities, accelerate the query by filtering already # This query is the result of significant research in
# in the inner and the outer query. # https://github.com/home-assistant/core/issues/132865
# A reverse index scan with a limit 1 is the fastest way to get the
# last state change before a specific point in time for all supported
# databases. Since all databases support this query as a join
# condition we can use it as a subquery to get the last state change
# before a specific point in time for all entities.
stmt = ( stmt = (
_stmt_and_join_attributes_for_start_state( _stmt_and_join_attributes_for_start_state(
no_attributes, include_last_changed, False no_attributes, include_last_changed, False
) )
.select_from(StatesMeta)
.join( .join(
( States,
most_recent_states_for_entities_by_date := (
select(
States.metadata_id.label("max_metadata_id"),
func.max(States.last_updated_ts).label("max_last_updated"),
)
.filter(
(States.last_updated_ts >= run_start_ts)
& (States.last_updated_ts < epoch_time)
& States.metadata_id.in_(metadata_ids)
)
.group_by(States.metadata_id)
.subquery()
)
),
and_( and_(
States.metadata_id
== most_recent_states_for_entities_by_date.c.max_metadata_id,
States.last_updated_ts States.last_updated_ts
== most_recent_states_for_entities_by_date.c.max_last_updated, == (
select(States.last_updated_ts)
.where(
(StatesMeta.metadata_id == States.metadata_id)
& (States.last_updated_ts < epoch_time)
& (States.last_updated_ts >= run_start_ts)
)
.order_by(States.last_updated_ts.desc())
.limit(1)
)
.scalar_subquery()
.correlate(StatesMeta),
States.metadata_id == StatesMeta.metadata_id,
), ),
) )
.filter( .where(StatesMeta.metadata_id.in_(metadata_ids))
(States.last_updated_ts >= run_start_ts)
& (States.last_updated_ts < epoch_time)
& States.metadata_id.in_(metadata_ids)
)
) )
if no_attributes: if no_attributes:
return stmt return stmt

View File

@ -63,6 +63,7 @@ from .db_schema import (
STATISTICS_TABLES, STATISTICS_TABLES,
Statistics, Statistics,
StatisticsBase, StatisticsBase,
StatisticsMeta,
StatisticsRuns, StatisticsRuns,
StatisticsShortTerm, StatisticsShortTerm,
) )
@ -2034,24 +2035,35 @@ def _generate_statistics_at_time_stmt(
types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]], types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]],
) -> StatementLambdaElement: ) -> StatementLambdaElement:
"""Create the statement for finding the statistics for a given time.""" """Create the statement for finding the statistics for a given time."""
# This query is the result of significant research in
# https://github.com/home-assistant/core/issues/132865
# A reverse index scan with a limit 1 is the fastest way to get the
# last start_ts before a specific point in time (start_time_ts) on all
# supported databases. Since all databases support this query as a
# join condition, we can use it as a correlated scalar subquery to get
# the last start_ts before that point in time for each metadata_id.
stmt = _generate_select_columns_for_types_stmt(table, types) stmt = _generate_select_columns_for_types_stmt(table, types)
stmt += lambda q: q.join( stmt += (
( lambda q: q.select_from(StatisticsMeta)
most_recent_statistic_ids := ( .join(
select( table,
func.max(table.start_ts).label("max_start_ts"), and_(
table.metadata_id.label("max_metadata_id"), table.start_ts
== (
select(table.start_ts)
.where(
(StatisticsMeta.id == table.metadata_id)
& (table.start_ts < start_time_ts)
)
.order_by(table.start_ts.desc())
.limit(1)
) )
.filter(table.start_ts < start_time_ts) .scalar_subquery()
.filter(table.metadata_id.in_(metadata_ids)) .correlate(StatisticsMeta),
.group_by(table.metadata_id) table.metadata_id == StatisticsMeta.id,
.subquery() ),
) )
), .where(table.metadata_id.in_(metadata_ids))
and_(
table.start_ts == most_recent_statistic_ids.c.max_start_ts,
table.metadata_id == most_recent_statistic_ids.c.max_metadata_id,
),
) )
return stmt return stmt