diff --git a/homeassistant/components/recorder/history/modern.py b/homeassistant/components/recorder/history/modern.py index 9159bbc6181..e9af4a673c3 100644 --- a/homeassistant/components/recorder/history/modern.py +++ b/homeassistant/components/recorder/history/modern.py @@ -28,7 +28,12 @@ from homeassistant.helpers.recorder import get_instance import homeassistant.util.dt as dt_util from ..const import LAST_REPORTED_SCHEMA_VERSION -from ..db_schema import SHARED_ATTR_OR_LEGACY_ATTRIBUTES, StateAttributes, States +from ..db_schema import ( + SHARED_ATTR_OR_LEGACY_ATTRIBUTES, + StateAttributes, + States, + StatesMeta, +) from ..filters import Filters from ..models import ( LazyState, @@ -558,40 +563,38 @@ def _get_start_time_state_for_entities_stmt( include_last_changed: bool, ) -> Select: """Baked query to get states for specific entities.""" - # We got an include-list of entities, accelerate the query by filtering already - # in the inner and the outer query. + # This query is the result of significant research in + # https://github.com/home-assistant/core/issues/132865 + # A reverse index scan with a limit 1 is the fastest way to get the + # last state change before a specific point in time for all supported + # databases. Since all databases support this query as a join + # condition we can use it as a subquery to get the last state change + # before a specific point in time for all entities. stmt = ( _stmt_and_join_attributes_for_start_state( no_attributes, include_last_changed, False ) + .select_from(StatesMeta) .join( - ( - most_recent_states_for_entities_by_date := ( - select( - States.metadata_id.label("max_metadata_id"), - func.max(States.last_updated_ts).label("max_last_updated"), - ) - .filter( - (States.last_updated_ts >= run_start_ts) - & (States.last_updated_ts < epoch_time) - & States.metadata_id.in_(metadata_ids) - ) - .group_by(States.metadata_id) - .subquery() - ) - ), + States, and_( - States.metadata_id - == most_recent_states_for_entities_by_date.c.max_metadata_id, States.last_updated_ts - == most_recent_states_for_entities_by_date.c.max_last_updated, + == ( + select(States.last_updated_ts) + .where( + (StatesMeta.metadata_id == States.metadata_id) + & (States.last_updated_ts < epoch_time) + & (States.last_updated_ts >= run_start_ts) + ) + .order_by(States.last_updated_ts.desc()) + .limit(1) + ) + .scalar_subquery() + .correlate(StatesMeta), + States.metadata_id == StatesMeta.metadata_id, ), ) - .filter( - (States.last_updated_ts >= run_start_ts) - & (States.last_updated_ts < epoch_time) - & States.metadata_id.in_(metadata_ids) - ) + .where(StatesMeta.metadata_id.in_(metadata_ids)) ) if no_attributes: return stmt diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py index 3f1d5b981e3..c6783a5cbc2 100644 --- a/homeassistant/components/recorder/statistics.py +++ b/homeassistant/components/recorder/statistics.py @@ -63,6 +63,7 @@ from .db_schema import ( STATISTICS_TABLES, Statistics, StatisticsBase, + StatisticsMeta, StatisticsRuns, StatisticsShortTerm, ) @@ -2034,24 +2035,35 @@ def _generate_statistics_at_time_stmt( types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]], ) -> StatementLambdaElement: """Create the statement for finding the statistics for a given time.""" + # This query is the result of significant research in + # https://github.com/home-assistant/core/issues/132865 + # A reverse index scan with a limit 1 is the fastest way to get the + # last start_time_ts before a specific point in time for all supported + # databases. Since all databases support this query as a join + # condition we can use it as a subquery to get the last start_time_ts + # before a specific point in time for all entities. stmt = _generate_select_columns_for_types_stmt(table, types) - stmt += lambda q: q.join( - ( - most_recent_statistic_ids := ( - select( - func.max(table.start_ts).label("max_start_ts"), - table.metadata_id.label("max_metadata_id"), + stmt += ( + lambda q: q.select_from(StatisticsMeta) + .join( + table, + and_( + table.start_ts + == ( + select(table.start_ts) + .where( + (StatisticsMeta.id == table.metadata_id) + & (table.start_ts < start_time_ts) + ) + .order_by(table.start_ts.desc()) + .limit(1) ) - .filter(table.start_ts < start_time_ts) - .filter(table.metadata_id.in_(metadata_ids)) - .group_by(table.metadata_id) - .subquery() - ) - ), - and_( - table.start_ts == most_recent_statistic_ids.c.max_start_ts, - table.metadata_id == most_recent_statistic_ids.c.max_metadata_id, - ), + .scalar_subquery() + .correlate(StatisticsMeta), + table.metadata_id == StatisticsMeta.id, + ), + ) + .where(table.metadata_id.in_(metadata_ids)) ) return stmt