From bdef0ba6e56be06f50e3f7e4366aaa6f021fc40a Mon Sep 17 00:00:00 2001 From: Alex Hermann Date: Tue, 28 Nov 2023 23:23:49 +0100 Subject: [PATCH] Significantly improve performance for some cases of the history start time state query (#99450) * recorder: Apply filter in the outer query too Function _get_start_time_state_for_entities_stmt() produced a query which is dead-slow in my installation. On analysis, the outer query produced millions of rows which had to be joined to the subquery. The subquery has a filter which would eliminate almost all of the outer rows. To speed up the query, apply the same filter to the outer query, so far fewer rows have to be joined. This reduced the query time on my system from more than half an hour to mere milliseconds. * lint * merge filter --------- Co-authored-by: J. Nick Koston --- .../components/recorder/history/modern.py | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/homeassistant/components/recorder/history/modern.py b/homeassistant/components/recorder/history/modern.py index 68c357c0ed4..da58822e266 100644 --- a/homeassistant/components/recorder/history/modern.py +++ b/homeassistant/components/recorder/history/modern.py @@ -527,31 +527,37 @@ def _get_start_time_state_for_entities_stmt( ) -> Select: """Baked query to get states for specific entities.""" # We got an include-list of entities, accelerate the query by filtering already - # in the inner query. - stmt = _stmt_and_join_attributes_for_start_state( - no_attributes, include_last_changed - ).join( - ( - most_recent_states_for_entities_by_date := ( - select( - States.metadata_id.label("max_metadata_id"), - func.max(States.last_updated_ts).label("max_last_updated"), + # in the inner and the outer query. 
+ stmt = ( + _stmt_and_join_attributes_for_start_state(no_attributes, include_last_changed) + .join( + ( + most_recent_states_for_entities_by_date := ( + select( + States.metadata_id.label("max_metadata_id"), + func.max(States.last_updated_ts).label("max_last_updated"), + ) + .filter( + (States.last_updated_ts >= run_start_ts) + & (States.last_updated_ts < epoch_time) + & States.metadata_id.in_(metadata_ids) + ) + .group_by(States.metadata_id) + .subquery() ) - .filter( - (States.last_updated_ts >= run_start_ts) - & (States.last_updated_ts < epoch_time) - ) - .filter(States.metadata_id.in_(metadata_ids)) - .group_by(States.metadata_id) - .subquery() - ) - ), - and_( - States.metadata_id - == most_recent_states_for_entities_by_date.c.max_metadata_id, - States.last_updated_ts - == most_recent_states_for_entities_by_date.c.max_last_updated, - ), + ), + and_( + States.metadata_id + == most_recent_states_for_entities_by_date.c.max_metadata_id, + States.last_updated_ts + == most_recent_states_for_entities_by_date.c.max_last_updated, + ), + ) + .filter( + (States.last_updated_ts >= run_start_ts) + & (States.last_updated_ts < epoch_time) + & States.metadata_id.in_(metadata_ids) + ) ) if no_attributes: return stmt