From 95fcdc568484f00f24c3cf5ed6e44a375a179d49 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 21 Apr 2023 22:28:07 -0500 Subject: [PATCH] Speed up sql queries where ORM rows are not needed (#91839) * Speed up logbook and history queries where ORM rows are not needed This avoids having sqlalchemy wrap Result in ChunkedIteratorResult which has additional overhead we do not need for these cases * more places * anything that uses _sorted_statistics_to_dict does not need orm rows either --- homeassistant/components/logbook/processor.py | 28 +++++-------------- .../components/recorder/history/modern.py | 8 ++++-- .../components/recorder/statistics.py | 12 ++++++-- .../recorder/table_managers/event_data.py | 2 +- .../recorder/table_managers/event_types.py | 2 +- .../table_managers/state_attributes.py | 2 +- .../recorder/table_managers/states_meta.py | 2 +- .../table_managers/statistics_meta.py | 1 + homeassistant/components/recorder/util.py | 6 +++- tests/components/recorder/test_util.py | 17 +++++++++-- 10 files changed, 45 insertions(+), 35 deletions(-) diff --git a/homeassistant/components/logbook/processor.py b/homeassistant/components/logbook/processor.py index 6df07684330..d1b8b0ab17c 100644 --- a/homeassistant/components/logbook/processor.py +++ b/homeassistant/components/logbook/processor.py @@ -19,7 +19,10 @@ from homeassistant.components.recorder.models import ( process_datetime_to_timestamp, process_timestamp_to_utc_isoformat, ) -from homeassistant.components.recorder.util import session_scope +from homeassistant.components.recorder.util import ( + execute_stmt_lambda_element, + session_scope, +) from homeassistant.components.sensor import DOMAIN as SENSOR_DOMAIN from homeassistant.const import ( ATTR_DOMAIN, @@ -135,25 +138,6 @@ class EventProcessor: end_day: dt, ) -> list[dict[str, Any]]: """Get events for a period of time.""" - - def yield_rows(result: Result) -> Sequence[Row] | Result: - """Yield rows from the database.""" - # end_day - start_day intentionally checks .days and not .total_seconds() - # since we don't want to switch over to buffered if they go - # over one day by a few hours since the UI makes it so easy to do that. - if self.limited_select or (end_day - start_day).days <= 1: - return result.all() - # Only buffer rows to reduce memory pressure - # if we expect the result set is going to be very large. - # What is considered very large is going to differ - # based on the hardware Home Assistant is running on. - # - # sqlalchemy suggests that is at least 10k, but for - # even and RPi3 that number seems higher in testing - # so we don't switch over until we request > 1 day+ of data. - # - return result.yield_per(1024) - with session_scope(hass=self.hass, read_only=True) as session: metadata_ids: list[int] | None = None instance = get_instance(self.hass) @@ -178,7 +162,9 @@ class EventProcessor: self.filters, self.context_id, ) - return self.humanify(yield_rows(session.execute(stmt))) + return self.humanify( + execute_stmt_lambda_element(session, stmt, orm_rows=False) + ) def humanify( self, rows: Generator[EventAsRow, None, None] | Sequence[Row] | Result diff --git a/homeassistant/components/recorder/history/modern.py b/homeassistant/components/recorder/history/modern.py index 74f613c2cec..b5ac7c571ac 100644 --- a/homeassistant/components/recorder/history/modern.py +++ b/homeassistant/components/recorder/history/modern.py @@ -273,7 +273,7 @@ def get_significant_states_with_session( ], ) return _sorted_states_to_dict( - execute_stmt_lambda_element(session, stmt, None, end_time), + execute_stmt_lambda_element(session, stmt, None, end_time, orm_rows=False), start_time_ts if include_start_time_state else None, entity_ids, entity_id_to_metadata_id, @@ -426,7 +426,9 @@ def state_changes_during_period( return cast( MutableMapping[str, list[State]], _sorted_states_to_dict( - execute_stmt_lambda_element(session, stmt, None, end_time), + execute_stmt_lambda_element( + session, stmt, None, end_time, orm_rows=False + ), start_time_ts if include_start_time_state else None, entity_ids, entity_id_to_metadata_id, @@ -518,7 +520,7 @@ def get_last_state_changes( number_of_states, metadata_id ), ) - states = list(execute_stmt_lambda_element(session, stmt)) + states = list(execute_stmt_lambda_element(session, stmt, orm_rows=False)) return cast( MutableMapping[str, list[State]], _sorted_states_to_dict( diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py index bd496680fa7..06dd20defbd 100644 --- a/homeassistant/components/recorder/statistics.py +++ b/homeassistant/components/recorder/statistics.py @@ -1555,7 +1555,9 @@ def _statistics_during_period_with_session( stmt = _generate_statistics_during_period_stmt( start_time, end_time, metadata_ids, table, types ) - stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt)) + stats = cast( + Sequence[Row], execute_stmt_lambda_element(session, stmt, orm_rows=False) + ) if not stats: return {} @@ -1664,7 +1666,9 @@ def _get_last_statistics( stmt = _get_last_statistics_stmt(metadata_id, number_of_stats) else: stmt = _get_last_statistics_short_term_stmt(metadata_id, number_of_stats) - stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt)) + stats = cast( + Sequence[Row], execute_stmt_lambda_element(session, stmt, orm_rows=False) + ) if not stats: return {} @@ -1755,7 +1759,9 @@ def get_latest_short_term_statistics( if statistic_id in metadata ] stmt = _latest_short_term_statistics_stmt(metadata_ids) - stats = cast(Sequence[Row], execute_stmt_lambda_element(session, stmt)) + stats = cast( + Sequence[Row], execute_stmt_lambda_element(session, stmt, orm_rows=False) + ) if not stats: return {} diff --git a/homeassistant/components/recorder/table_managers/event_data.py b/homeassistant/components/recorder/table_managers/event_data.py index 4c661e3dc29..4e08719e572 100644 --- a/homeassistant/components/recorder/table_managers/event_data.py +++ b/homeassistant/components/recorder/table_managers/event_data.py @@ -97,7 +97,7 @@ class EventDataManager(BaseLRUTableManager[EventData]): with session.no_autoflush: for hashs_chunk in chunked(hashes, SQLITE_MAX_BIND_VARS): for data_id, shared_data in execute_stmt_lambda_element( - session, get_shared_event_datas(hashs_chunk) + session, get_shared_event_datas(hashs_chunk), orm_rows=False ): results[shared_data] = self._id_map[shared_data] = cast( int, data_id diff --git a/homeassistant/components/recorder/table_managers/event_types.py b/homeassistant/components/recorder/table_managers/event_types.py index 5b77e9116c7..96ce77d3bff 100644 --- a/homeassistant/components/recorder/table_managers/event_types.py +++ b/homeassistant/components/recorder/table_managers/event_types.py @@ -69,7 +69,7 @@ class EventTypeManager(BaseLRUTableManager[EventTypes]): with session.no_autoflush: for missing_chunk in chunked(missing, SQLITE_MAX_BIND_VARS): for event_type_id, event_type in execute_stmt_lambda_element( - session, find_event_type_ids(missing_chunk) + session, find_event_type_ids(missing_chunk), orm_rows=False ): results[event_type] = self._id_map[event_type] = cast( int, event_type_id diff --git a/homeassistant/components/recorder/table_managers/state_attributes.py b/homeassistant/components/recorder/table_managers/state_attributes.py index 51c626bd366..442277be96e 100644 --- a/homeassistant/components/recorder/table_managers/state_attributes.py +++ b/homeassistant/components/recorder/table_managers/state_attributes.py @@ -114,7 +114,7 @@ class StateAttributesManager(BaseLRUTableManager[StateAttributes]): with session.no_autoflush: for hashs_chunk in chunked(hashes, SQLITE_MAX_BIND_VARS): for attributes_id, shared_attrs in execute_stmt_lambda_element( - session, get_shared_attributes(hashs_chunk) + session, get_shared_attributes(hashs_chunk), orm_rows=False ): results[shared_attrs] = self._id_map[shared_attrs] = cast( int, attributes_id diff --git a/homeassistant/components/recorder/table_managers/states_meta.py b/homeassistant/components/recorder/table_managers/states_meta.py index 639e0acaa3a..bc4a8cfd2d9 100644 --- a/homeassistant/components/recorder/table_managers/states_meta.py +++ b/homeassistant/components/recorder/table_managers/states_meta.py @@ -67,7 +67,7 @@ class StatesMetaManager(BaseLRUTableManager[StatesMeta]): cast( Sequence[tuple[int, str]], execute_stmt_lambda_element( - session, find_all_states_metadata_ids() + session, find_all_states_metadata_ids(), orm_rows=False ), ) ) diff --git a/homeassistant/components/recorder/table_managers/statistics_meta.py b/homeassistant/components/recorder/table_managers/statistics_meta.py index ba47b3600d6..75af59d7c7a 100644 --- a/homeassistant/components/recorder/table_managers/statistics_meta.py +++ b/homeassistant/components/recorder/table_managers/statistics_meta.py @@ -109,6 +109,7 @@ class StatisticsMetaManager: _generate_get_metadata_stmt( statistic_ids, statistic_type, statistic_source ), + orm_rows=False, ): statistics_meta = cast(StatisticsMeta, row) id_meta = _statistics_meta_to_id_statistics_metadata(statistics_meta) diff --git a/homeassistant/components/recorder/util.py b/homeassistant/components/recorder/util.py index 5e0fb8b59d6..1c50fd0a77c 100644 --- a/homeassistant/components/recorder/util.py +++ b/homeassistant/components/recorder/util.py @@ -199,6 +199,7 @@ def execute_stmt_lambda_element( start_time: datetime | None = None, end_time: datetime | None = None, yield_per: int = DEFAULT_YIELD_STATES_ROWS, + orm_rows: bool = True, ) -> Sequence[Row] | Result: """Execute a StatementLambdaElement. @@ -211,10 +212,13 @@ def execute_stmt_lambda_element( specific entities) since they are usually faster with .all(). """ - executed = session.execute(stmt) use_all = not start_time or ((end_time or dt_util.utcnow()) - start_time).days <= 1 for tryno in range(RETRIES): try: + if orm_rows: + executed = session.execute(stmt) + else: + executed = session.connection().execute(stmt) if use_all: return executed.all() return executed.yield_per(yield_per) diff --git a/tests/components/recorder/test_util.py b/tests/components/recorder/test_util.py index f71ca773daa..ecfd188db8e 100644 --- a/tests/components/recorder/test_util.py +++ b/tests/components/recorder/test_util.py @@ -893,15 +893,16 @@ def test_execute_stmt_lambda_element( now = dt_util.utcnow() tomorrow = now + timedelta(days=1) one_week_from_now = now + timedelta(days=7) + all_calls = 0 class MockExecutor: def __init__(self, stmt): assert isinstance(stmt, StatementLambdaElement) - self.calls = 0 def all(self): - self.calls += 1 - if self.calls == 2: + nonlocal all_calls + all_calls += 1 + if all_calls == 2: return ["mock_row"] raise SQLAlchemyError @@ -926,6 +927,16 @@ def test_execute_stmt_lambda_element( assert row.state == new_state.state assert row.metadata_id == metadata_id + # Time window >= 2 days, we should not get a ChunkedIteratorResult + # because orm_rows=False + rows = util.execute_stmt_lambda_element( + session, stmt, now, one_week_from_now, orm_rows=False + ) + assert not isinstance(rows, ChunkedIteratorResult) + row = next(rows) + assert row.state == new_state.state + assert row.metadata_id == metadata_id + # Time window < 2 days, we get a list rows = util.execute_stmt_lambda_element(session, stmt, now, tomorrow) assert isinstance(rows, list)