From 81e8d2ab862b78b7936a0ba8b12a3643e76ccd94 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 11 May 2022 17:27:02 -0500 Subject: [PATCH] Significantly improve logbook performance when selecting entities (#71657) --- homeassistant/components/logbook/__init__.py | 277 ++------------ homeassistant/components/logbook/queries.py | 377 +++++++++++++++++++ tests/components/logbook/test_init.py | 96 ++++- 3 files changed, 480 insertions(+), 270 deletions(-) create mode 100644 homeassistant/components/logbook/queries.py diff --git a/homeassistant/components/logbook/__init__.py b/homeassistant/components/logbook/__init__.py index a877cba4ff2..9cf3ad99e57 100644 --- a/homeassistant/components/logbook/__init__.py +++ b/homeassistant/components/logbook/__init__.py @@ -1,23 +1,18 @@ """Event parser and human readable log generator.""" from __future__ import annotations -from collections.abc import Callable, Generator, Iterable +from collections.abc import Callable, Generator from contextlib import suppress from datetime import datetime as dt, timedelta from http import HTTPStatus import json +import logging import re from typing import Any, cast from aiohttp import web -import sqlalchemy -from sqlalchemy import lambda_stmt, select from sqlalchemy.engine.row import Row -from sqlalchemy.orm import aliased from sqlalchemy.orm.query import Query -from sqlalchemy.sql.expression import literal -from sqlalchemy.sql.lambdas import StatementLambdaElement -from sqlalchemy.sql.selectable import Select import voluptuous as vol from homeassistant.components import frontend @@ -27,15 +22,8 @@ from homeassistant.components.history import ( sqlalchemy_filter_from_include_exclude_conf, ) from homeassistant.components.http import HomeAssistantView -from homeassistant.components.proximity import DOMAIN as PROXIMITY_DOMAIN from homeassistant.components.recorder import get_instance -from homeassistant.components.recorder.models import ( - EventData, - Events, - StateAttributes, - States, - process_timestamp_to_utc_isoformat, -) +from homeassistant.components.recorder.models import process_timestamp_to_utc_isoformat from homeassistant.components.recorder.util import session_scope from homeassistant.components.script import EVENT_SCRIPT_STARTED from homeassistant.components.sensor import ATTR_STATE_CLASS, DOMAIN as SENSOR_DOMAIN @@ -75,66 +63,32 @@ from homeassistant.helpers.typing import ConfigType from homeassistant.loader import bind_hass import homeassistant.util.dt as dt_util -ENTITY_ID_JSON_TEMPLATE = '%"entity_id":"{}"%' +from .queries import statement_for_request + +_LOGGER = logging.getLogger(__name__) + + FRIENDLY_NAME_JSON_EXTRACT = re.compile('"friendly_name": ?"([^"]+)"') ENTITY_ID_JSON_EXTRACT = re.compile('"entity_id": ?"([^"]+)"') DOMAIN_JSON_EXTRACT = re.compile('"domain": ?"([^"]+)"') ICON_JSON_EXTRACT = re.compile('"icon": ?"([^"]+)"') ATTR_MESSAGE = "message" -CONTINUOUS_DOMAINS = {PROXIMITY_DOMAIN, SENSOR_DOMAIN} -CONTINUOUS_ENTITY_ID_LIKE = [f"{domain}.%" for domain in CONTINUOUS_DOMAINS] - DOMAIN = "logbook" -EMPTY_JSON_OBJECT = "{}" -UNIT_OF_MEASUREMENT_JSON = '"unit_of_measurement":' -UNIT_OF_MEASUREMENT_JSON_LIKE = f"%{UNIT_OF_MEASUREMENT_JSON}%" HA_DOMAIN_ENTITY_ID = f"{HA_DOMAIN}._" CONFIG_SCHEMA = vol.Schema( {DOMAIN: INCLUDE_EXCLUDE_BASE_FILTER_SCHEMA}, extra=vol.ALLOW_EXTRA ) -HOMEASSISTANT_EVENTS = [ - EVENT_HOMEASSISTANT_START, - EVENT_HOMEASSISTANT_STOP, -] +HOMEASSISTANT_EVENTS = {EVENT_HOMEASSISTANT_START, EVENT_HOMEASSISTANT_STOP} -ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED = [ 
+ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED = ( EVENT_LOGBOOK_ENTRY, EVENT_CALL_SERVICE, *HOMEASSISTANT_EVENTS, -] - -ALL_EVENT_TYPES = [ - EVENT_STATE_CHANGED, - *ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED, -] - - -EVENT_COLUMNS = [ - Events.event_type.label("event_type"), - Events.event_data.label("event_data"), - Events.time_fired.label("time_fired"), - Events.context_id.label("context_id"), - Events.context_user_id.label("context_user_id"), - Events.context_parent_id.label("context_parent_id"), -] - -STATE_COLUMNS = [ - States.state.label("state"), - States.entity_id.label("entity_id"), - States.attributes.label("attributes"), - StateAttributes.shared_attrs.label("shared_attrs"), -] - -EMPTY_STATE_COLUMNS = [ - literal(value=None, type_=sqlalchemy.String).label("state"), - literal(value=None, type_=sqlalchemy.String).label("entity_id"), - literal(value=None, type_=sqlalchemy.Text).label("attributes"), - literal(value=None, type_=sqlalchemy.Text).label("shared_attrs"), -] +) SCRIPT_AUTOMATION_EVENTS = {EVENT_AUTOMATION_TRIGGERED, EVENT_SCRIPT_STARTED} @@ -295,7 +249,6 @@ class LogbookView(HomeAssistantView): hass = request.app["hass"] - entity_matches_only = "entity_matches_only" in request.query context_id = request.query.get("context_id") if entity_ids and context_id: @@ -313,7 +266,6 @@ class LogbookView(HomeAssistantView): entity_ids, self.filters, self.entities_filter, - entity_matches_only, context_id, ) ) @@ -416,7 +368,6 @@ def _get_events( entity_ids: list[str] | None = None, filters: Filters | None = None, entities_filter: EntityFilter | Callable[[str], bool] | None = None, - entity_matches_only: bool = False, context_id: str | None = None, ) -> list[dict[str, Any]]: """Get events for a period of time.""" @@ -428,10 +379,13 @@ def _get_events( event_data_cache: dict[str, dict[str, Any]] = {} context_lookup: dict[str | None, Row | None] = {None: None} event_cache = EventCache(event_data_cache) - external_events = hass.data.get(DOMAIN, {}) + external_events: dict[ + str, tuple[str, Callable[[LazyEventPartialState], dict[str, Any]]] + ] = hass.data.get(DOMAIN, {}) context_augmenter = ContextAugmenter( context_lookup, entity_name_cache, external_events, event_cache ) + event_types = (*ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED, *external_events) def yield_rows(query: Query) -> Generator[Row, None, None]: """Yield Events that are not filtered away.""" @@ -441,6 +395,8 @@ def _get_events( rows = query.yield_per(1000) for row in rows: context_lookup.setdefault(row.context_id, row) + if row.context_only: + continue event_type = row.event_type if event_type != EVENT_CALL_SERVICE and ( event_type == EVENT_STATE_CHANGED @@ -451,22 +407,15 @@ def _get_events( if entity_ids is not None: entities_filter = generate_filter([], entity_ids, [], []) - event_types = [ - *ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED, - *hass.data.get(DOMAIN, {}), - ] - entity_filter = None - if entity_ids is None and filters: - entity_filter = filters.entity_filter() # type: ignore[no-untyped-call] - stmt = _generate_logbook_query( - start_day, - end_day, - event_types, - entity_ids, - entity_filter, - entity_matches_only, - context_id, + stmt = statement_for_request( + start_day, end_day, event_types, entity_ids, filters, context_id ) + if _LOGGER.isEnabledFor(logging.DEBUG): + _LOGGER.debug( + "Literal statement: %s", + stmt.compile(compile_kwargs={"literal_binds": True}), + ) + with session_scope(hass=hass) as session: return list( _humanify( @@ -479,182 +428,6 @@ def _get_events( ) -def _generate_logbook_query( - start_day: dt, - 
end_day: dt, - event_types: list[str], - entity_ids: list[str] | None = None, - entity_filter: Any | None = None, - entity_matches_only: bool = False, - context_id: str | None = None, -) -> StatementLambdaElement: - """Generate a logbook query lambda_stmt.""" - stmt = lambda_stmt( - lambda: _generate_events_query_without_states() - .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) - .where(Events.event_type.in_(event_types)) - .outerjoin(EventData, (Events.data_id == EventData.data_id)) - ) - if entity_ids is not None: - if entity_matches_only: - # When entity_matches_only is provided, contexts and events that do not - # contain the entity_ids are not included in the logbook response. - stmt.add_criteria( - lambda s: s.where(_apply_event_entity_id_matchers(entity_ids)), - track_on=entity_ids, - ) - stmt += lambda s: s.union_all( - _generate_states_query() - .filter((States.last_updated > start_day) & (States.last_updated < end_day)) - .where(States.entity_id.in_(entity_ids)) - ) - else: - if context_id is not None: - # Once all the old `state_changed` events - # are gone from the database remove the - # union_all(_generate_legacy_events_context_id_query()....) - stmt += lambda s: s.where(Events.context_id == context_id).union_all( - _generate_legacy_events_context_id_query() - .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) - .where(Events.context_id == context_id), - _generate_states_query() - .where( - (States.last_updated > start_day) & (States.last_updated < end_day) - ) - .outerjoin(Events, (States.event_id == Events.event_id)) - .where(States.context_id == context_id), - ) - elif entity_filter is not None: - stmt += lambda s: s.union_all( - _generate_states_query() - .where( - (States.last_updated > start_day) & (States.last_updated < end_day) - ) - .where(entity_filter) - ) - else: - stmt += lambda s: s.union_all( - _generate_states_query().where( - (States.last_updated > start_day) & (States.last_updated < end_day) - ) - ) - - stmt += lambda s: s.order_by(Events.time_fired) - return stmt - - -def _generate_events_query_without_data() -> Select: - return select( - literal(value=EVENT_STATE_CHANGED, type_=sqlalchemy.String).label("event_type"), - literal(value=None, type_=sqlalchemy.Text).label("event_data"), - States.last_changed.label("time_fired"), - States.context_id.label("context_id"), - States.context_user_id.label("context_user_id"), - States.context_parent_id.label("context_parent_id"), - literal(value=None, type_=sqlalchemy.Text).label("shared_data"), - *STATE_COLUMNS, - ) - - -def _generate_legacy_events_context_id_query() -> Select: - """Generate a legacy events context id query that also joins states.""" - # This can be removed once we no longer have event_ids in the states table - return ( - select( - *EVENT_COLUMNS, - literal(value=None, type_=sqlalchemy.String).label("shared_data"), - States.state, - States.entity_id, - States.attributes, - StateAttributes.shared_attrs, - ) - .outerjoin(States, (Events.event_id == States.event_id)) - .where(States.last_updated == States.last_changed) - .where(_not_continuous_entity_matcher()) - .outerjoin( - StateAttributes, (States.attributes_id == StateAttributes.attributes_id) - ) - ) - - -def _generate_events_query_without_states() -> Select: - return select( - *EVENT_COLUMNS, EventData.shared_data.label("shared_data"), *EMPTY_STATE_COLUMNS - ) - - -def _generate_states_query() -> Select: - old_state = aliased(States, name="old_state") - return ( - _generate_events_query_without_data() 
- .outerjoin(old_state, (States.old_state_id == old_state.state_id)) - .where(_missing_state_matcher(old_state)) - .where(_not_continuous_entity_matcher()) - .where(States.last_updated == States.last_changed) - .outerjoin( - StateAttributes, (States.attributes_id == StateAttributes.attributes_id) - ) - ) - - -def _missing_state_matcher(old_state: States) -> sqlalchemy.and_: - # The below removes state change events that do not have - # and old_state or the old_state is missing (newly added entities) - # or the new_state is missing (removed entities) - return sqlalchemy.and_( - old_state.state_id.isnot(None), - (States.state != old_state.state), - States.state.isnot(None), - ) - - -def _not_continuous_entity_matcher() -> sqlalchemy.or_: - """Match non continuous entities.""" - return sqlalchemy.or_( - _not_continuous_domain_matcher(), - sqlalchemy.and_( - _continuous_domain_matcher, _not_uom_attributes_matcher() - ).self_group(), - ) - - -def _not_continuous_domain_matcher() -> sqlalchemy.and_: - """Match not continuous domains.""" - return sqlalchemy.and_( - *[ - ~States.entity_id.like(entity_domain) - for entity_domain in CONTINUOUS_ENTITY_ID_LIKE - ], - ).self_group() - - -def _continuous_domain_matcher() -> sqlalchemy.or_: - """Match continuous domains.""" - return sqlalchemy.or_( - *[ - States.entity_id.like(entity_domain) - for entity_domain in CONTINUOUS_ENTITY_ID_LIKE - ], - ).self_group() - - -def _not_uom_attributes_matcher() -> Any: - """Prefilter ATTR_UNIT_OF_MEASUREMENT as its much faster in sql.""" - return ~StateAttributes.shared_attrs.like( - UNIT_OF_MEASUREMENT_JSON_LIKE - ) | ~States.attributes.like(UNIT_OF_MEASUREMENT_JSON_LIKE) - - -def _apply_event_entity_id_matchers(entity_ids: Iterable[str]) -> sqlalchemy.or_: - """Create matchers for the entity_id in the event_data.""" - ors = [] - for entity_id in entity_ids: - like = ENTITY_ID_JSON_TEMPLATE.format(entity_id) - ors.append(Events.event_data.like(like)) - ors.append(EventData.shared_data.like(like)) - return sqlalchemy.or_(*ors) - - def _keep_row( hass: HomeAssistant, event_type: str, diff --git a/homeassistant/components/logbook/queries.py b/homeassistant/components/logbook/queries.py new file mode 100644 index 00000000000..8c13719a1cf --- /dev/null +++ b/homeassistant/components/logbook/queries.py @@ -0,0 +1,377 @@ +"""Queries for logbook.""" +from __future__ import annotations + +from collections.abc import Iterable +from datetime import datetime as dt +from typing import Any + +import sqlalchemy +from sqlalchemy import lambda_stmt, select +from sqlalchemy.orm import aliased +from sqlalchemy.sql.expression import literal +from sqlalchemy.sql.lambdas import StatementLambdaElement +from sqlalchemy.sql.selectable import Select + +from homeassistant.components.history import Filters +from homeassistant.components.proximity import DOMAIN as PROXIMITY_DOMAIN +from homeassistant.components.recorder.models import ( + EventData, + Events, + StateAttributes, + States, +) +from homeassistant.components.sensor import DOMAIN as SENSOR_DOMAIN +from homeassistant.const import EVENT_STATE_CHANGED + +ENTITY_ID_JSON_TEMPLATE = '%"entity_id":"{}"%' + +CONTINUOUS_DOMAINS = {PROXIMITY_DOMAIN, SENSOR_DOMAIN} +CONTINUOUS_ENTITY_ID_LIKE = [f"{domain}.%" for domain in CONTINUOUS_DOMAINS] + +UNIT_OF_MEASUREMENT_JSON = '"unit_of_measurement":' +UNIT_OF_MEASUREMENT_JSON_LIKE = f"%{UNIT_OF_MEASUREMENT_JSON}%" + + +EVENT_COLUMNS = ( + Events.event_type.label("event_type"), + Events.event_data.label("event_data"), + 
Events.time_fired.label("time_fired"), + Events.context_id.label("context_id"), + Events.context_user_id.label("context_user_id"), + Events.context_parent_id.label("context_parent_id"), +) + +STATE_COLUMNS = ( + States.state.label("state"), + States.entity_id.label("entity_id"), + States.attributes.label("attributes"), + StateAttributes.shared_attrs.label("shared_attrs"), +) + +EMPTY_STATE_COLUMNS = ( + literal(value=None, type_=sqlalchemy.String).label("state"), + literal(value=None, type_=sqlalchemy.String).label("entity_id"), + literal(value=None, type_=sqlalchemy.Text).label("attributes"), + literal(value=None, type_=sqlalchemy.Text).label("shared_attrs"), +) + +EVENT_ROWS_NO_STATES = ( + *EVENT_COLUMNS, + EventData.shared_data.label("shared_data"), + *EMPTY_STATE_COLUMNS, +) + +# Virtual column to tell logbook if it should avoid processing +# the event as its only used to link contexts +CONTEXT_ONLY = literal("1").label("context_only") +NOT_CONTEXT_ONLY = literal(None).label("context_only") + + +def statement_for_request( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_ids: list[str] | None = None, + filters: Filters | None = None, + context_id: str | None = None, +) -> StatementLambdaElement: + """Generate the logbook statement for a logbook request.""" + + # No entities: logbook sends everything for the timeframe + # limited by the context_id and the yaml configured filter + if not entity_ids: + entity_filter = filters.entity_filter() if filters else None # type: ignore[no-untyped-call] + return _all_stmt(start_day, end_day, event_types, entity_filter, context_id) + + # Multiple entities: logbook sends everything for the timeframe for the entities + # + # This is the least efficient query because we use + # like matching which means part of the query has to be built each + # time when the entity_ids are not in the cache + if len(entity_ids) > 1: + return _entities_stmt(start_day, end_day, event_types, entity_ids) + + # Single entity: logbook sends everything for the timeframe for the entity + entity_id = entity_ids[0] + entity_like = ENTITY_ID_JSON_TEMPLATE.format(entity_id) + return _single_entity_stmt(start_day, end_day, event_types, entity_id, entity_like) + + +def _select_events_context_id_subquery( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], +) -> Select: + """Generate the select for a context_id subquery.""" + return ( + select(Events.context_id) + .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) + .where(Events.event_type.in_(event_types)) + .outerjoin(EventData, (Events.data_id == EventData.data_id)) + ) + + +def _select_entities_context_ids_sub_query( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_ids: list[str], +) -> Select: + """Generate a subquery to find context ids for multiple entities.""" + return ( + _select_events_context_id_subquery(start_day, end_day, event_types) + .where(_apply_event_entity_id_matchers(entity_ids)) + .union_all( + select(States.context_id) + .filter((States.last_updated > start_day) & (States.last_updated < end_day)) + .where(States.entity_id.in_(entity_ids)) + ) + .subquery() + ) + + +def _select_events_context_only() -> Select: + """Generate an events query that mark them as for context_only. + + By marking them as context_only we know they are only for + linking context ids and we can avoid processing them. 
+ """ + return select(*EVENT_ROWS_NO_STATES, CONTEXT_ONLY).outerjoin( + EventData, (Events.data_id == EventData.data_id) + ) + + +def _entities_stmt( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_ids: list[str], +) -> StatementLambdaElement: + """Generate a logbook query for multiple entities.""" + stmt = lambda_stmt( + lambda: _select_events_without_states(start_day, end_day, event_types) + ) + stmt = stmt.add_criteria( + lambda s: s.where(_apply_event_entity_id_matchers(entity_ids)).union_all( + _select_states(start_day, end_day).where(States.entity_id.in_(entity_ids)), + _select_events_context_only().where( + Events.context_id.in_( + _select_entities_context_ids_sub_query( + start_day, + end_day, + event_types, + entity_ids, + ) + ) + ), + ), + # Since _apply_event_entity_id_matchers generates multiple + # like statements we need to use the entity_ids in the + # the cache key since the sql can change based on the + # likes. + track_on=(str(entity_ids),), + ) + stmt += lambda s: s.order_by(Events.time_fired) + return stmt + + +def _select_entity_context_ids_sub_query( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_id: str, + entity_id_like: str, +) -> Select: + """Generate a subquery to find context ids for a single entity.""" + return ( + _select_events_context_id_subquery(start_day, end_day, event_types) + .where( + Events.event_data.like(entity_id_like) + | EventData.shared_data.like(entity_id_like) + ) + .union_all( + select(States.context_id) + .filter((States.last_updated > start_day) & (States.last_updated < end_day)) + .where(States.entity_id == entity_id) + ) + .subquery() + ) + + +def _single_entity_stmt( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_id: str, + entity_id_like: str, +) -> StatementLambdaElement: + """Generate a logbook query for a single entity.""" + stmt = lambda_stmt( + lambda: _select_events_without_states(start_day, end_day, event_types) + .where( + Events.event_data.like(entity_id_like) + | EventData.shared_data.like(entity_id_like) + ) + .union_all( + _select_states(start_day, end_day).where(States.entity_id == entity_id), + _select_events_context_only().where( + Events.context_id.in_( + _select_entity_context_ids_sub_query( + start_day, end_day, event_types, entity_id, entity_id_like + ) + ) + ), + ) + .order_by(Events.time_fired) + ) + return stmt + + +def _all_stmt( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_filter: Any | None = None, + context_id: str | None = None, +) -> StatementLambdaElement: + """Generate a logbook query for all entities.""" + stmt = lambda_stmt( + lambda: _select_events_without_states(start_day, end_day, event_types) + ) + if context_id is not None: + # Once all the old `state_changed` events + # are gone from the database remove the + # _legacy_select_events_context_id() + stmt += lambda s: s.where(Events.context_id == context_id).union_all( + _select_states(start_day, end_day).where(States.context_id == context_id), + _legacy_select_events_context_id(start_day, end_day, context_id), + ) + elif entity_filter is not None: + stmt += lambda s: s.union_all( + _select_states(start_day, end_day).where(entity_filter) + ) + else: + stmt += lambda s: s.union_all(_select_states(start_day, end_day)) + stmt += lambda s: s.order_by(Events.time_fired) + return stmt + + +def _legacy_select_events_context_id( + start_day: dt, end_day: dt, context_id: str +) -> Select: + """Generate a legacy events context id select that also joins 
states.""" + # This can be removed once we no longer have event_ids in the states table + return ( + select( + *EVENT_COLUMNS, + literal(value=None, type_=sqlalchemy.String).label("shared_data"), + *STATE_COLUMNS, + NOT_CONTEXT_ONLY, + ) + .outerjoin(States, (Events.event_id == States.event_id)) + .where(States.last_updated == States.last_changed) + .where(_not_continuous_entity_matcher()) + .outerjoin( + StateAttributes, (States.attributes_id == StateAttributes.attributes_id) + ) + .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) + .where(Events.context_id == context_id) + ) + + +def _select_events_without_states( + start_day: dt, end_day: dt, event_types: tuple[str, ...] +) -> Select: + """Generate an events select that does not join states.""" + return ( + select(*EVENT_ROWS_NO_STATES, NOT_CONTEXT_ONLY) + .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) + .where(Events.event_type.in_(event_types)) + .outerjoin(EventData, (Events.data_id == EventData.data_id)) + ) + + +def _select_states(start_day: dt, end_day: dt) -> Select: + """Generate a states select that formats the states table as event rows.""" + old_state = aliased(States, name="old_state") + return ( + select( + literal(value=EVENT_STATE_CHANGED, type_=sqlalchemy.String).label( + "event_type" + ), + literal(value=None, type_=sqlalchemy.Text).label("event_data"), + States.last_changed.label("time_fired"), + States.context_id.label("context_id"), + States.context_user_id.label("context_user_id"), + States.context_parent_id.label("context_parent_id"), + literal(value=None, type_=sqlalchemy.Text).label("shared_data"), + *STATE_COLUMNS, + NOT_CONTEXT_ONLY, + ) + .filter((States.last_updated > start_day) & (States.last_updated < end_day)) + .outerjoin(old_state, (States.old_state_id == old_state.state_id)) + .where(_missing_state_matcher(old_state)) + .where(_not_continuous_entity_matcher()) + .where(States.last_updated == States.last_changed) + .outerjoin( + StateAttributes, (States.attributes_id == StateAttributes.attributes_id) + ) + ) + + +def _missing_state_matcher(old_state: States) -> sqlalchemy.and_: + # The below removes state change events that do not have + # and old_state or the old_state is missing (newly added entities) + # or the new_state is missing (removed entities) + return sqlalchemy.and_( + old_state.state_id.isnot(None), + (States.state != old_state.state), + States.state.isnot(None), + ) + + +def _not_continuous_entity_matcher() -> sqlalchemy.or_: + """Match non continuous entities.""" + return sqlalchemy.or_( + _not_continuous_domain_matcher(), + sqlalchemy.and_( + _continuous_domain_matcher, _not_uom_attributes_matcher() + ).self_group(), + ) + + +def _not_continuous_domain_matcher() -> sqlalchemy.and_: + """Match not continuous domains.""" + return sqlalchemy.and_( + *[ + ~States.entity_id.like(entity_domain) + for entity_domain in CONTINUOUS_ENTITY_ID_LIKE + ], + ).self_group() + + +def _continuous_domain_matcher() -> sqlalchemy.or_: + """Match continuous domains.""" + return sqlalchemy.or_( + *[ + States.entity_id.like(entity_domain) + for entity_domain in CONTINUOUS_ENTITY_ID_LIKE + ], + ).self_group() + + +def _not_uom_attributes_matcher() -> Any: + """Prefilter ATTR_UNIT_OF_MEASUREMENT as its much faster in sql.""" + return ~StateAttributes.shared_attrs.like( + UNIT_OF_MEASUREMENT_JSON_LIKE + ) | ~States.attributes.like(UNIT_OF_MEASUREMENT_JSON_LIKE) + + +def _apply_event_entity_id_matchers(entity_ids: Iterable[str]) -> sqlalchemy.or_: + """Create matchers 
for the entity_id in the event_data.""" + ors = [] + for entity_id in entity_ids: + like = ENTITY_ID_JSON_TEMPLATE.format(entity_id) + ors.append(Events.event_data.like(like)) + ors.append(EventData.shared_data.like(like)) + return sqlalchemy.or_(*ors) diff --git a/tests/components/logbook/test_init.py b/tests/components/logbook/test_init.py index 76319ba5a6e..cc10346fc07 100644 --- a/tests/components/logbook/test_init.py +++ b/tests/components/logbook/test_init.py @@ -1321,8 +1321,8 @@ async def test_logbook_context_from_template(hass, hass_client, recorder_mock): assert json_dict[5]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" -async def test_logbook_entity_matches_only(hass, hass_client, recorder_mock): - """Test the logbook view with a single entity and entity_matches_only.""" +async def test_logbook_(hass, hass_client, recorder_mock): + """Test the logbook view with a single entity and .""" await async_setup_component(hass, "logbook", {}) assert await async_setup_component( hass, @@ -1377,7 +1377,7 @@ async def test_logbook_entity_matches_only(hass, hass_client, recorder_mock): # Test today entries with filter by end_time end_time = start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1390,10 +1390,8 @@ async def test_logbook_entity_matches_only(hass, hass_client, recorder_mock): assert json_dict[1]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" -async def test_logbook_entity_matches_only_multiple_calls( - hass, hass_client, recorder_mock -): - """Test the logbook view with a single entity and entity_matches_only called multiple times.""" +async def test_logbook_many_entities_multiple_calls(hass, hass_client, recorder_mock): + """Test the logbook view with a many entities called multiple times.""" await async_setup_component(hass, "logbook", {}) await async_setup_component(hass, "automation", {}) @@ -1421,7 +1419,7 @@ async def test_logbook_entity_matches_only_multiple_calls( for automation_id in range(5): # Test today entries with filter by end_time response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_{automation_id}_automation&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_{automation_id}_automation" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1432,7 +1430,7 @@ async def test_logbook_entity_matches_only_multiple_calls( ) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_0_automation,automation.mock_1_automation,automation.mock_2_automation&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_0_automation,automation.mock_1_automation,automation.mock_2_automation" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1442,11 +1440,28 @@ async def test_logbook_entity_matches_only_multiple_calls( assert json_dict[1]["entity_id"] == "automation.mock_1_automation" assert json_dict[2]["entity_id"] == "automation.mock_2_automation" + response = await client.get( + 
f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_4_automation,automation.mock_2_automation,automation.mock_0_automation,automation.mock_1_automation" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() -async def test_custom_log_entry_discoverable_via_entity_matches_only( - hass, hass_client, recorder_mock -): - """Test if a custom log entry is later discoverable via entity_matches_only.""" + assert len(json_dict) == 4 + assert json_dict[0]["entity_id"] == "automation.mock_0_automation" + assert json_dict[1]["entity_id"] == "automation.mock_1_automation" + assert json_dict[2]["entity_id"] == "automation.mock_2_automation" + assert json_dict[3]["entity_id"] == "automation.mock_4_automation" + + response = await client.get( + f"/api/logbook/{end_time.isoformat()}?end_time={end_time}&entity=automation.mock_4_automation,automation.mock_2_automation,automation.mock_0_automation,automation.mock_1_automation" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() + assert len(json_dict) == 0 + + +async def test_custom_log_entry_discoverable_via_(hass, hass_client, recorder_mock): + """Test if a custom log entry is later discoverable via .""" await async_setup_component(hass, "logbook", {}) await async_recorder_block_till_done(hass) @@ -1468,7 +1483,7 @@ async def test_custom_log_entry_discoverable_via_entity_matches_only( # Test today entries with filter by end_time end_time = start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time.isoformat()}&entity=switch.test_switch&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time.isoformat()}&entity=switch.test_switch" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1480,8 +1495,8 @@ async def test_custom_log_entry_discoverable_via_entity_matches_only( assert json_dict[0]["entity_id"] == "switch.test_switch" -async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_mock): - """Test the logbook view with a multiple entities and entity_matches_only.""" +async def test_logbook_multiple_entities(hass, hass_client, recorder_mock): + """Test the logbook view with a multiple entities.""" await async_setup_component(hass, "logbook", {}) assert await async_setup_component( hass, @@ -1513,12 +1528,14 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ # Entity added (should not be logged) hass.states.async_set("switch.test_state", STATE_ON) hass.states.async_set("light.test_state", STATE_ON) + hass.states.async_set("binary_sensor.test_state", STATE_ON) await hass.async_block_till_done() # First state change (should be logged) hass.states.async_set("switch.test_state", STATE_OFF) hass.states.async_set("light.test_state", STATE_OFF) + hass.states.async_set("binary_sensor.test_state", STATE_OFF) await hass.async_block_till_done() @@ -1530,6 +1547,9 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ "switch.test_state", STATE_ON, context=switch_turn_off_context ) hass.states.async_set("light.test_state", STATE_ON, context=switch_turn_off_context) + hass.states.async_set( + "binary_sensor.test_state", STATE_ON, context=switch_turn_off_context + ) await async_wait_recording_done(hass) client = await hass_client() @@ -1541,7 +1561,7 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ # Test today entries with filter by end_time end_time = 
start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state,light.test_state&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state,light.test_state" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1558,6 +1578,46 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ assert json_dict[3]["entity_id"] == "light.test_state" assert json_dict[3]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + # Test today entries with filter by end_time + end_time = start + timedelta(hours=24) + response = await client.get( + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=binary_sensor.test_state,light.test_state" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() + + assert len(json_dict) == 4 + + assert json_dict[0]["entity_id"] == "light.test_state" + + assert json_dict[1]["entity_id"] == "binary_sensor.test_state" + + assert json_dict[2]["entity_id"] == "light.test_state" + assert json_dict[2]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + + assert json_dict[3]["entity_id"] == "binary_sensor.test_state" + assert json_dict[3]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + + # Test today entries with filter by end_time + end_time = start + timedelta(hours=24) + response = await client.get( + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=light.test_state,binary_sensor.test_state" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() + + assert len(json_dict) == 4 + + assert json_dict[0]["entity_id"] == "light.test_state" + + assert json_dict[1]["entity_id"] == "binary_sensor.test_state" + + assert json_dict[2]["entity_id"] == "light.test_state" + assert json_dict[2]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + + assert json_dict[3]["entity_id"] == "binary_sensor.test_state" + assert json_dict[3]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + async def test_logbook_invalid_entity(hass, hass_client, recorder_mock): """Test the logbook view with requesting an invalid entity.""" @@ -1572,7 +1632,7 @@ async def test_logbook_invalid_entity(hass, hass_client, recorder_mock): # Test today entries with filter by end_time end_time = start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=invalid&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=invalid" ) assert response.status == HTTPStatus.INTERNAL_SERVER_ERROR
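
Some notes on the query techniques this patch relies on, with stand-alone sketches.

_entities_stmt() builds its statement through SQLAlchemy's lambda_stmt()/add_criteria() so the rendered SQL is cached across requests, and it passes track_on=(str(entity_ids),) because the number of LIKE clauses changes with the entity list. Below is a minimal sketch of that caching pattern under the same assumptions; the Event model, its columns, and events_stmt() are illustrative stand-ins, not the recorder schema.

from datetime import datetime, timedelta

from sqlalchemy import (
    Column,
    DateTime,
    Integer,
    String,
    create_engine,
    lambda_stmt,
    or_,
    select,
)
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Event(Base):
    """Illustrative stand-in for the recorder Events model."""

    __tablename__ = "events"
    event_id = Column(Integer, primary_key=True)
    event_data = Column(String)
    time_fired = Column(DateTime)


def _entity_matchers(entity_ids):
    """OR together LIKE clauses, as _apply_event_entity_id_matchers() does."""
    return or_(*(Event.event_data.like(f'%"entity_id":"{eid}"%') for eid in entity_ids))


def events_stmt(start, end, entity_ids):
    # The outer lambda is cached by where it appears in the source;
    # start/end simply become bound parameters on every call.
    stmt = lambda_stmt(
        lambda: select(Event)
        .where(Event.time_fired > start)
        .where(Event.time_fired < end)
    )
    # The LIKE clauses change shape with the entity list, so the list is
    # folded into the cache key via track_on, mirroring _entities_stmt().
    stmt = stmt.add_criteria(
        lambda s: s.where(_entity_matchers(entity_ids)),
        track_on=(str(entity_ids),),
    )
    stmt += lambda s: s.order_by(Event.time_fired)
    return stmt


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    now = datetime.utcnow()
    rows = session.execute(events_stmt(now - timedelta(days=1), now, ["light.kitchen"]))
    print(rows.all())  # [] against an empty database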
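The per-entity matching depends on ENTITY_ID_JSON_TEMPLATE turning into a substring LIKE over the serialized event payload, which only works because the stored JSON is compact (no space after the colon, as the template itself encodes). A small plain-Python illustration of that assumption:

import json

ENTITY_ID_JSON_TEMPLATE = '%"entity_id":"{}"%'


def like_contains(value: str, pattern: str) -> bool:
    # Simplified LIKE: only handles the leading/trailing "%" form used here.
    # Real SQL LIKE would also treat "_" as a single-character wildcard,
    # which this check ignores.
    return pattern.strip("%") in value


# Compact separators model how the recorder serializes event data; with the
# default json.dumps spacing the pattern would not match.
event_data = json.dumps(
    {"domain": "light", "service": "turn_on", "entity_id": "light.kitchen"},
    separators=(",", ":"),
)

print(like_contains(event_data, ENTITY_ID_JSON_TEMPLATE.format("light.kitchen")))  # True
print(like_contains(event_data, ENTITY_ID_JSON_TEMPLATE.format("light.porch")))    # False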
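The new CONTEXT_ONLY / NOT_CONTEXT_ONLY literal column is how the statement tells the consumer that a row was fetched purely to resolve context ids. The yield_rows() change in __init__.py stores every row for context lookup and then skips the context-only ones; a stand-alone sketch of that consumption pattern, where Row is a namedtuple stand-in for a SQLAlchemy result row:

from collections import namedtuple

Row = namedtuple("Row", ["context_id", "event_type", "context_only"])

rows = [
    Row("ctx-1", "call_service", None),          # a row the logbook should render
    Row("ctx-1", "automation_triggered", "1"),   # fetched only to link the context
]

context_lookup = {None: None}
rendered = []
for row in rows:
    # Every row can resolve a context id ...
    context_lookup.setdefault(row.context_id, row)
    # ... but context-only rows never become logbook entries.
    if row.context_only:
        continue
    rendered.append(row)

print(len(context_lookup), len(rendered))  # 2 1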
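EVENT_ROWS_NO_STATES, EMPTY_STATE_COLUMNS and the literal(None) labels exist because every branch of the UNION ALL must yield the same column list: the events half carries NULL placeholders for the state columns, and _select_states() synthesizes the event columns from the states table. A toy version of that shape, with illustrative tables rather than the recorder schema:

import sqlalchemy
from sqlalchemy import Column, Integer, String, create_engine, literal, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class EventRow(Base):
    __tablename__ = "events"
    id = Column(Integer, primary_key=True)
    event_type = Column(String)


class StateRow(Base):
    __tablename__ = "states"
    id = Column(Integer, primary_key=True)
    entity_id = Column(String)
    state = Column(String)


# Events half: NULL placeholders where the state columns would be.
events_half = select(
    EventRow.event_type.label("event_type"),
    literal(value=None, type_=sqlalchemy.String).label("entity_id"),
    literal(value=None, type_=sqlalchemy.String).label("state"),
)
# States half: a synthetic event_type so both halves line up.
states_half = select(
    literal(value="state_changed", type_=sqlalchemy.String).label("event_type"),
    StateRow.entity_id.label("entity_id"),
    StateRow.state.label("state"),
)
stmt = events_half.union_all(states_half)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(EventRow(event_type="call_service"))
    session.add(StateRow(entity_id="light.kitchen", state="on"))
    session.commit()
    for row in session.execute(stmt):
        print(row.event_type, row.entity_id, row.state)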
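Finally, the new debug branch in _get_events() compiles the lambda statement with literal_binds so the exact SQL sent to the database can be inspected, but only when debug logging is enabled for the component. The standard-library way to switch that on (the logger name follows from logging.getLogger(__name__) in homeassistant/components/logbook/__init__.py):

import logging

# Surface the "Literal statement: %s" debug line added in _get_events().
logging.basicConfig(level=logging.INFO)
logging.getLogger("homeassistant.components.logbook").setLevel(logging.DEBUG)

In a running instance the same effect comes from the logger integration, e.g. setting homeassistant.components.logbook to debug under logger: logs: in configuration.yaml.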