From 81e8d2ab862b78b7936a0ba8b12a3643e76ccd94 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 11 May 2022 17:27:02 -0500 Subject: [PATCH] Significantly improve logbook performance when selecting entities (#71657) --- homeassistant/components/logbook/__init__.py | 277 ++------------ homeassistant/components/logbook/queries.py | 377 +++++++++++++++++++ tests/components/logbook/test_init.py | 96 ++++- 3 files changed, 480 insertions(+), 270 deletions(-) create mode 100644 homeassistant/components/logbook/queries.py diff --git a/homeassistant/components/logbook/__init__.py b/homeassistant/components/logbook/__init__.py index a877cba4ff2..9cf3ad99e57 100644 --- a/homeassistant/components/logbook/__init__.py +++ b/homeassistant/components/logbook/__init__.py @@ -1,23 +1,18 @@ """Event parser and human readable log generator.""" from __future__ import annotations -from collections.abc import Callable, Generator, Iterable +from collections.abc import Callable, Generator from contextlib import suppress from datetime import datetime as dt, timedelta from http import HTTPStatus import json +import logging import re from typing import Any, cast from aiohttp import web -import sqlalchemy -from sqlalchemy import lambda_stmt, select from sqlalchemy.engine.row import Row -from sqlalchemy.orm import aliased from sqlalchemy.orm.query import Query -from sqlalchemy.sql.expression import literal -from sqlalchemy.sql.lambdas import StatementLambdaElement -from sqlalchemy.sql.selectable import Select import voluptuous as vol from homeassistant.components import frontend @@ -27,15 +22,8 @@ from homeassistant.components.history import ( sqlalchemy_filter_from_include_exclude_conf, ) from homeassistant.components.http import HomeAssistantView -from homeassistant.components.proximity import DOMAIN as PROXIMITY_DOMAIN from homeassistant.components.recorder import get_instance -from homeassistant.components.recorder.models import ( - EventData, - Events, - StateAttributes, - States, - process_timestamp_to_utc_isoformat, -) +from homeassistant.components.recorder.models import process_timestamp_to_utc_isoformat from homeassistant.components.recorder.util import session_scope from homeassistant.components.script import EVENT_SCRIPT_STARTED from homeassistant.components.sensor import ATTR_STATE_CLASS, DOMAIN as SENSOR_DOMAIN @@ -75,66 +63,32 @@ from homeassistant.helpers.typing import ConfigType from homeassistant.loader import bind_hass import homeassistant.util.dt as dt_util -ENTITY_ID_JSON_TEMPLATE = '%"entity_id":"{}"%' +from .queries import statement_for_request + +_LOGGER = logging.getLogger(__name__) + + FRIENDLY_NAME_JSON_EXTRACT = re.compile('"friendly_name": ?"([^"]+)"') ENTITY_ID_JSON_EXTRACT = re.compile('"entity_id": ?"([^"]+)"') DOMAIN_JSON_EXTRACT = re.compile('"domain": ?"([^"]+)"') ICON_JSON_EXTRACT = re.compile('"icon": ?"([^"]+)"') ATTR_MESSAGE = "message" -CONTINUOUS_DOMAINS = {PROXIMITY_DOMAIN, SENSOR_DOMAIN} -CONTINUOUS_ENTITY_ID_LIKE = [f"{domain}.%" for domain in CONTINUOUS_DOMAINS] - DOMAIN = "logbook" -EMPTY_JSON_OBJECT = "{}" -UNIT_OF_MEASUREMENT_JSON = '"unit_of_measurement":' -UNIT_OF_MEASUREMENT_JSON_LIKE = f"%{UNIT_OF_MEASUREMENT_JSON}%" HA_DOMAIN_ENTITY_ID = f"{HA_DOMAIN}._" CONFIG_SCHEMA = vol.Schema( {DOMAIN: INCLUDE_EXCLUDE_BASE_FILTER_SCHEMA}, extra=vol.ALLOW_EXTRA ) -HOMEASSISTANT_EVENTS = [ - EVENT_HOMEASSISTANT_START, - EVENT_HOMEASSISTANT_STOP, -] +HOMEASSISTANT_EVENTS = {EVENT_HOMEASSISTANT_START, EVENT_HOMEASSISTANT_STOP} -ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED = [ 
+ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED = ( EVENT_LOGBOOK_ENTRY, EVENT_CALL_SERVICE, *HOMEASSISTANT_EVENTS, -] - -ALL_EVENT_TYPES = [ - EVENT_STATE_CHANGED, - *ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED, -] - - -EVENT_COLUMNS = [ - Events.event_type.label("event_type"), - Events.event_data.label("event_data"), - Events.time_fired.label("time_fired"), - Events.context_id.label("context_id"), - Events.context_user_id.label("context_user_id"), - Events.context_parent_id.label("context_parent_id"), -] - -STATE_COLUMNS = [ - States.state.label("state"), - States.entity_id.label("entity_id"), - States.attributes.label("attributes"), - StateAttributes.shared_attrs.label("shared_attrs"), -] - -EMPTY_STATE_COLUMNS = [ - literal(value=None, type_=sqlalchemy.String).label("state"), - literal(value=None, type_=sqlalchemy.String).label("entity_id"), - literal(value=None, type_=sqlalchemy.Text).label("attributes"), - literal(value=None, type_=sqlalchemy.Text).label("shared_attrs"), -] +) SCRIPT_AUTOMATION_EVENTS = {EVENT_AUTOMATION_TRIGGERED, EVENT_SCRIPT_STARTED} @@ -295,7 +249,6 @@ class LogbookView(HomeAssistantView): hass = request.app["hass"] - entity_matches_only = "entity_matches_only" in request.query context_id = request.query.get("context_id") if entity_ids and context_id: @@ -313,7 +266,6 @@ class LogbookView(HomeAssistantView): entity_ids, self.filters, self.entities_filter, - entity_matches_only, context_id, ) ) @@ -416,7 +368,6 @@ def _get_events( entity_ids: list[str] | None = None, filters: Filters | None = None, entities_filter: EntityFilter | Callable[[str], bool] | None = None, - entity_matches_only: bool = False, context_id: str | None = None, ) -> list[dict[str, Any]]: """Get events for a period of time.""" @@ -428,10 +379,13 @@ def _get_events( event_data_cache: dict[str, dict[str, Any]] = {} context_lookup: dict[str | None, Row | None] = {None: None} event_cache = EventCache(event_data_cache) - external_events = hass.data.get(DOMAIN, {}) + external_events: dict[ + str, tuple[str, Callable[[LazyEventPartialState], dict[str, Any]]] + ] = hass.data.get(DOMAIN, {}) context_augmenter = ContextAugmenter( context_lookup, entity_name_cache, external_events, event_cache ) + event_types = (*ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED, *external_events) def yield_rows(query: Query) -> Generator[Row, None, None]: """Yield Events that are not filtered away.""" @@ -441,6 +395,8 @@ def _get_events( rows = query.yield_per(1000) for row in rows: context_lookup.setdefault(row.context_id, row) + if row.context_only: + continue event_type = row.event_type if event_type != EVENT_CALL_SERVICE and ( event_type == EVENT_STATE_CHANGED @@ -451,22 +407,15 @@ def _get_events( if entity_ids is not None: entities_filter = generate_filter([], entity_ids, [], []) - event_types = [ - *ALL_EVENT_TYPES_EXCEPT_STATE_CHANGED, - *hass.data.get(DOMAIN, {}), - ] - entity_filter = None - if entity_ids is None and filters: - entity_filter = filters.entity_filter() # type: ignore[no-untyped-call] - stmt = _generate_logbook_query( - start_day, - end_day, - event_types, - entity_ids, - entity_filter, - entity_matches_only, - context_id, + stmt = statement_for_request( + start_day, end_day, event_types, entity_ids, filters, context_id ) + if _LOGGER.isEnabledFor(logging.DEBUG): + _LOGGER.debug( + "Literal statement: %s", + stmt.compile(compile_kwargs={"literal_binds": True}), + ) + with session_scope(hass=hass) as session: return list( _humanify( @@ -479,182 +428,6 @@ def _get_events( ) -def _generate_logbook_query( - start_day: dt, - 
end_day: dt, - event_types: list[str], - entity_ids: list[str] | None = None, - entity_filter: Any | None = None, - entity_matches_only: bool = False, - context_id: str | None = None, -) -> StatementLambdaElement: - """Generate a logbook query lambda_stmt.""" - stmt = lambda_stmt( - lambda: _generate_events_query_without_states() - .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) - .where(Events.event_type.in_(event_types)) - .outerjoin(EventData, (Events.data_id == EventData.data_id)) - ) - if entity_ids is not None: - if entity_matches_only: - # When entity_matches_only is provided, contexts and events that do not - # contain the entity_ids are not included in the logbook response. - stmt.add_criteria( - lambda s: s.where(_apply_event_entity_id_matchers(entity_ids)), - track_on=entity_ids, - ) - stmt += lambda s: s.union_all( - _generate_states_query() - .filter((States.last_updated > start_day) & (States.last_updated < end_day)) - .where(States.entity_id.in_(entity_ids)) - ) - else: - if context_id is not None: - # Once all the old `state_changed` events - # are gone from the database remove the - # union_all(_generate_legacy_events_context_id_query()....) - stmt += lambda s: s.where(Events.context_id == context_id).union_all( - _generate_legacy_events_context_id_query() - .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) - .where(Events.context_id == context_id), - _generate_states_query() - .where( - (States.last_updated > start_day) & (States.last_updated < end_day) - ) - .outerjoin(Events, (States.event_id == Events.event_id)) - .where(States.context_id == context_id), - ) - elif entity_filter is not None: - stmt += lambda s: s.union_all( - _generate_states_query() - .where( - (States.last_updated > start_day) & (States.last_updated < end_day) - ) - .where(entity_filter) - ) - else: - stmt += lambda s: s.union_all( - _generate_states_query().where( - (States.last_updated > start_day) & (States.last_updated < end_day) - ) - ) - - stmt += lambda s: s.order_by(Events.time_fired) - return stmt - - -def _generate_events_query_without_data() -> Select: - return select( - literal(value=EVENT_STATE_CHANGED, type_=sqlalchemy.String).label("event_type"), - literal(value=None, type_=sqlalchemy.Text).label("event_data"), - States.last_changed.label("time_fired"), - States.context_id.label("context_id"), - States.context_user_id.label("context_user_id"), - States.context_parent_id.label("context_parent_id"), - literal(value=None, type_=sqlalchemy.Text).label("shared_data"), - *STATE_COLUMNS, - ) - - -def _generate_legacy_events_context_id_query() -> Select: - """Generate a legacy events context id query that also joins states.""" - # This can be removed once we no longer have event_ids in the states table - return ( - select( - *EVENT_COLUMNS, - literal(value=None, type_=sqlalchemy.String).label("shared_data"), - States.state, - States.entity_id, - States.attributes, - StateAttributes.shared_attrs, - ) - .outerjoin(States, (Events.event_id == States.event_id)) - .where(States.last_updated == States.last_changed) - .where(_not_continuous_entity_matcher()) - .outerjoin( - StateAttributes, (States.attributes_id == StateAttributes.attributes_id) - ) - ) - - -def _generate_events_query_without_states() -> Select: - return select( - *EVENT_COLUMNS, EventData.shared_data.label("shared_data"), *EMPTY_STATE_COLUMNS - ) - - -def _generate_states_query() -> Select: - old_state = aliased(States, name="old_state") - return ( - _generate_events_query_without_data() 
- .outerjoin(old_state, (States.old_state_id == old_state.state_id)) - .where(_missing_state_matcher(old_state)) - .where(_not_continuous_entity_matcher()) - .where(States.last_updated == States.last_changed) - .outerjoin( - StateAttributes, (States.attributes_id == StateAttributes.attributes_id) - ) - ) - - -def _missing_state_matcher(old_state: States) -> sqlalchemy.and_: - # The below removes state change events that do not have - # and old_state or the old_state is missing (newly added entities) - # or the new_state is missing (removed entities) - return sqlalchemy.and_( - old_state.state_id.isnot(None), - (States.state != old_state.state), - States.state.isnot(None), - ) - - -def _not_continuous_entity_matcher() -> sqlalchemy.or_: - """Match non continuous entities.""" - return sqlalchemy.or_( - _not_continuous_domain_matcher(), - sqlalchemy.and_( - _continuous_domain_matcher, _not_uom_attributes_matcher() - ).self_group(), - ) - - -def _not_continuous_domain_matcher() -> sqlalchemy.and_: - """Match not continuous domains.""" - return sqlalchemy.and_( - *[ - ~States.entity_id.like(entity_domain) - for entity_domain in CONTINUOUS_ENTITY_ID_LIKE - ], - ).self_group() - - -def _continuous_domain_matcher() -> sqlalchemy.or_: - """Match continuous domains.""" - return sqlalchemy.or_( - *[ - States.entity_id.like(entity_domain) - for entity_domain in CONTINUOUS_ENTITY_ID_LIKE - ], - ).self_group() - - -def _not_uom_attributes_matcher() -> Any: - """Prefilter ATTR_UNIT_OF_MEASUREMENT as its much faster in sql.""" - return ~StateAttributes.shared_attrs.like( - UNIT_OF_MEASUREMENT_JSON_LIKE - ) | ~States.attributes.like(UNIT_OF_MEASUREMENT_JSON_LIKE) - - -def _apply_event_entity_id_matchers(entity_ids: Iterable[str]) -> sqlalchemy.or_: - """Create matchers for the entity_id in the event_data.""" - ors = [] - for entity_id in entity_ids: - like = ENTITY_ID_JSON_TEMPLATE.format(entity_id) - ors.append(Events.event_data.like(like)) - ors.append(EventData.shared_data.like(like)) - return sqlalchemy.or_(*ors) - - def _keep_row( hass: HomeAssistant, event_type: str, diff --git a/homeassistant/components/logbook/queries.py b/homeassistant/components/logbook/queries.py new file mode 100644 index 00000000000..8c13719a1cf --- /dev/null +++ b/homeassistant/components/logbook/queries.py @@ -0,0 +1,377 @@ +"""Queries for logbook.""" +from __future__ import annotations + +from collections.abc import Iterable +from datetime import datetime as dt +from typing import Any + +import sqlalchemy +from sqlalchemy import lambda_stmt, select +from sqlalchemy.orm import aliased +from sqlalchemy.sql.expression import literal +from sqlalchemy.sql.lambdas import StatementLambdaElement +from sqlalchemy.sql.selectable import Select + +from homeassistant.components.history import Filters +from homeassistant.components.proximity import DOMAIN as PROXIMITY_DOMAIN +from homeassistant.components.recorder.models import ( + EventData, + Events, + StateAttributes, + States, +) +from homeassistant.components.sensor import DOMAIN as SENSOR_DOMAIN +from homeassistant.const import EVENT_STATE_CHANGED + +ENTITY_ID_JSON_TEMPLATE = '%"entity_id":"{}"%' + +CONTINUOUS_DOMAINS = {PROXIMITY_DOMAIN, SENSOR_DOMAIN} +CONTINUOUS_ENTITY_ID_LIKE = [f"{domain}.%" for domain in CONTINUOUS_DOMAINS] + +UNIT_OF_MEASUREMENT_JSON = '"unit_of_measurement":' +UNIT_OF_MEASUREMENT_JSON_LIKE = f"%{UNIT_OF_MEASUREMENT_JSON}%" + + +EVENT_COLUMNS = ( + Events.event_type.label("event_type"), + Events.event_data.label("event_data"), + 
Events.time_fired.label("time_fired"), + Events.context_id.label("context_id"), + Events.context_user_id.label("context_user_id"), + Events.context_parent_id.label("context_parent_id"), +) + +STATE_COLUMNS = ( + States.state.label("state"), + States.entity_id.label("entity_id"), + States.attributes.label("attributes"), + StateAttributes.shared_attrs.label("shared_attrs"), +) + +EMPTY_STATE_COLUMNS = ( + literal(value=None, type_=sqlalchemy.String).label("state"), + literal(value=None, type_=sqlalchemy.String).label("entity_id"), + literal(value=None, type_=sqlalchemy.Text).label("attributes"), + literal(value=None, type_=sqlalchemy.Text).label("shared_attrs"), +) + +EVENT_ROWS_NO_STATES = ( + *EVENT_COLUMNS, + EventData.shared_data.label("shared_data"), + *EMPTY_STATE_COLUMNS, +) + +# Virtual column to tell logbook if it should avoid processing +# the event as its only used to link contexts +CONTEXT_ONLY = literal("1").label("context_only") +NOT_CONTEXT_ONLY = literal(None).label("context_only") + + +def statement_for_request( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_ids: list[str] | None = None, + filters: Filters | None = None, + context_id: str | None = None, +) -> StatementLambdaElement: + """Generate the logbook statement for a logbook request.""" + + # No entities: logbook sends everything for the timeframe + # limited by the context_id and the yaml configured filter + if not entity_ids: + entity_filter = filters.entity_filter() if filters else None # type: ignore[no-untyped-call] + return _all_stmt(start_day, end_day, event_types, entity_filter, context_id) + + # Multiple entities: logbook sends everything for the timeframe for the entities + # + # This is the least efficient query because we use + # like matching which means part of the query has to be built each + # time when the entity_ids are not in the cache + if len(entity_ids) > 1: + return _entities_stmt(start_day, end_day, event_types, entity_ids) + + # Single entity: logbook sends everything for the timeframe for the entity + entity_id = entity_ids[0] + entity_like = ENTITY_ID_JSON_TEMPLATE.format(entity_id) + return _single_entity_stmt(start_day, end_day, event_types, entity_id, entity_like) + + +def _select_events_context_id_subquery( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], +) -> Select: + """Generate the select for a context_id subquery.""" + return ( + select(Events.context_id) + .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) + .where(Events.event_type.in_(event_types)) + .outerjoin(EventData, (Events.data_id == EventData.data_id)) + ) + + +def _select_entities_context_ids_sub_query( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_ids: list[str], +) -> Select: + """Generate a subquery to find context ids for multiple entities.""" + return ( + _select_events_context_id_subquery(start_day, end_day, event_types) + .where(_apply_event_entity_id_matchers(entity_ids)) + .union_all( + select(States.context_id) + .filter((States.last_updated > start_day) & (States.last_updated < end_day)) + .where(States.entity_id.in_(entity_ids)) + ) + .subquery() + ) + + +def _select_events_context_only() -> Select: + """Generate an events query that mark them as for context_only. + + By marking them as context_only we know they are only for + linking context ids and we can avoid processing them. 
+ """ + return select(*EVENT_ROWS_NO_STATES, CONTEXT_ONLY).outerjoin( + EventData, (Events.data_id == EventData.data_id) + ) + + +def _entities_stmt( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_ids: list[str], +) -> StatementLambdaElement: + """Generate a logbook query for multiple entities.""" + stmt = lambda_stmt( + lambda: _select_events_without_states(start_day, end_day, event_types) + ) + stmt = stmt.add_criteria( + lambda s: s.where(_apply_event_entity_id_matchers(entity_ids)).union_all( + _select_states(start_day, end_day).where(States.entity_id.in_(entity_ids)), + _select_events_context_only().where( + Events.context_id.in_( + _select_entities_context_ids_sub_query( + start_day, + end_day, + event_types, + entity_ids, + ) + ) + ), + ), + # Since _apply_event_entity_id_matchers generates multiple + # like statements we need to use the entity_ids in the + # the cache key since the sql can change based on the + # likes. + track_on=(str(entity_ids),), + ) + stmt += lambda s: s.order_by(Events.time_fired) + return stmt + + +def _select_entity_context_ids_sub_query( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_id: str, + entity_id_like: str, +) -> Select: + """Generate a subquery to find context ids for a single entity.""" + return ( + _select_events_context_id_subquery(start_day, end_day, event_types) + .where( + Events.event_data.like(entity_id_like) + | EventData.shared_data.like(entity_id_like) + ) + .union_all( + select(States.context_id) + .filter((States.last_updated > start_day) & (States.last_updated < end_day)) + .where(States.entity_id == entity_id) + ) + .subquery() + ) + + +def _single_entity_stmt( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_id: str, + entity_id_like: str, +) -> StatementLambdaElement: + """Generate a logbook query for a single entity.""" + stmt = lambda_stmt( + lambda: _select_events_without_states(start_day, end_day, event_types) + .where( + Events.event_data.like(entity_id_like) + | EventData.shared_data.like(entity_id_like) + ) + .union_all( + _select_states(start_day, end_day).where(States.entity_id == entity_id), + _select_events_context_only().where( + Events.context_id.in_( + _select_entity_context_ids_sub_query( + start_day, end_day, event_types, entity_id, entity_id_like + ) + ) + ), + ) + .order_by(Events.time_fired) + ) + return stmt + + +def _all_stmt( + start_day: dt, + end_day: dt, + event_types: tuple[str, ...], + entity_filter: Any | None = None, + context_id: str | None = None, +) -> StatementLambdaElement: + """Generate a logbook query for all entities.""" + stmt = lambda_stmt( + lambda: _select_events_without_states(start_day, end_day, event_types) + ) + if context_id is not None: + # Once all the old `state_changed` events + # are gone from the database remove the + # _legacy_select_events_context_id() + stmt += lambda s: s.where(Events.context_id == context_id).union_all( + _select_states(start_day, end_day).where(States.context_id == context_id), + _legacy_select_events_context_id(start_day, end_day, context_id), + ) + elif entity_filter is not None: + stmt += lambda s: s.union_all( + _select_states(start_day, end_day).where(entity_filter) + ) + else: + stmt += lambda s: s.union_all(_select_states(start_day, end_day)) + stmt += lambda s: s.order_by(Events.time_fired) + return stmt + + +def _legacy_select_events_context_id( + start_day: dt, end_day: dt, context_id: str +) -> Select: + """Generate a legacy events context id select that also joins 
states.""" + # This can be removed once we no longer have event_ids in the states table + return ( + select( + *EVENT_COLUMNS, + literal(value=None, type_=sqlalchemy.String).label("shared_data"), + *STATE_COLUMNS, + NOT_CONTEXT_ONLY, + ) + .outerjoin(States, (Events.event_id == States.event_id)) + .where(States.last_updated == States.last_changed) + .where(_not_continuous_entity_matcher()) + .outerjoin( + StateAttributes, (States.attributes_id == StateAttributes.attributes_id) + ) + .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) + .where(Events.context_id == context_id) + ) + + +def _select_events_without_states( + start_day: dt, end_day: dt, event_types: tuple[str, ...] +) -> Select: + """Generate an events select that does not join states.""" + return ( + select(*EVENT_ROWS_NO_STATES, NOT_CONTEXT_ONLY) + .where((Events.time_fired > start_day) & (Events.time_fired < end_day)) + .where(Events.event_type.in_(event_types)) + .outerjoin(EventData, (Events.data_id == EventData.data_id)) + ) + + +def _select_states(start_day: dt, end_day: dt) -> Select: + """Generate a states select that formats the states table as event rows.""" + old_state = aliased(States, name="old_state") + return ( + select( + literal(value=EVENT_STATE_CHANGED, type_=sqlalchemy.String).label( + "event_type" + ), + literal(value=None, type_=sqlalchemy.Text).label("event_data"), + States.last_changed.label("time_fired"), + States.context_id.label("context_id"), + States.context_user_id.label("context_user_id"), + States.context_parent_id.label("context_parent_id"), + literal(value=None, type_=sqlalchemy.Text).label("shared_data"), + *STATE_COLUMNS, + NOT_CONTEXT_ONLY, + ) + .filter((States.last_updated > start_day) & (States.last_updated < end_day)) + .outerjoin(old_state, (States.old_state_id == old_state.state_id)) + .where(_missing_state_matcher(old_state)) + .where(_not_continuous_entity_matcher()) + .where(States.last_updated == States.last_changed) + .outerjoin( + StateAttributes, (States.attributes_id == StateAttributes.attributes_id) + ) + ) + + +def _missing_state_matcher(old_state: States) -> sqlalchemy.and_: + # The below removes state change events that do not have + # and old_state or the old_state is missing (newly added entities) + # or the new_state is missing (removed entities) + return sqlalchemy.and_( + old_state.state_id.isnot(None), + (States.state != old_state.state), + States.state.isnot(None), + ) + + +def _not_continuous_entity_matcher() -> sqlalchemy.or_: + """Match non continuous entities.""" + return sqlalchemy.or_( + _not_continuous_domain_matcher(), + sqlalchemy.and_( + _continuous_domain_matcher, _not_uom_attributes_matcher() + ).self_group(), + ) + + +def _not_continuous_domain_matcher() -> sqlalchemy.and_: + """Match not continuous domains.""" + return sqlalchemy.and_( + *[ + ~States.entity_id.like(entity_domain) + for entity_domain in CONTINUOUS_ENTITY_ID_LIKE + ], + ).self_group() + + +def _continuous_domain_matcher() -> sqlalchemy.or_: + """Match continuous domains.""" + return sqlalchemy.or_( + *[ + States.entity_id.like(entity_domain) + for entity_domain in CONTINUOUS_ENTITY_ID_LIKE + ], + ).self_group() + + +def _not_uom_attributes_matcher() -> Any: + """Prefilter ATTR_UNIT_OF_MEASUREMENT as its much faster in sql.""" + return ~StateAttributes.shared_attrs.like( + UNIT_OF_MEASUREMENT_JSON_LIKE + ) | ~States.attributes.like(UNIT_OF_MEASUREMENT_JSON_LIKE) + + +def _apply_event_entity_id_matchers(entity_ids: Iterable[str]) -> sqlalchemy.or_: + """Create matchers 
for the entity_id in the event_data.""" + ors = [] + for entity_id in entity_ids: + like = ENTITY_ID_JSON_TEMPLATE.format(entity_id) + ors.append(Events.event_data.like(like)) + ors.append(EventData.shared_data.like(like)) + return sqlalchemy.or_(*ors) diff --git a/tests/components/logbook/test_init.py b/tests/components/logbook/test_init.py index 76319ba5a6e..cc10346fc07 100644 --- a/tests/components/logbook/test_init.py +++ b/tests/components/logbook/test_init.py @@ -1321,8 +1321,8 @@ async def test_logbook_context_from_template(hass, hass_client, recorder_mock): assert json_dict[5]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" -async def test_logbook_entity_matches_only(hass, hass_client, recorder_mock): - """Test the logbook view with a single entity and entity_matches_only.""" +async def test_logbook_(hass, hass_client, recorder_mock): + """Test the logbook view with a single entity and .""" await async_setup_component(hass, "logbook", {}) assert await async_setup_component( hass, @@ -1377,7 +1377,7 @@ async def test_logbook_entity_matches_only(hass, hass_client, recorder_mock): # Test today entries with filter by end_time end_time = start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1390,10 +1390,8 @@ async def test_logbook_entity_matches_only(hass, hass_client, recorder_mock): assert json_dict[1]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" -async def test_logbook_entity_matches_only_multiple_calls( - hass, hass_client, recorder_mock -): - """Test the logbook view with a single entity and entity_matches_only called multiple times.""" +async def test_logbook_many_entities_multiple_calls(hass, hass_client, recorder_mock): + """Test the logbook view with a many entities called multiple times.""" await async_setup_component(hass, "logbook", {}) await async_setup_component(hass, "automation", {}) @@ -1421,7 +1419,7 @@ async def test_logbook_entity_matches_only_multiple_calls( for automation_id in range(5): # Test today entries with filter by end_time response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_{automation_id}_automation&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_{automation_id}_automation" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1432,7 +1430,7 @@ async def test_logbook_entity_matches_only_multiple_calls( ) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_0_automation,automation.mock_1_automation,automation.mock_2_automation&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_0_automation,automation.mock_1_automation,automation.mock_2_automation" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1442,11 +1440,28 @@ async def test_logbook_entity_matches_only_multiple_calls( assert json_dict[1]["entity_id"] == "automation.mock_1_automation" assert json_dict[2]["entity_id"] == "automation.mock_2_automation" + response = await client.get( + 
f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=automation.mock_4_automation,automation.mock_2_automation,automation.mock_0_automation,automation.mock_1_automation" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() -async def test_custom_log_entry_discoverable_via_entity_matches_only( - hass, hass_client, recorder_mock -): - """Test if a custom log entry is later discoverable via entity_matches_only.""" + assert len(json_dict) == 4 + assert json_dict[0]["entity_id"] == "automation.mock_0_automation" + assert json_dict[1]["entity_id"] == "automation.mock_1_automation" + assert json_dict[2]["entity_id"] == "automation.mock_2_automation" + assert json_dict[3]["entity_id"] == "automation.mock_4_automation" + + response = await client.get( + f"/api/logbook/{end_time.isoformat()}?end_time={end_time}&entity=automation.mock_4_automation,automation.mock_2_automation,automation.mock_0_automation,automation.mock_1_automation" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() + assert len(json_dict) == 0 + + +async def test_custom_log_entry_discoverable_via_(hass, hass_client, recorder_mock): + """Test if a custom log entry is later discoverable via .""" await async_setup_component(hass, "logbook", {}) await async_recorder_block_till_done(hass) @@ -1468,7 +1483,7 @@ async def test_custom_log_entry_discoverable_via_entity_matches_only( # Test today entries with filter by end_time end_time = start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time.isoformat()}&entity=switch.test_switch&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time.isoformat()}&entity=switch.test_switch" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1480,8 +1495,8 @@ async def test_custom_log_entry_discoverable_via_entity_matches_only( assert json_dict[0]["entity_id"] == "switch.test_switch" -async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_mock): - """Test the logbook view with a multiple entities and entity_matches_only.""" +async def test_logbook_multiple_entities(hass, hass_client, recorder_mock): + """Test the logbook view with a multiple entities.""" await async_setup_component(hass, "logbook", {}) assert await async_setup_component( hass, @@ -1513,12 +1528,14 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ # Entity added (should not be logged) hass.states.async_set("switch.test_state", STATE_ON) hass.states.async_set("light.test_state", STATE_ON) + hass.states.async_set("binary_sensor.test_state", STATE_ON) await hass.async_block_till_done() # First state change (should be logged) hass.states.async_set("switch.test_state", STATE_OFF) hass.states.async_set("light.test_state", STATE_OFF) + hass.states.async_set("binary_sensor.test_state", STATE_OFF) await hass.async_block_till_done() @@ -1530,6 +1547,9 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ "switch.test_state", STATE_ON, context=switch_turn_off_context ) hass.states.async_set("light.test_state", STATE_ON, context=switch_turn_off_context) + hass.states.async_set( + "binary_sensor.test_state", STATE_ON, context=switch_turn_off_context + ) await async_wait_recording_done(hass) client = await hass_client() @@ -1541,7 +1561,7 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ # Test today entries with filter by end_time end_time = 
start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state,light.test_state&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=switch.test_state,light.test_state" ) assert response.status == HTTPStatus.OK json_dict = await response.json() @@ -1558,6 +1578,46 @@ async def test_logbook_entity_matches_only_multiple(hass, hass_client, recorder_ assert json_dict[3]["entity_id"] == "light.test_state" assert json_dict[3]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + # Test today entries with filter by end_time + end_time = start + timedelta(hours=24) + response = await client.get( + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=binary_sensor.test_state,light.test_state" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() + + assert len(json_dict) == 4 + + assert json_dict[0]["entity_id"] == "light.test_state" + + assert json_dict[1]["entity_id"] == "binary_sensor.test_state" + + assert json_dict[2]["entity_id"] == "light.test_state" + assert json_dict[2]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + + assert json_dict[3]["entity_id"] == "binary_sensor.test_state" + assert json_dict[3]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + + # Test today entries with filter by end_time + end_time = start + timedelta(hours=24) + response = await client.get( + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=light.test_state,binary_sensor.test_state" + ) + assert response.status == HTTPStatus.OK + json_dict = await response.json() + + assert len(json_dict) == 4 + + assert json_dict[0]["entity_id"] == "light.test_state" + + assert json_dict[1]["entity_id"] == "binary_sensor.test_state" + + assert json_dict[2]["entity_id"] == "light.test_state" + assert json_dict[2]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + + assert json_dict[3]["entity_id"] == "binary_sensor.test_state" + assert json_dict[3]["context_user_id"] == "9400facee45711eaa9308bfd3d19e474" + async def test_logbook_invalid_entity(hass, hass_client, recorder_mock): """Test the logbook view with requesting an invalid entity.""" @@ -1572,7 +1632,7 @@ async def test_logbook_invalid_entity(hass, hass_client, recorder_mock): # Test today entries with filter by end_time end_time = start + timedelta(hours=24) response = await client.get( - f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=invalid&entity_matches_only" + f"/api/logbook/{start_date.isoformat()}?end_time={end_time}&entity=invalid" ) assert response.status == HTTPStatus.INTERNAL_SERVER_ERROR
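
Some notes on the query techniques this patch relies on, with stand-alone sketches.

_entities_stmt() builds its statement through SQLAlchemy's lambda_stmt()/add_criteria() so the rendered SQL is cached across requests, and it passes track_on=(str(entity_ids),) because the number of LIKE clauses changes with the entity list. Below is a minimal sketch of that caching pattern under the same assumptions; the Event model, its columns, and events_stmt() are illustrative stand-ins, not the recorder schema.

from datetime import datetime, timedelta

from sqlalchemy import (
    Column,
    DateTime,
    Integer,
    String,
    create_engine,
    lambda_stmt,
    or_,
    select,
)
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Event(Base):
    """Illustrative stand-in for the recorder Events model."""

    __tablename__ = "events"
    event_id = Column(Integer, primary_key=True)
    event_data = Column(String)
    time_fired = Column(DateTime)


def _entity_matchers(entity_ids):
    """OR together LIKE clauses, as _apply_event_entity_id_matchers() does."""
    return or_(*(Event.event_data.like(f'%"entity_id":"{eid}"%') for eid in entity_ids))


def events_stmt(start, end, entity_ids):
    # The outer lambda is cached by where it appears in the source;
    # start/end simply become bound parameters on every call.
    stmt = lambda_stmt(
        lambda: select(Event)
        .where(Event.time_fired > start)
        .where(Event.time_fired < end)
    )
    # The LIKE clauses change shape with the entity list, so the list is
    # folded into the cache key via track_on, mirroring _entities_stmt().
    stmt = stmt.add_criteria(
        lambda s: s.where(_entity_matchers(entity_ids)),
        track_on=(str(entity_ids),),
    )
    stmt += lambda s: s.order_by(Event.time_fired)
    return stmt


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    now = datetime.utcnow()
    rows = session.execute(events_stmt(now - timedelta(days=1), now, ["light.kitchen"]))
    print(rows.all())  # [] against an empty database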
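The per-entity matching depends on ENTITY_ID_JSON_TEMPLATE turning into a substring LIKE over the serialized event payload, which only works because the stored JSON is compact (no space after the colon, as the template itself encodes). A small plain-Python illustration of that assumption:

import json

ENTITY_ID_JSON_TEMPLATE = '%"entity_id":"{}"%'


def like_contains(value: str, pattern: str) -> bool:
    # Simplified LIKE: only handles the leading/trailing "%" form used here.
    # Real SQL LIKE would also treat "_" as a single-character wildcard,
    # which this check ignores.
    return pattern.strip("%") in value


# Compact separators model how the recorder serializes event data; with the
# default json.dumps spacing the pattern would not match.
event_data = json.dumps(
    {"domain": "light", "service": "turn_on", "entity_id": "light.kitchen"},
    separators=(",", ":"),
)

print(like_contains(event_data, ENTITY_ID_JSON_TEMPLATE.format("light.kitchen")))  # True
print(like_contains(event_data, ENTITY_ID_JSON_TEMPLATE.format("light.porch")))    # False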
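The new CONTEXT_ONLY / NOT_CONTEXT_ONLY literal column is how the statement tells the consumer that a row was fetched purely to resolve context ids. The yield_rows() change in __init__.py stores every row for context lookup and then skips the context-only ones; a stand-alone sketch of that consumption pattern, where Row is a namedtuple stand-in for a SQLAlchemy result row:

from collections import namedtuple

Row = namedtuple("Row", ["context_id", "event_type", "context_only"])

rows = [
    Row("ctx-1", "call_service", None),          # a row the logbook should render
    Row("ctx-1", "automation_triggered", "1"),   # fetched only to link the context
]

context_lookup = {None: None}
rendered = []
for row in rows:
    # Every row can resolve a context id ...
    context_lookup.setdefault(row.context_id, row)
    # ... but context-only rows never become logbook entries.
    if row.context_only:
        continue
    rendered.append(row)

print(len(context_lookup), len(rendered))  # 2 1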
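EVENT_ROWS_NO_STATES, EMPTY_STATE_COLUMNS and the literal(None) labels exist because every branch of the UNION ALL must yield the same column list: the events half carries NULL placeholders for the state columns, and _select_states() synthesizes the event columns from the states table. A toy version of that shape, with illustrative tables rather than the recorder schema:

import sqlalchemy
from sqlalchemy import Column, Integer, String, create_engine, literal, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class EventRow(Base):
    __tablename__ = "events"
    id = Column(Integer, primary_key=True)
    event_type = Column(String)


class StateRow(Base):
    __tablename__ = "states"
    id = Column(Integer, primary_key=True)
    entity_id = Column(String)
    state = Column(String)


# Events half: NULL placeholders where the state columns would be.
events_half = select(
    EventRow.event_type.label("event_type"),
    literal(value=None, type_=sqlalchemy.String).label("entity_id"),
    literal(value=None, type_=sqlalchemy.String).label("state"),
)
# States half: a synthetic event_type so both halves line up.
states_half = select(
    literal(value="state_changed", type_=sqlalchemy.String).label("event_type"),
    StateRow.entity_id.label("entity_id"),
    StateRow.state.label("state"),
)
stmt = events_half.union_all(states_half)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(EventRow(event_type="call_service"))
    session.add(StateRow(entity_id="light.kitchen", state="on"))
    session.commit()
    for row in session.execute(stmt):
        print(row.event_type, row.entity_id, row.state)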
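Finally, the new debug branch in _get_events() compiles the lambda statement with literal_binds so the exact SQL sent to the database can be inspected, but only when debug logging is enabled for the component. The standard-library way to switch that on (the logger name follows from logging.getLogger(__name__) in homeassistant/components/logbook/__init__.py):

import logging

# Surface the "Literal statement: %s" debug line added in _get_events().
logging.basicConfig(level=logging.INFO)
logging.getLogger("homeassistant.components.logbook").setLevel(logging.DEBUG)

In a running instance the same effect comes from the logger integration, e.g. setting homeassistant.components.logbook to debug under logger: logs: in configuration.yaml.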