Improve alignment of live logbook and historical logbook models (#123070)

* Improve alignment of live logbook and historical logbook models

- Make EventAsRow a NamedTuple, which aligns better with the
  sqlalchemy Row type
- Use __getitem__ to fetch results for both Row and EventAsRow,
  since item access is an order of magnitude faster than attribute
  access when fetching sqlalchemy Row values (see the sketch below)
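The gist of the change, as a minimal standalone sketch (the names, positions,
and fields below are illustrative only, not the actual logbook schema): a
NamedTuple whose field order mirrors the query's column order, plus integer
position constants, lets sqlalchemy Row results and synthesized in-memory rows
be read with the same row[POS] indexing, which the commit message reports is
roughly an order of magnitude faster than attribute access on Row.

from __future__ import annotations

from typing import Any, Final, NamedTuple

# Hypothetical positions for illustration; they must match the column
# order of the query that produces the rows.
STATE_POS: Final = 0
ENTITY_ID_POS: Final = 1


class ExampleRow(NamedTuple):
    """In-memory row whose field order matches the query columns."""

    state: str | None
    entity_id: str | None


def describe(row: Any) -> str:
    """Read values by position; works for a NamedTuple or a sqlalchemy Row."""
    return f"{row[ENTITY_ID_POS]} -> {row[STATE_POS]}"


print(describe(ExampleRow(state="on", entity_id="light.kitchen")))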

* final

* fix

* unused

* fix more tests

* cleanup

* reduce

* tweak
J. Nick Koston 2024-08-04 16:09:10 -05:00 committed by GitHub
parent 3353c3c205
commit b09dd95dbd
6 changed files with 138 additions and 155 deletions

View File

@@ -5,7 +5,7 @@ from __future__ import annotations
 from collections.abc import Callable, Mapping
 from dataclasses import dataclass
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, Final, NamedTuple, cast

 from sqlalchemy.engine.row import Row
@@ -46,16 +46,16 @@ class LazyEventPartialState:
         """Init the lazy event."""
         self.row = row
         # We need to explicitly check for the row is EventAsRow as the unhappy path
-        # to fetch row.data for Row is very expensive
+        # to fetch row[DATA_POS] for Row is very expensive
         if type(row) is EventAsRow:
             # If its an EventAsRow we can avoid the whole
             # json decode process as we already have the data
-            self.data = row.data
+            self.data = row[DATA_POS]
             return
         if TYPE_CHECKING:
-            source = cast(str, row.event_data)
+            source = cast(str, row[EVENT_DATA_POS])
         else:
-            source = row.event_data
+            source = row[EVENT_DATA_POS]
         if not source:
             self.data = {}
         elif event_data := event_data_cache.get(source):
@@ -68,51 +68,73 @@ class LazyEventPartialState:
     @cached_property
     def event_type(self) -> EventType[Any] | str | None:
         """Return the event type."""
-        return self.row.event_type
+        return self.row[EVENT_TYPE_POS]

     @cached_property
     def entity_id(self) -> str | None:
         """Return the entity id."""
-        return self.row.entity_id
+        return self.row[ENTITY_ID_POS]

     @cached_property
     def state(self) -> str | None:
         """Return the state."""
-        return self.row.state
+        return self.row[STATE_POS]

     @cached_property
     def context_id(self) -> str | None:
         """Return the context id."""
-        return bytes_to_ulid_or_none(self.row.context_id_bin)
+        return bytes_to_ulid_or_none(self.row[CONTEXT_ID_BIN_POS])

     @cached_property
     def context_user_id(self) -> str | None:
         """Return the context user id."""
-        return bytes_to_uuid_hex_or_none(self.row.context_user_id_bin)
+        return bytes_to_uuid_hex_or_none(self.row[CONTEXT_USER_ID_BIN_POS])

     @cached_property
     def context_parent_id(self) -> str | None:
         """Return the context parent id."""
-        return bytes_to_ulid_or_none(self.row.context_parent_id_bin)
+        return bytes_to_ulid_or_none(self.row[CONTEXT_PARENT_ID_BIN_POS])


-@dataclass(slots=True, frozen=True)
-class EventAsRow:
-    """Convert an event to a row."""
+# Row order must match the query order in queries/common.py
+# ---------------------------------------------------------
+ROW_ID_POS: Final = 0
+EVENT_TYPE_POS: Final = 1
+EVENT_DATA_POS: Final = 2
+TIME_FIRED_TS_POS: Final = 3
+CONTEXT_ID_BIN_POS: Final = 4
+CONTEXT_USER_ID_BIN_POS: Final = 5
+CONTEXT_PARENT_ID_BIN_POS: Final = 6
+STATE_POS: Final = 7
+ENTITY_ID_POS: Final = 8
+ICON_POS: Final = 9
+CONTEXT_ONLY_POS: Final = 10
+# - For EventAsRow, additional fields are:
+DATA_POS: Final = 11
+CONTEXT_POS: Final = 12
+
+
+class EventAsRow(NamedTuple):
+    """Convert an event to a row.
+
+    This must always match the order of the columns in queries/common.py
+    """

+    row_id: int
+    event_type: EventType[Any] | str | None
+    event_data: str | None
+    time_fired_ts: float
+    context_id_bin: bytes
+    context_user_id_bin: bytes | None
+    context_parent_id_bin: bytes | None
+    state: str | None
+    entity_id: str | None
+    icon: str | None
+    context_only: bool | None
+    # Additional fields for EventAsRow
     data: Mapping[str, Any]
     context: Context
-    context_id_bin: bytes
-    time_fired_ts: float
-    row_id: int
-    event_data: str | None = None
-    entity_id: str | None = None
-    icon: str | None = None
-    context_user_id_bin: bytes | None = None
-    context_parent_id_bin: bytes | None = None
-    event_type: EventType[Any] | str | None = None
-    state: str | None = None
-    context_only: None = None


 @callback
@@ -121,14 +143,19 @@ def async_event_to_row(event: Event) -> EventAsRow:
     if event.event_type != EVENT_STATE_CHANGED:
         context = event.context
         return EventAsRow(
-            data=event.data,
-            context=event.context,
+            row_id=hash(event),
             event_type=event.event_type,
+            event_data=None,
+            time_fired_ts=event.time_fired_timestamp,
             context_id_bin=ulid_to_bytes(context.id),
             context_user_id_bin=uuid_hex_to_bytes_or_none(context.user_id),
             context_parent_id_bin=ulid_to_bytes_or_none(context.parent_id),
-            time_fired_ts=event.time_fired_timestamp,
-            row_id=hash(event),
+            state=None,
+            entity_id=None,
+            icon=None,
+            context_only=None,
+            data=event.data,
+            context=context,
         )
     # States are prefiltered so we never get states
     # that are missing new_state or old_state
@@ -136,14 +163,17 @@ def async_event_to_row(event: Event) -> EventAsRow:
     new_state: State = event.data["new_state"]
     context = new_state.context
     return EventAsRow(
-        data=event.data,
-        context=event.context,
-        entity_id=new_state.entity_id,
-        state=new_state.state,
+        row_id=hash(event),
+        event_type=None,
+        event_data=None,
+        time_fired_ts=new_state.last_updated_timestamp,
         context_id_bin=ulid_to_bytes(context.id),
         context_user_id_bin=uuid_hex_to_bytes_or_none(context.user_id),
         context_parent_id_bin=ulid_to_bytes_or_none(context.parent_id),
-        time_fired_ts=new_state.last_updated_timestamp,
-        row_id=hash(event),
+        state=new_state.state,
+        entity_id=new_state.entity_id,
         icon=new_state.attributes.get(ATTR_ICON),
+        context_only=None,
+        data=event.data,
+        context=context,
     )

View File

@@ -6,6 +6,7 @@ from collections.abc import Callable, Generator, Sequence
 from dataclasses import dataclass
 from datetime import datetime as dt
 import logging
+import time
 from typing import Any

 from sqlalchemy.engine import Result
@@ -17,7 +18,6 @@ from homeassistant.components.recorder.models import (
     bytes_to_uuid_hex_or_none,
     extract_event_type_ids,
     extract_metadata_ids,
-    process_datetime_to_timestamp,
     process_timestamp_to_utc_isoformat,
 )
 from homeassistant.components.recorder.util import (
@@ -62,7 +62,23 @@ from .const import (
     LOGBOOK_ENTRY_WHEN,
 )
 from .helpers import is_sensor_continuous
-from .models import EventAsRow, LazyEventPartialState, LogbookConfig, async_event_to_row
+from .models import (
+    CONTEXT_ID_BIN_POS,
+    CONTEXT_ONLY_POS,
+    CONTEXT_PARENT_ID_BIN_POS,
+    CONTEXT_POS,
+    CONTEXT_USER_ID_BIN_POS,
+    ENTITY_ID_POS,
+    EVENT_TYPE_POS,
+    ICON_POS,
+    ROW_ID_POS,
+    STATE_POS,
+    TIME_FIRED_TS_POS,
+    EventAsRow,
+    LazyEventPartialState,
+    LogbookConfig,
+    async_event_to_row,
+)
 from .queries import statement_for_request
 from .queries.common import PSEUDO_EVENT_STATE_CHANGED
@@ -206,17 +222,17 @@ def _humanify(
     # Process rows
     for row in rows:
-        context_id_bin: bytes = row.context_id_bin
+        context_id_bin: bytes = row[CONTEXT_ID_BIN_POS]
         if memoize_new_contexts and context_id_bin not in context_lookup:
             context_lookup[context_id_bin] = row
-        if row.context_only:
+        if row[CONTEXT_ONLY_POS]:
             continue
-        event_type = row.event_type
+        event_type = row[EVENT_TYPE_POS]
         if event_type == EVENT_CALL_SERVICE:
             continue
         if event_type is PSEUDO_EVENT_STATE_CHANGED:
-            entity_id = row.entity_id
+            entity_id = row[ENTITY_ID_POS]
             assert entity_id is not None
             # Skip continuous sensors
             if (
@@ -229,12 +245,12 @@
             data = {
                 LOGBOOK_ENTRY_WHEN: format_time(row),
-                LOGBOOK_ENTRY_STATE: row.state,
+                LOGBOOK_ENTRY_STATE: row[STATE_POS],
                 LOGBOOK_ENTRY_ENTITY_ID: entity_id,
             }
             if include_entity_name:
                 data[LOGBOOK_ENTRY_NAME] = entity_name_cache.get(entity_id)
-            if icon := row.icon:
+            if icon := row[ICON_POS]:
                 data[LOGBOOK_ENTRY_ICON] = icon

             context_augmenter.augment(data, row, context_id_bin)
@@ -292,9 +308,11 @@ class ContextAugmenter:
             context_row := self.context_lookup.get(context_id_bin)
         ):
             return context_row
-        if (context := getattr(row, "context", None)) is not None and (
-            origin_event := context.origin_event
-        ) is not None:
+        if (
+            type(row) is EventAsRow
+            and (context := row[CONTEXT_POS]) is not None
+            and (origin_event := context.origin_event) is not None
+        ):
             return async_event_to_row(origin_event)
         return None
@@ -302,7 +320,7 @@
         self, data: dict[str, Any], row: Row | EventAsRow, context_id_bin: bytes | None
     ) -> None:
         """Augment data from the row and cache."""
-        if context_user_id_bin := row.context_user_id_bin:
+        if context_user_id_bin := row[CONTEXT_USER_ID_BIN_POS]:
             data[CONTEXT_USER_ID] = bytes_to_uuid_hex_or_none(context_user_id_bin)

         if not (context_row := self._get_context_row(context_id_bin, row)):
@@ -311,7 +329,7 @@
         if _rows_match(row, context_row):
             # This is the first event with the given ID. Was it directly caused by
             # a parent event?
-            context_parent_id_bin = row.context_parent_id_bin
+            context_parent_id_bin = row[CONTEXT_PARENT_ID_BIN_POS]
             if (
                 not context_parent_id_bin
                 or (
@@ -326,10 +344,10 @@
         # this log entry.
         if _rows_match(row, context_row):
             return
-        event_type = context_row.event_type
+        event_type = context_row[EVENT_TYPE_POS]
         # State change
-        if context_entity_id := context_row.entity_id:
-            data[CONTEXT_STATE] = context_row.state
+        if context_entity_id := context_row[ENTITY_ID_POS]:
+            data[CONTEXT_STATE] = context_row[STATE_POS]
             data[CONTEXT_ENTITY_ID] = context_entity_id
             if self.include_entity_name:
                 data[CONTEXT_ENTITY_ID_NAME] = self.entity_name_cache.get(
@@ -375,20 +393,22 @@ class ContextAugmenter:
 def _rows_match(row: Row | EventAsRow, other_row: Row | EventAsRow) -> bool:
     """Check of rows match by using the same method as Events __hash__."""
     return bool(
-        row is other_row or (row_id := row.row_id) and row_id == other_row.row_id
+        row is other_row
+        or (row_id := row[ROW_ID_POS])
+        and row_id == other_row[ROW_ID_POS]
     )


 def _row_time_fired_isoformat(row: Row | EventAsRow) -> str:
     """Convert the row timed_fired to isoformat."""
     return process_timestamp_to_utc_isoformat(
-        dt_util.utc_from_timestamp(row.time_fired_ts) or dt_util.utcnow()
+        dt_util.utc_from_timestamp(row[TIME_FIRED_TS_POS]) or dt_util.utcnow()
     )


 def _row_time_fired_timestamp(row: Row | EventAsRow) -> float:
     """Convert the row timed_fired to timestamp."""
-    return row.time_fired_ts or process_datetime_to_timestamp(dt_util.utcnow())
+    return row[TIME_FIRED_TS_POS] or time.time()


 class EntityNameCache:

View File

@@ -4,10 +4,8 @@ from __future__ import annotations

 import argparse
 import asyncio
-import collections
 from collections.abc import Callable
 from contextlib import suppress
-import json
 import logging
 from timeit import default_timer as timer
@@ -18,7 +16,7 @@ from homeassistant.helpers.event import (
     async_track_state_change,
     async_track_state_change_event,
 )
-from homeassistant.helpers.json import JSON_DUMP, JSONEncoder
+from homeassistant.helpers.json import JSON_DUMP

 # mypy: allow-untyped-calls, allow-untyped-defs, no-check-untyped-defs
 # mypy: no-warn-return-any
@@ -310,48 +308,3 @@ async def json_serialize_states(hass):
     start = timer()
     JSON_DUMP(states)
     return timer() - start
-
-
-def _create_state_changed_event_from_old_new(
-    entity_id, event_time_fired, old_state, new_state
-):
-    """Create a state changed event from a old and new state."""
-    attributes = {}
-    if new_state is not None:
-        attributes = new_state.get("attributes")
-    attributes_json = json.dumps(attributes, cls=JSONEncoder)
-    if attributes_json == "null":
-        attributes_json = "{}"
-    row = collections.namedtuple(  # noqa: PYI024
-        "Row",
-        [
-            "event_type"
-            "event_data"
-            "time_fired"
-            "context_id"
-            "context_user_id"
-            "state"
-            "entity_id"
-            "domain"
-            "attributes"
-            "state_id",
-            "old_state_id",
-        ],
-    )
-
-    row.event_type = EVENT_STATE_CHANGED
-    row.event_data = "{}"
-    row.attributes = attributes_json
-    row.time_fired = event_time_fired
-    row.state = new_state and new_state.get("state")
-    row.entity_id = entity_id
-    row.domain = entity_id and core.split_entity_id(entity_id)[0]
-    row.context_id = None
-    row.context_user_id = None
-    row.old_state_id = old_state and 1
-    row.state_id = new_state and 1
-
-    # pylint: disable-next=import-outside-toplevel
-    from homeassistant.components import logbook
-
-    return logbook.LazyEventPartialState(row, {})

View File

@@ -7,7 +7,7 @@ from typing import Any

 from homeassistant.components import logbook
 from homeassistant.components.logbook import processor
-from homeassistant.components.logbook.models import LogbookConfig
+from homeassistant.components.logbook.models import EventAsRow, LogbookConfig
 from homeassistant.components.recorder.models import (
     process_timestamp_to_utc_isoformat,
     ulid_to_bytes_or_none,
@@ -18,6 +18,8 @@ from homeassistant.helpers import entity_registry as er
 from homeassistant.helpers.json import JSONEncoder
 import homeassistant.util.dt as dt_util

+IDX_TO_NAME = dict(enumerate(EventAsRow._fields))
+

 class MockRow:
     """Minimal row mock."""
@@ -48,6 +50,10 @@ class MockRow:
         self.attributes = None
         self.context_only = False

+    def __getitem__(self, idx: int) -> Any:
+        """Get item."""
+        return getattr(self, IDX_TO_NAME[idx])
+
     @property
     def time_fired_minute(self):
         """Minute the event was fired."""

View File

@@ -1,11 +1,9 @@
 """The tests for the logbook component."""

 import asyncio
-import collections
 from collections.abc import Callable
 from datetime import datetime, timedelta
 from http import HTTPStatus
-import json
 from unittest.mock import Mock

 from freezegun import freeze_time
@@ -15,7 +13,7 @@ import voluptuous as vol
 from homeassistant.components import logbook, recorder
 from homeassistant.components.alexa.smart_home import EVENT_ALEXA_SMART_HOME
 from homeassistant.components.automation import EVENT_AUTOMATION_TRIGGERED
-from homeassistant.components.logbook.models import LazyEventPartialState
+from homeassistant.components.logbook.models import EventAsRow, LazyEventPartialState
 from homeassistant.components.logbook.processor import EventProcessor
 from homeassistant.components.logbook.queries.common import PSEUDO_EVENT_STATE_CHANGED
 from homeassistant.components.recorder import Recorder
@@ -44,7 +42,6 @@ import homeassistant.core as ha
 from homeassistant.core import Event, HomeAssistant
 from homeassistant.helpers import device_registry as dr, entity_registry as er
 from homeassistant.helpers.entityfilter import CONF_ENTITY_GLOBS
-from homeassistant.helpers.json import JSONEncoder
 from homeassistant.setup import async_setup_component
 import homeassistant.util.dt as dt_util
@@ -324,50 +321,21 @@ def create_state_changed_event_from_old_new(
     entity_id, event_time_fired, old_state, new_state
 ):
     """Create a state changed event from a old and new state."""
-    attributes = {}
-    if new_state is not None:
-        attributes = new_state.get("attributes")
-    attributes_json = json.dumps(attributes, cls=JSONEncoder)
-    row = collections.namedtuple(  # noqa: PYI024
-        "Row",
-        [
-            "event_type",
-            "event_data",
-            "time_fired",
-            "time_fired_ts",
-            "context_id_bin",
-            "context_user_id_bin",
-            "context_parent_id_bin",
-            "state",
-            "entity_id",
-            "domain",
-            "attributes",
-            "state_id",
-            "old_state_id",
-            "shared_attrs",
-            "shared_data",
-            "context_only",
-        ],
-    )
-
-    row.event_type = PSEUDO_EVENT_STATE_CHANGED
-    row.event_data = "{}"
-    row.shared_data = "{}"
-    row.attributes = attributes_json
-    row.shared_attrs = attributes_json
-    row.time_fired = event_time_fired
-    row.time_fired_ts = dt_util.utc_to_timestamp(event_time_fired)
-    row.state = new_state and new_state.get("state")
-    row.entity_id = entity_id
-    row.domain = entity_id and ha.split_entity_id(entity_id)[0]
-    row.context_only = False
-    row.context_id_bin = None
-    row.friendly_name = None
-    row.icon = None
-    row.context_user_id_bin = None
-    row.context_parent_id_bin = None
-    row.old_state_id = old_state and 1
-    row.state_id = new_state and 1
-
+    row = EventAsRow(
+        row_id=1,
+        event_type=PSEUDO_EVENT_STATE_CHANGED,
+        event_data="{}",
+        time_fired_ts=dt_util.utc_to_timestamp(event_time_fired),
+        context_id_bin=None,
+        context_user_id_bin=None,
+        context_parent_id_bin=None,
+        state=new_state and new_state.get("state"),
+        entity_id=entity_id,
+        icon=None,
+        context_only=False,
+        data=None,
+        context=None,
+    )
     return LazyEventPartialState(row, {})

View File

@@ -2,20 +2,26 @@

 from unittest.mock import Mock

-from homeassistant.components.logbook.models import LazyEventPartialState
+from homeassistant.components.logbook.models import EventAsRow, LazyEventPartialState


 def test_lazy_event_partial_state_context() -> None:
     """Test we can extract context from a lazy event partial state."""
     state = LazyEventPartialState(
-        Mock(
+        EventAsRow(
+            row_id=1,
+            event_type="event_type",
+            event_data={},
+            time_fired_ts=1,
             context_id_bin=b"1234123412341234",
             context_user_id_bin=b"1234123412341234",
             context_parent_id_bin=b"4444444444444444",
-            event_data={},
-            event_type="event_type",
-            entity_id="entity_id",
             state="state",
+            entity_id="entity_id",
+            icon="icon",
+            context_only=False,
+            data={},
+            context=Mock(),
         ),
         {},
     )