Improve alignment of live logbook and historical logbook models (#123070)

* Improve alignment of live logbook and historical logbook models

- Make EventAsRow a NamedTuple, which is better aligned with
  sqlalchemy Row
- Use getitem to fetch results for both Row and EventAsRow
  since it's an order of magnitude faster to fetch sqlalchemy
  Row object values.

* final

* fix

* unused

* fix more tests

* cleanup

* reduce

* tweak
This commit is contained in:
J. Nick Koston 2024-08-04 16:09:10 -05:00 committed by GitHub
parent 3353c3c205
commit b09dd95dbd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 138 additions and 155 deletions

View File

@ -5,7 +5,7 @@ from __future__ import annotations
from collections.abc import Callable, Mapping
from dataclasses import dataclass
from functools import cached_property
from typing import TYPE_CHECKING, Any, cast
from typing import TYPE_CHECKING, Any, Final, NamedTuple, cast
from sqlalchemy.engine.row import Row
@ -46,16 +46,16 @@ class LazyEventPartialState:
"""Init the lazy event."""
self.row = row
# We need to explicitly check whether the row is an EventAsRow, as the unhappy path
# to fetch row.data for Row is very expensive
# to fetch row[DATA_POS] for Row is very expensive
if type(row) is EventAsRow:
# If it's an EventAsRow we can avoid the whole
# json decode process as we already have the data
self.data = row.data
self.data = row[DATA_POS]
return
if TYPE_CHECKING:
source = cast(str, row.event_data)
source = cast(str, row[EVENT_DATA_POS])
else:
source = row.event_data
source = row[EVENT_DATA_POS]
if not source:
self.data = {}
elif event_data := event_data_cache.get(source):
@ -68,51 +68,73 @@ class LazyEventPartialState:
@cached_property
def event_type(self) -> EventType[Any] | str | None:
"""Return the event type."""
return self.row.event_type
return self.row[EVENT_TYPE_POS]
@cached_property
def entity_id(self) -> str | None:
"""Return the entity id."""
return self.row.entity_id
return self.row[ENTITY_ID_POS]
@cached_property
def state(self) -> str | None:
"""Return the state."""
return self.row.state
return self.row[STATE_POS]
@cached_property
def context_id(self) -> str | None:
"""Return the context id."""
return bytes_to_ulid_or_none(self.row.context_id_bin)
return bytes_to_ulid_or_none(self.row[CONTEXT_ID_BIN_POS])
@cached_property
def context_user_id(self) -> str | None:
"""Return the context user id."""
return bytes_to_uuid_hex_or_none(self.row.context_user_id_bin)
return bytes_to_uuid_hex_or_none(self.row[CONTEXT_USER_ID_BIN_POS])
@cached_property
def context_parent_id(self) -> str | None:
"""Return the context parent id."""
return bytes_to_ulid_or_none(self.row.context_parent_id_bin)
return bytes_to_ulid_or_none(self.row[CONTEXT_PARENT_ID_BIN_POS])
@dataclass(slots=True, frozen=True)
class EventAsRow:
"""Convert an event to a row."""
# Row order must match the query order in queries/common.py
# ---------------------------------------------------------
ROW_ID_POS: Final = 0
EVENT_TYPE_POS: Final = 1
EVENT_DATA_POS: Final = 2
TIME_FIRED_TS_POS: Final = 3
CONTEXT_ID_BIN_POS: Final = 4
CONTEXT_USER_ID_BIN_POS: Final = 5
CONTEXT_PARENT_ID_BIN_POS: Final = 6
STATE_POS: Final = 7
ENTITY_ID_POS: Final = 8
ICON_POS: Final = 9
CONTEXT_ONLY_POS: Final = 10
# - For EventAsRow, additional fields are:
DATA_POS: Final = 11
CONTEXT_POS: Final = 12
class EventAsRow(NamedTuple):
"""Convert an event to a row.
This must always match the order of the columns in queries/common.py
"""
row_id: int
event_type: EventType[Any] | str | None
event_data: str | None
time_fired_ts: float
context_id_bin: bytes
context_user_id_bin: bytes | None
context_parent_id_bin: bytes | None
state: str | None
entity_id: str | None
icon: str | None
context_only: bool | None
# Additional fields for EventAsRow
data: Mapping[str, Any]
context: Context
context_id_bin: bytes
time_fired_ts: float
row_id: int
event_data: str | None = None
entity_id: str | None = None
icon: str | None = None
context_user_id_bin: bytes | None = None
context_parent_id_bin: bytes | None = None
event_type: EventType[Any] | str | None = None
state: str | None = None
context_only: None = None
@callback
@ -121,14 +143,19 @@ def async_event_to_row(event: Event) -> EventAsRow:
if event.event_type != EVENT_STATE_CHANGED:
context = event.context
return EventAsRow(
data=event.data,
context=event.context,
row_id=hash(event),
event_type=event.event_type,
event_data=None,
time_fired_ts=event.time_fired_timestamp,
context_id_bin=ulid_to_bytes(context.id),
context_user_id_bin=uuid_hex_to_bytes_or_none(context.user_id),
context_parent_id_bin=ulid_to_bytes_or_none(context.parent_id),
time_fired_ts=event.time_fired_timestamp,
row_id=hash(event),
state=None,
entity_id=None,
icon=None,
context_only=None,
data=event.data,
context=context,
)
# States are prefiltered so we never get states
# that are missing new_state or old_state
@ -136,14 +163,17 @@ def async_event_to_row(event: Event) -> EventAsRow:
new_state: State = event.data["new_state"]
context = new_state.context
return EventAsRow(
data=event.data,
context=event.context,
entity_id=new_state.entity_id,
state=new_state.state,
row_id=hash(event),
event_type=None,
event_data=None,
time_fired_ts=new_state.last_updated_timestamp,
context_id_bin=ulid_to_bytes(context.id),
context_user_id_bin=uuid_hex_to_bytes_or_none(context.user_id),
context_parent_id_bin=ulid_to_bytes_or_none(context.parent_id),
time_fired_ts=new_state.last_updated_timestamp,
row_id=hash(event),
state=new_state.state,
entity_id=new_state.entity_id,
icon=new_state.attributes.get(ATTR_ICON),
context_only=None,
data=event.data,
context=context,
)

View File

@ -6,6 +6,7 @@ from collections.abc import Callable, Generator, Sequence
from dataclasses import dataclass
from datetime import datetime as dt
import logging
import time
from typing import Any
from sqlalchemy.engine import Result
@ -17,7 +18,6 @@ from homeassistant.components.recorder.models import (
bytes_to_uuid_hex_or_none,
extract_event_type_ids,
extract_metadata_ids,
process_datetime_to_timestamp,
process_timestamp_to_utc_isoformat,
)
from homeassistant.components.recorder.util import (
@ -62,7 +62,23 @@ from .const import (
LOGBOOK_ENTRY_WHEN,
)
from .helpers import is_sensor_continuous
from .models import EventAsRow, LazyEventPartialState, LogbookConfig, async_event_to_row
from .models import (
CONTEXT_ID_BIN_POS,
CONTEXT_ONLY_POS,
CONTEXT_PARENT_ID_BIN_POS,
CONTEXT_POS,
CONTEXT_USER_ID_BIN_POS,
ENTITY_ID_POS,
EVENT_TYPE_POS,
ICON_POS,
ROW_ID_POS,
STATE_POS,
TIME_FIRED_TS_POS,
EventAsRow,
LazyEventPartialState,
LogbookConfig,
async_event_to_row,
)
from .queries import statement_for_request
from .queries.common import PSEUDO_EVENT_STATE_CHANGED
@ -206,17 +222,17 @@ def _humanify(
# Process rows
for row in rows:
context_id_bin: bytes = row.context_id_bin
context_id_bin: bytes = row[CONTEXT_ID_BIN_POS]
if memoize_new_contexts and context_id_bin not in context_lookup:
context_lookup[context_id_bin] = row
if row.context_only:
if row[CONTEXT_ONLY_POS]:
continue
event_type = row.event_type
event_type = row[EVENT_TYPE_POS]
if event_type == EVENT_CALL_SERVICE:
continue
if event_type is PSEUDO_EVENT_STATE_CHANGED:
entity_id = row.entity_id
entity_id = row[ENTITY_ID_POS]
assert entity_id is not None
# Skip continuous sensors
if (
@ -229,12 +245,12 @@ def _humanify(
data = {
LOGBOOK_ENTRY_WHEN: format_time(row),
LOGBOOK_ENTRY_STATE: row.state,
LOGBOOK_ENTRY_STATE: row[STATE_POS],
LOGBOOK_ENTRY_ENTITY_ID: entity_id,
}
if include_entity_name:
data[LOGBOOK_ENTRY_NAME] = entity_name_cache.get(entity_id)
if icon := row.icon:
if icon := row[ICON_POS]:
data[LOGBOOK_ENTRY_ICON] = icon
context_augmenter.augment(data, row, context_id_bin)
@ -292,9 +308,11 @@ class ContextAugmenter:
context_row := self.context_lookup.get(context_id_bin)
):
return context_row
if (context := getattr(row, "context", None)) is not None and (
origin_event := context.origin_event
) is not None:
if (
type(row) is EventAsRow
and (context := row[CONTEXT_POS]) is not None
and (origin_event := context.origin_event) is not None
):
return async_event_to_row(origin_event)
return None
@ -302,7 +320,7 @@ class ContextAugmenter:
self, data: dict[str, Any], row: Row | EventAsRow, context_id_bin: bytes | None
) -> None:
"""Augment data from the row and cache."""
if context_user_id_bin := row.context_user_id_bin:
if context_user_id_bin := row[CONTEXT_USER_ID_BIN_POS]:
data[CONTEXT_USER_ID] = bytes_to_uuid_hex_or_none(context_user_id_bin)
if not (context_row := self._get_context_row(context_id_bin, row)):
@ -311,7 +329,7 @@ class ContextAugmenter:
if _rows_match(row, context_row):
# This is the first event with the given ID. Was it directly caused by
# a parent event?
context_parent_id_bin = row.context_parent_id_bin
context_parent_id_bin = row[CONTEXT_PARENT_ID_BIN_POS]
if (
not context_parent_id_bin
or (
@ -326,10 +344,10 @@ class ContextAugmenter:
# this log entry.
if _rows_match(row, context_row):
return
event_type = context_row.event_type
event_type = context_row[EVENT_TYPE_POS]
# State change
if context_entity_id := context_row.entity_id:
data[CONTEXT_STATE] = context_row.state
if context_entity_id := context_row[ENTITY_ID_POS]:
data[CONTEXT_STATE] = context_row[STATE_POS]
data[CONTEXT_ENTITY_ID] = context_entity_id
if self.include_entity_name:
data[CONTEXT_ENTITY_ID_NAME] = self.entity_name_cache.get(
@ -375,20 +393,22 @@ class ContextAugmenter:
def _rows_match(row: Row | EventAsRow, other_row: Row | EventAsRow) -> bool:
"""Check if rows match by using the same method as Events __hash__."""
return bool(
row is other_row or (row_id := row.row_id) and row_id == other_row.row_id
row is other_row
or (row_id := row[ROW_ID_POS])
and row_id == other_row[ROW_ID_POS]
)
def _row_time_fired_isoformat(row: Row | EventAsRow) -> str:
"""Convert the row time_fired to isoformat."""
return process_timestamp_to_utc_isoformat(
dt_util.utc_from_timestamp(row.time_fired_ts) or dt_util.utcnow()
dt_util.utc_from_timestamp(row[TIME_FIRED_TS_POS]) or dt_util.utcnow()
)
def _row_time_fired_timestamp(row: Row | EventAsRow) -> float:
"""Convert the row time_fired to timestamp."""
return row.time_fired_ts or process_datetime_to_timestamp(dt_util.utcnow())
return row[TIME_FIRED_TS_POS] or time.time()
class EntityNameCache:

View File

@ -4,10 +4,8 @@ from __future__ import annotations
import argparse
import asyncio
import collections
from collections.abc import Callable
from contextlib import suppress
import json
import logging
from timeit import default_timer as timer
@ -18,7 +16,7 @@ from homeassistant.helpers.event import (
async_track_state_change,
async_track_state_change_event,
)
from homeassistant.helpers.json import JSON_DUMP, JSONEncoder
from homeassistant.helpers.json import JSON_DUMP
# mypy: allow-untyped-calls, allow-untyped-defs, no-check-untyped-defs
# mypy: no-warn-return-any
@ -310,48 +308,3 @@ async def json_serialize_states(hass):
start = timer()
JSON_DUMP(states)
return timer() - start
def _create_state_changed_event_from_old_new(
entity_id, event_time_fired, old_state, new_state
):
"""Create a state changed event from a old and new state."""
attributes = {}
if new_state is not None:
attributes = new_state.get("attributes")
attributes_json = json.dumps(attributes, cls=JSONEncoder)
if attributes_json == "null":
attributes_json = "{}"
row = collections.namedtuple( # noqa: PYI024
"Row",
[
"event_type"
"event_data"
"time_fired"
"context_id"
"context_user_id"
"state"
"entity_id"
"domain"
"attributes"
"state_id",
"old_state_id",
],
)
row.event_type = EVENT_STATE_CHANGED
row.event_data = "{}"
row.attributes = attributes_json
row.time_fired = event_time_fired
row.state = new_state and new_state.get("state")
row.entity_id = entity_id
row.domain = entity_id and core.split_entity_id(entity_id)[0]
row.context_id = None
row.context_user_id = None
row.old_state_id = old_state and 1
row.state_id = new_state and 1
# pylint: disable-next=import-outside-toplevel
from homeassistant.components import logbook
return logbook.LazyEventPartialState(row, {})

View File

@ -7,7 +7,7 @@ from typing import Any
from homeassistant.components import logbook
from homeassistant.components.logbook import processor
from homeassistant.components.logbook.models import LogbookConfig
from homeassistant.components.logbook.models import EventAsRow, LogbookConfig
from homeassistant.components.recorder.models import (
process_timestamp_to_utc_isoformat,
ulid_to_bytes_or_none,
@ -18,6 +18,8 @@ from homeassistant.helpers import entity_registry as er
from homeassistant.helpers.json import JSONEncoder
import homeassistant.util.dt as dt_util
IDX_TO_NAME = dict(enumerate(EventAsRow._fields))
class MockRow:
"""Minimal row mock."""
@ -48,6 +50,10 @@ class MockRow:
self.attributes = None
self.context_only = False
def __getitem__(self, idx: int) -> Any:
"""Get item."""
return getattr(self, IDX_TO_NAME[idx])
@property
def time_fired_minute(self):
"""Minute the event was fired."""

View File

@ -1,11 +1,9 @@
"""The tests for the logbook component."""
import asyncio
import collections
from collections.abc import Callable
from datetime import datetime, timedelta
from http import HTTPStatus
import json
from unittest.mock import Mock
from freezegun import freeze_time
@ -15,7 +13,7 @@ import voluptuous as vol
from homeassistant.components import logbook, recorder
from homeassistant.components.alexa.smart_home import EVENT_ALEXA_SMART_HOME
from homeassistant.components.automation import EVENT_AUTOMATION_TRIGGERED
from homeassistant.components.logbook.models import LazyEventPartialState
from homeassistant.components.logbook.models import EventAsRow, LazyEventPartialState
from homeassistant.components.logbook.processor import EventProcessor
from homeassistant.components.logbook.queries.common import PSEUDO_EVENT_STATE_CHANGED
from homeassistant.components.recorder import Recorder
@ -44,7 +42,6 @@ import homeassistant.core as ha
from homeassistant.core import Event, HomeAssistant
from homeassistant.helpers import device_registry as dr, entity_registry as er
from homeassistant.helpers.entityfilter import CONF_ENTITY_GLOBS
from homeassistant.helpers.json import JSONEncoder
from homeassistant.setup import async_setup_component
import homeassistant.util.dt as dt_util
@ -324,50 +321,21 @@ def create_state_changed_event_from_old_new(
entity_id, event_time_fired, old_state, new_state
):
"""Create a state changed event from a old and new state."""
attributes = {}
if new_state is not None:
attributes = new_state.get("attributes")
attributes_json = json.dumps(attributes, cls=JSONEncoder)
row = collections.namedtuple( # noqa: PYI024
"Row",
[
"event_type",
"event_data",
"time_fired",
"time_fired_ts",
"context_id_bin",
"context_user_id_bin",
"context_parent_id_bin",
"state",
"entity_id",
"domain",
"attributes",
"state_id",
"old_state_id",
"shared_attrs",
"shared_data",
"context_only",
],
row = EventAsRow(
row_id=1,
event_type=PSEUDO_EVENT_STATE_CHANGED,
event_data="{}",
time_fired_ts=dt_util.utc_to_timestamp(event_time_fired),
context_id_bin=None,
context_user_id_bin=None,
context_parent_id_bin=None,
state=new_state and new_state.get("state"),
entity_id=entity_id,
icon=None,
context_only=False,
data=None,
context=None,
)
row.event_type = PSEUDO_EVENT_STATE_CHANGED
row.event_data = "{}"
row.shared_data = "{}"
row.attributes = attributes_json
row.shared_attrs = attributes_json
row.time_fired = event_time_fired
row.time_fired_ts = dt_util.utc_to_timestamp(event_time_fired)
row.state = new_state and new_state.get("state")
row.entity_id = entity_id
row.domain = entity_id and ha.split_entity_id(entity_id)[0]
row.context_only = False
row.context_id_bin = None
row.friendly_name = None
row.icon = None
row.context_user_id_bin = None
row.context_parent_id_bin = None
row.old_state_id = old_state and 1
row.state_id = new_state and 1
return LazyEventPartialState(row, {})

View File

@ -2,20 +2,26 @@
from unittest.mock import Mock
from homeassistant.components.logbook.models import LazyEventPartialState
from homeassistant.components.logbook.models import EventAsRow, LazyEventPartialState
def test_lazy_event_partial_state_context() -> None:
"""Test we can extract context from a lazy event partial state."""
state = LazyEventPartialState(
Mock(
EventAsRow(
row_id=1,
event_type="event_type",
event_data={},
time_fired_ts=1,
context_id_bin=b"1234123412341234",
context_user_id_bin=b"1234123412341234",
context_parent_id_bin=b"4444444444444444",
event_data={},
event_type="event_type",
entity_id="entity_id",
state="state",
entity_id="entity_id",
icon="icon",
context_only=False,
data={},
context=Mock(),
),
{},
)