diff --git a/homeassistant/components/recorder/core.py b/homeassistant/components/recorder/core.py index 68b634d7235..8969b7a27e3 100644 --- a/homeassistant/components/recorder/core.py +++ b/homeassistant/components/recorder/core.py @@ -58,6 +58,7 @@ from .const import ( SupportedDialect, ) from .db_schema import ( + LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX, LEGACY_STATES_EVENT_ID_INDEX, SCHEMA_VERSION, TABLE_STATES, @@ -96,6 +97,7 @@ from .tasks import ( CompileMissingStatisticsTask, DatabaseLockTask, EntityIDMigrationTask, + EntityIDPostMigrationTask, EventIdMigrationTask, EventsContextIDMigrationTask, EventTask, @@ -757,6 +759,18 @@ class Recorder(threading.Thread): else: _LOGGER.debug("Activating states_meta manager as all data is migrated") self.states_meta_manager.active = True + with contextlib.suppress(SQLAlchemyError): + # If ix_states_entity_id_last_updated_ts still exists + # on the states table it means the entity id migration + # finished by the EntityIDPostMigrationTask did not + # because they restarted in the middle of it. We need + # to pick back up where we left off. + if get_index_by_name( + session, + TABLE_STATES, + LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX, + ): + self.queue_task(EntityIDPostMigrationTask()) if self.schema_version > LEGACY_STATES_EVENT_ID_INDEX_SCHEMA_VERSION: with contextlib.suppress(SQLAlchemyError): diff --git a/homeassistant/components/recorder/db_schema.py b/homeassistant/components/recorder/db_schema.py index c2252e9f68f..4cef287deda 100644 --- a/homeassistant/components/recorder/db_schema.py +++ b/homeassistant/components/recorder/db_schema.py @@ -119,6 +119,7 @@ METADATA_ID_LAST_UPDATED_INDEX_TS = "ix_states_metadata_id_last_updated_ts" EVENTS_CONTEXT_ID_BIN_INDEX = "ix_events_context_id_bin" STATES_CONTEXT_ID_BIN_INDEX = "ix_states_context_id_bin" LEGACY_STATES_EVENT_ID_INDEX = "ix_states_event_id" +LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX = "ix_states_entity_id_last_updated_ts" CONTEXT_ID_BIN_MAX_LENGTH = 16 MYSQL_COLLATE = "utf8mb4_unicode_ci" diff --git a/homeassistant/components/recorder/migration.py b/homeassistant/components/recorder/migration.py index 7fee3d16e8d..4b0244038e8 100644 --- a/homeassistant/components/recorder/migration.py +++ b/homeassistant/components/recorder/migration.py @@ -48,6 +48,7 @@ from .const import SupportedDialect from .db_schema import ( CONTEXT_ID_BIN_MAX_LENGTH, DOUBLE_PRECISION_TYPE_SQL, + LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX, LEGACY_STATES_EVENT_ID_INDEX, MYSQL_COLLATE, MYSQL_DEFAULT_CHARSET, @@ -1586,7 +1587,7 @@ def post_migrate_entity_ids(instance: Recorder) -> bool: if is_done: # Drop the old indexes since they are no longer needed - _drop_index(session_maker, "states", "ix_states_entity_id_last_updated_ts") + _drop_index(session_maker, "states", LEGACY_STATES_ENTITY_ID_LAST_UPDATED_INDEX) _LOGGER.debug("Cleanup legacy entity_ids done=%s", is_done) return is_done diff --git a/tests/components/recorder/db_schema_32.py b/tests/components/recorder/db_schema_32.py new file mode 100644 index 00000000000..c41775ed386 --- /dev/null +++ b/tests/components/recorder/db_schema_32.py @@ -0,0 +1,752 @@ +"""Models for SQLAlchemy. + +This file contains the model definitions for schema version 30. +It is used to test the schema migration logic. +""" +from __future__ import annotations + +from collections.abc import Callable +from datetime import datetime, timedelta +import logging +import time +from typing import Any, TypedDict, cast, overload + +import ciso8601 +from fnv_hash_fast import fnv1a_32 +from sqlalchemy import ( + JSON, + BigInteger, + Boolean, + Column, + DateTime, + Float, + ForeignKey, + Identity, + Index, + Integer, + LargeBinary, + SmallInteger, + String, + Text, + distinct, + type_coerce, +) +from sqlalchemy.dialects import mysql, oracle, postgresql, sqlite +from sqlalchemy.orm import aliased, declarative_base, relationship +from sqlalchemy.orm.session import Session +from typing_extensions import Self + +from homeassistant.components.recorder.const import SupportedDialect +from homeassistant.const import ( + ATTR_ATTRIBUTION, + ATTR_RESTORED, + ATTR_SUPPORTED_FEATURES, + MAX_LENGTH_EVENT_CONTEXT_ID, + MAX_LENGTH_EVENT_EVENT_TYPE, + MAX_LENGTH_EVENT_ORIGIN, + MAX_LENGTH_STATE_ENTITY_ID, + MAX_LENGTH_STATE_STATE, +) +from homeassistant.core import Context, Event, EventOrigin, State, split_entity_id +from homeassistant.helpers import entity_registry as er +from homeassistant.helpers.json import JSON_DUMP, json_bytes +import homeassistant.util.dt as dt_util +from homeassistant.util.json import JSON_DECODE_EXCEPTIONS, json_loads + +ALL_DOMAIN_EXCLUDE_ATTRS = {ATTR_ATTRIBUTION, ATTR_RESTORED, ATTR_SUPPORTED_FEATURES} + +# SQLAlchemy Schema +# pylint: disable=invalid-name +Base = declarative_base() + +SCHEMA_VERSION = 30 + +_LOGGER = logging.getLogger(__name__) + +TABLE_EVENTS = "events" +TABLE_EVENT_DATA = "event_data" +TABLE_EVENT_TYPES = "event_types" +TABLE_STATES = "states" +TABLE_STATE_ATTRIBUTES = "state_attributes" +TABLE_STATES_META = "states_meta" +TABLE_RECORDER_RUNS = "recorder_runs" +TABLE_SCHEMA_CHANGES = "schema_changes" +TABLE_STATISTICS = "statistics" +TABLE_STATISTICS_META = "statistics_meta" +TABLE_STATISTICS_RUNS = "statistics_runs" +TABLE_STATISTICS_SHORT_TERM = "statistics_short_term" + +ALL_TABLES = [ + TABLE_STATES, + TABLE_STATE_ATTRIBUTES, + TABLE_STATES_META, + TABLE_EVENTS, + TABLE_EVENT_DATA, + TABLE_EVENT_TYPES, + TABLE_RECORDER_RUNS, + TABLE_SCHEMA_CHANGES, + TABLE_STATISTICS, + TABLE_STATISTICS_META, + TABLE_STATISTICS_RUNS, + TABLE_STATISTICS_SHORT_TERM, +] + +TABLES_TO_CHECK = [ + TABLE_STATES, + TABLE_EVENTS, + TABLE_RECORDER_RUNS, + TABLE_SCHEMA_CHANGES, +] + +LAST_UPDATED_INDEX = "ix_states_last_updated" +ENTITY_ID_LAST_UPDATED_TS_INDEX = "ix_states_entity_id_last_updated_ts" +EVENTS_CONTEXT_ID_INDEX = "ix_events_context_id" +STATES_CONTEXT_ID_INDEX = "ix_states_context_id" +CONTEXT_ID_BIN_MAX_LENGTH = 16 +EVENTS_CONTEXT_ID_BIN_INDEX = "ix_events_context_id_bin" +STATES_CONTEXT_ID_BIN_INDEX = "ix_states_context_id_bin" + + +class FAST_PYSQLITE_DATETIME(sqlite.DATETIME): # type: ignore[misc] + """Use ciso8601 to parse datetimes instead of sqlalchemy built-in regex.""" + + def result_processor(self, dialect, coltype): # type: ignore[no-untyped-def] + """Offload the datetime parsing to ciso8601.""" + return lambda value: None if value is None else ciso8601.parse_datetime(value) + + +JSON_VARIANT_CAST = Text().with_variant( + postgresql.JSON(none_as_null=True), "postgresql" +) +JSONB_VARIANT_CAST = Text().with_variant( + postgresql.JSONB(none_as_null=True), "postgresql" +) +DATETIME_TYPE = ( + DateTime(timezone=True) + .with_variant(mysql.DATETIME(timezone=True, fsp=6), "mysql") + .with_variant(FAST_PYSQLITE_DATETIME(), "sqlite") +) +DOUBLE_TYPE = ( + Float() + .with_variant(mysql.DOUBLE(asdecimal=False), "mysql") + .with_variant(oracle.DOUBLE_PRECISION(), "oracle") + .with_variant(postgresql.DOUBLE_PRECISION(), "postgresql") +) + +TIMESTAMP_TYPE = DOUBLE_TYPE + + +class UnsupportedDialect(Exception): + """The dialect or its version is not supported.""" + + +class StatisticResult(TypedDict): + """Statistic result data class. + + Allows multiple datapoints for the same statistic_id. + """ + + meta: StatisticMetaData + stat: StatisticData + + +class StatisticDataBase(TypedDict): + """Mandatory fields for statistic data class.""" + + start: datetime + + +class StatisticData(StatisticDataBase, total=False): + """Statistic data class.""" + + mean: float + min: float + max: float + last_reset: datetime | None + state: float + sum: float + + +class StatisticMetaData(TypedDict): + """Statistic meta data class.""" + + has_mean: bool + has_sum: bool + name: str | None + source: str + statistic_id: str + unit_of_measurement: str | None + + +class JSONLiteral(JSON): # type: ignore[misc] + """Teach SA how to literalize json.""" + + def literal_processor(self, dialect: str) -> Callable[[Any], str]: + """Processor to convert a value to JSON.""" + + def process(value: Any) -> str: + """Dump json.""" + return JSON_DUMP(value) + + return process + + +EVENT_ORIGIN_ORDER = [EventOrigin.local, EventOrigin.remote] +EVENT_ORIGIN_TO_IDX = {origin: idx for idx, origin in enumerate(EVENT_ORIGIN_ORDER)} + + +class Events(Base): # type: ignore[misc,valid-type] + """Event history data.""" + + __table_args__ = ( + # Used for fetching events at a specific time + # see logbook + Index("ix_events_event_type_time_fired", "event_type", "time_fired"), + Index( + EVENTS_CONTEXT_ID_BIN_INDEX, + "context_id_bin", + mysql_length=CONTEXT_ID_BIN_MAX_LENGTH, + mariadb_length=CONTEXT_ID_BIN_MAX_LENGTH, + ), + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_EVENTS + event_id = Column(Integer, Identity(), primary_key=True) + event_type = Column(String(MAX_LENGTH_EVENT_EVENT_TYPE)) + event_data = Column(Text().with_variant(mysql.LONGTEXT, "mysql")) + origin = Column(String(MAX_LENGTH_EVENT_ORIGIN)) # no longer used for new rows + origin_idx = Column(SmallInteger) + time_fired = Column(DATETIME_TYPE, index=True) + time_fired_ts = Column(TIMESTAMP_TYPE, index=True) + context_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True) + context_user_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID)) + context_parent_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID)) + data_id = Column(Integer, ForeignKey("event_data.data_id"), index=True) + context_id_bin = Column( + LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH) + ) # *** Not originally in v3v320, only added for recorder to startup ok + context_user_id_bin = Column( + LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH) + ) # *** Not originally in v32, only added for recorder to startup ok + context_parent_id_bin = Column( + LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH) + ) # *** Not originally in v32, only added for recorder to startup ok + event_type_id = Column( + Integer, ForeignKey("event_types.event_type_id"), index=True + ) # *** Not originally in v32, only added for recorder to startup ok + event_data_rel = relationship("EventData") + event_type_rel = relationship("EventTypes") + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + "" + ) + + @staticmethod + def from_event(event: Event) -> Events: + """Create an event database object from a native event.""" + return Events( + event_type=event.event_type, + event_data=None, + origin_idx=EVENT_ORIGIN_TO_IDX.get(event.origin), + time_fired=event.time_fired, + context_id=event.context.id, + context_user_id=event.context.user_id, + context_parent_id=event.context.parent_id, + ) + + def to_native(self, validate_entity_id: bool = True) -> Event | None: + """Convert to a native HA Event.""" + context = Context( + id=self.context_id, + user_id=self.context_user_id, + parent_id=self.context_parent_id, + ) + try: + return Event( + self.event_type, + json_loads(self.event_data) if self.event_data else {}, + EventOrigin(self.origin) + if self.origin + else EVENT_ORIGIN_ORDER[self.origin_idx], + process_timestamp(self.time_fired), + context=context, + ) + except JSON_DECODE_EXCEPTIONS: + # When json_loads fails + _LOGGER.exception("Error converting to event: %s", self) + return None + + +class EventData(Base): # type: ignore[misc,valid-type] + """Event data history.""" + + __table_args__ = ( + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_EVENT_DATA + data_id = Column(Integer, Identity(), primary_key=True) + hash = Column(BigInteger, index=True) + # Note that this is not named attributes to avoid confusion with the states table + shared_data = Column(Text().with_variant(mysql.LONGTEXT, "mysql")) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + "" + ) + + @staticmethod + def from_event(event: Event) -> EventData: + """Create object from an event.""" + shared_data = json_bytes(event.data) + return EventData( + shared_data=shared_data.decode("utf-8"), + hash=EventData.hash_shared_data_bytes(shared_data), + ) + + @staticmethod + def shared_data_bytes_from_event( + event: Event, dialect: SupportedDialect | None + ) -> bytes: + """Create shared_data from an event.""" + return json_bytes(event.data) + + @staticmethod + def hash_shared_data_bytes(shared_data_bytes: bytes) -> int: + """Return the hash of json encoded shared data.""" + return cast(int, fnv1a_32(shared_data_bytes)) + + def to_native(self) -> dict[str, Any]: + """Convert to an HA state object.""" + try: + return cast(dict[str, Any], json_loads(self.shared_data)) + except JSON_DECODE_EXCEPTIONS: + _LOGGER.exception("Error converting row to event data: %s", self) + return {} + + +# *** Not originally in v32, only added for recorder to startup ok +# This is not being tested by the v32 statistics migration tests +class EventTypes(Base): # type: ignore[misc,valid-type] + """Event type history.""" + + __table_args__ = ( + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_EVENT_TYPES + event_type_id = Column(Integer, Identity(), primary_key=True) + event_type = Column(String(MAX_LENGTH_EVENT_EVENT_TYPE)) + + +class States(Base): # type: ignore[misc,valid-type] + """State change history.""" + + __table_args__ = ( + # Used for fetching the state of entities at a specific time + # (get_states in history.py) + Index(ENTITY_ID_LAST_UPDATED_TS_INDEX, "entity_id", "last_updated_ts"), + Index( + STATES_CONTEXT_ID_BIN_INDEX, + "context_id_bin", + mysql_length=CONTEXT_ID_BIN_MAX_LENGTH, + mariadb_length=CONTEXT_ID_BIN_MAX_LENGTH, + ), + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_STATES + state_id = Column(Integer, Identity(), primary_key=True) + entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID)) + state = Column(String(MAX_LENGTH_STATE_STATE)) + attributes = Column( + Text().with_variant(mysql.LONGTEXT, "mysql") + ) # no longer used for new rows + event_id = Column( # no longer used for new rows + Integer, ForeignKey("events.event_id", ondelete="CASCADE"), index=True + ) + last_changed = Column(DATETIME_TYPE) + last_changed_ts = Column(TIMESTAMP_TYPE) + last_updated = Column(DATETIME_TYPE, default=dt_util.utcnow, index=True) + last_updated_ts = Column(TIMESTAMP_TYPE, default=time.time, index=True) + old_state_id = Column(Integer, ForeignKey("states.state_id"), index=True) + attributes_id = Column( + Integer, ForeignKey("state_attributes.attributes_id"), index=True + ) + context_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True) + context_user_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID)) + context_parent_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID)) + origin_idx = Column(SmallInteger) # 0 is local, 1 is remote + context_id_bin = Column( + LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH) + ) # *** Not originally in v32, only added for recorder to startup ok + context_user_id_bin = Column( + LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH) + ) # *** Not originally in v32, only added for recorder to startup ok + context_parent_id_bin = Column( + LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH) + ) # *** Not originally in v32, only added for recorder to startup ok + metadata_id = Column( + Integer, ForeignKey("states_meta.metadata_id"), index=True + ) # *** Not originally in v32, only added for recorder to startup ok + states_meta_rel = relationship("StatesMeta") + old_state = relationship("States", remote_side=[state_id]) + state_attributes = relationship("StateAttributes") + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + f"" + ) + + @staticmethod + def from_event(event: Event) -> States: + """Create object from a state_changed event.""" + entity_id = event.data["entity_id"] + state: State | None = event.data.get("new_state") + dbstate = States( + entity_id=entity_id, + attributes=None, + context_id=event.context.id, + context_user_id=event.context.user_id, + context_parent_id=event.context.parent_id, + origin_idx=EVENT_ORIGIN_TO_IDX.get(event.origin), + ) + + # None state means the state was removed from the state machine + if state is None: + dbstate.state = "" + dbstate.last_updated = event.time_fired + dbstate.last_changed = None + return dbstate + + dbstate.state = state.state + dbstate.last_updated = state.last_updated + if state.last_updated == state.last_changed: + dbstate.last_changed = None + else: + dbstate.last_changed = state.last_changed + + return dbstate + + def to_native(self, validate_entity_id: bool = True) -> State | None: + """Convert to an HA state object.""" + context = Context( + id=self.context_id, + user_id=self.context_user_id, + parent_id=self.context_parent_id, + ) + try: + attrs = json_loads(self.attributes) if self.attributes else {} + except JSON_DECODE_EXCEPTIONS: + # When json_loads fails + _LOGGER.exception("Error converting row to state: %s", self) + return None + if self.last_changed is None or self.last_changed == self.last_updated: + last_changed = last_updated = process_timestamp(self.last_updated) + else: + last_updated = process_timestamp(self.last_updated) + last_changed = process_timestamp(self.last_changed) + return State( + self.entity_id, + self.state, + # Join the state_attributes table on attributes_id to get the attributes + # for newer states + attrs, + last_changed, + last_updated, + context=context, + validate_entity_id=validate_entity_id, + ) + + +class StateAttributes(Base): # type: ignore[misc,valid-type] + """State attribute change history.""" + + __table_args__ = ( + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_STATE_ATTRIBUTES + attributes_id = Column(Integer, Identity(), primary_key=True) + hash = Column(BigInteger, index=True) + # Note that this is not named attributes to avoid confusion with the states table + shared_attrs = Column(Text().with_variant(mysql.LONGTEXT, "mysql")) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + f"" + ) + + @staticmethod + def from_event(event: Event) -> StateAttributes: + """Create object from a state_changed event.""" + state: State | None = event.data.get("new_state") + # None state means the state was removed from the state machine + attr_bytes = b"{}" if state is None else json_bytes(state.attributes) + dbstate = StateAttributes(shared_attrs=attr_bytes.decode("utf-8")) + dbstate.hash = StateAttributes.hash_shared_attrs_bytes(attr_bytes) + return dbstate + + @staticmethod + def shared_attrs_bytes_from_event( + event: Event, + entity_registry: er.EntityRegistry, + exclude_attrs_by_domain: dict[str, set[str]], + dialect: SupportedDialect | None, + ) -> bytes: + """Create shared_attrs from a state_changed event.""" + state: State | None = event.data.get("new_state") + # None state means the state was removed from the state machine + if state is None: + return b"{}" + domain = split_entity_id(state.entity_id)[0] + exclude_attrs = ( + exclude_attrs_by_domain.get(domain, set()) | ALL_DOMAIN_EXCLUDE_ATTRS + ) + return json_bytes( + {k: v for k, v in state.attributes.items() if k not in exclude_attrs} + ) + + @staticmethod + def hash_shared_attrs_bytes(shared_attrs_bytes: bytes) -> int: + """Return the hash of json encoded shared attributes.""" + return cast(int, fnv1a_32(shared_attrs_bytes)) + + def to_native(self) -> dict[str, Any]: + """Convert to an HA state object.""" + try: + return cast(dict[str, Any], json_loads(self.shared_attrs)) + except JSON_DECODE_EXCEPTIONS: + # When json_loads fails + _LOGGER.exception("Error converting row to state attributes: %s", self) + return {} + + +# *** Not originally in v30, only added for recorder to startup ok +# This is not being tested by the v30 statistics migration tests +class StatesMeta(Base): # type: ignore[misc,valid-type] + """Metadata for states.""" + + __table_args__ = ( + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_STATES_META + metadata_id = Column(Integer, Identity(), primary_key=True) + entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID)) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + "" + ) + + +class StatisticsBase: + """Statistics base class.""" + + id = Column(Integer, Identity(), primary_key=True) + created = Column(DATETIME_TYPE, default=dt_util.utcnow) + metadata_id = Column( + Integer, + ForeignKey(f"{TABLE_STATISTICS_META}.id", ondelete="CASCADE"), + index=True, + ) + start = Column(DATETIME_TYPE, index=True) + mean = Column(DOUBLE_TYPE) + min = Column(DOUBLE_TYPE) + max = Column(DOUBLE_TYPE) + last_reset = Column(DATETIME_TYPE) + state = Column(DOUBLE_TYPE) + sum = Column(DOUBLE_TYPE) + + @classmethod + def from_stats(cls, metadata_id: int, stats: StatisticData) -> Self: + """Create object from a statistics.""" + return cls( # type: ignore[call-arg,misc] + metadata_id=metadata_id, + **stats, + ) + + +class Statistics(Base, StatisticsBase): # type: ignore[misc,valid-type] + """Long term statistics.""" + + duration = timedelta(hours=1) + + __table_args__ = ( + # Used for fetching statistics for a certain entity at a specific time + Index("ix_statistics_statistic_id_start", "metadata_id", "start", unique=True), + ) + __tablename__ = TABLE_STATISTICS + + +class StatisticsShortTerm(Base, StatisticsBase): # type: ignore[misc,valid-type] + """Short term statistics.""" + + duration = timedelta(minutes=5) + + __table_args__ = ( + # Used for fetching statistics for a certain entity at a specific time + Index( + "ix_statistics_short_term_statistic_id_start", + "metadata_id", + "start", + unique=True, + ), + ) + __tablename__ = TABLE_STATISTICS_SHORT_TERM + + +class StatisticsMeta(Base): # type: ignore[misc,valid-type] + """Statistics meta data.""" + + __table_args__ = ( + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_STATISTICS_META + id = Column(Integer, Identity(), primary_key=True) + statistic_id = Column(String(255), index=True, unique=True) + source = Column(String(32)) + unit_of_measurement = Column(String(255)) + has_mean = Column(Boolean) + has_sum = Column(Boolean) + name = Column(String(255)) + + @staticmethod + def from_meta(meta: StatisticMetaData) -> StatisticsMeta: + """Create object from meta data.""" + return StatisticsMeta(**meta) + + +class RecorderRuns(Base): # type: ignore[misc,valid-type] + """Representation of recorder run.""" + + __table_args__ = (Index("ix_recorder_runs_start_end", "start", "end"),) + __tablename__ = TABLE_RECORDER_RUNS + run_id = Column(Integer, Identity(), primary_key=True) + start = Column(DATETIME_TYPE, default=dt_util.utcnow) + end = Column(DATETIME_TYPE) + closed_incorrect = Column(Boolean, default=False) + created = Column(DATETIME_TYPE, default=dt_util.utcnow) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + end = ( + f"'{self.end.isoformat(sep=' ', timespec='seconds')}'" if self.end else None + ) + return ( + f"" + ) + + def entity_ids(self, point_in_time: datetime | None = None) -> list[str]: + """Return the entity ids that existed in this run. + + Specify point_in_time if you want to know which existed at that point + in time inside the run. + """ + session = Session.object_session(self) + + assert session is not None, "RecorderRuns need to be persisted" + + query = session.query(distinct(States.entity_id)).filter( + States.last_updated >= self.start + ) + + if point_in_time is not None: + query = query.filter(States.last_updated < point_in_time) + elif self.end is not None: + query = query.filter(States.last_updated < self.end) + + return [row[0] for row in query] + + def to_native(self, validate_entity_id: bool = True) -> RecorderRuns: + """Return self, native format is this model.""" + return self + + +class SchemaChanges(Base): # type: ignore[misc,valid-type] + """Representation of schema version changes.""" + + __tablename__ = TABLE_SCHEMA_CHANGES + change_id = Column(Integer, Identity(), primary_key=True) + schema_version = Column(Integer) + changed = Column(DATETIME_TYPE, default=dt_util.utcnow) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + "" + ) + + +class StatisticsRuns(Base): # type: ignore[misc,valid-type] + """Representation of statistics run.""" + + __tablename__ = TABLE_STATISTICS_RUNS + run_id = Column(Integer, Identity(), primary_key=True) + start = Column(DATETIME_TYPE, index=True) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + f"" + ) + + +EVENT_DATA_JSON = type_coerce( + EventData.shared_data.cast(JSONB_VARIANT_CAST), JSONLiteral(none_as_null=True) +) +OLD_FORMAT_EVENT_DATA_JSON = type_coerce( + Events.event_data.cast(JSONB_VARIANT_CAST), JSONLiteral(none_as_null=True) +) + +SHARED_ATTRS_JSON = type_coerce( + StateAttributes.shared_attrs.cast(JSON_VARIANT_CAST), JSON(none_as_null=True) +) +OLD_FORMAT_ATTRS_JSON = type_coerce( + States.attributes.cast(JSON_VARIANT_CAST), JSON(none_as_null=True) +) + +ENTITY_ID_IN_EVENT: Column = EVENT_DATA_JSON["entity_id"] +OLD_ENTITY_ID_IN_EVENT: Column = OLD_FORMAT_EVENT_DATA_JSON["entity_id"] +DEVICE_ID_IN_EVENT: Column = EVENT_DATA_JSON["device_id"] +OLD_STATE = aliased(States, name="old_state") + + +@overload +def process_timestamp(ts: None) -> None: + ... + + +@overload +def process_timestamp(ts: datetime) -> datetime: + ... + + +def process_timestamp(ts: datetime | None) -> datetime | None: + """Process a timestamp into datetime object.""" + if ts is None: + return None + if ts.tzinfo is None: + return ts.replace(tzinfo=dt_util.UTC) + + return dt_util.as_utc(ts) diff --git a/tests/components/recorder/test_v32_migration.py b/tests/components/recorder/test_v32_migration.py index 0b5389ddf7f..6e424558181 100644 --- a/tests/components/recorder/test_v32_migration.py +++ b/tests/components/recorder/test_v32_migration.py @@ -27,7 +27,7 @@ from tests.common import async_test_home_assistant ORIG_TZ = dt_util.DEFAULT_TIME_ZONE CREATE_ENGINE_TARGET = "homeassistant.components.recorder.core.create_engine" -SCHEMA_MODULE = "tests.components.recorder.db_schema_30" +SCHEMA_MODULE = "tests.components.recorder.db_schema_32" def _create_engine_test(*args, **kwargs): @@ -222,3 +222,144 @@ async def test_migrate_times( await hass.async_stop() dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + +async def test_migrate_can_resume_entity_id_post_migration( + caplog: pytest.LogCaptureFixture, tmpdir: py.path.local +) -> None: + """Test we resume the entity id post migration after a restart.""" + test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db") + dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}" + + importlib.import_module(SCHEMA_MODULE) + old_db_schema = sys.modules[SCHEMA_MODULE] + now = dt_util.utcnow() + one_second_past = now - timedelta(seconds=1) + mock_state = State( + "sensor.test", + "old", + {"last_reset": now.isoformat()}, + last_changed=one_second_past, + last_updated=now, + ) + state_changed_event = Event( + EVENT_STATE_CHANGED, + { + "entity_id": "sensor.test", + "old_state": None, + "new_state": mock_state, + }, + EventOrigin.local, + time_fired=now, + ) + custom_event = Event( + "custom_event", + {"entity_id": "sensor.custom"}, + EventOrigin.local, + time_fired=now, + ) + number_of_migrations = 5 + + def _get_states_index_names(): + with session_scope(hass=hass) as session: + return inspect(session.connection()).get_indexes("states") + + with patch.object(recorder, "db_schema", old_db_schema), patch.object( + recorder.migration, "SCHEMA_VERSION", old_db_schema.SCHEMA_VERSION + ), patch.object(core, "StatesMeta", old_db_schema.StatesMeta), patch.object( + core, "EventTypes", old_db_schema.EventTypes + ), patch.object( + core, "EventData", old_db_schema.EventData + ), patch.object( + core, "States", old_db_schema.States + ), patch.object( + core, "Events", old_db_schema.Events + ), patch( + CREATE_ENGINE_TARGET, new=_create_engine_test + ), patch( + "homeassistant.components.recorder.Recorder._migrate_events_context_ids", + ), patch( + "homeassistant.components.recorder.Recorder._migrate_states_context_ids", + ), patch( + "homeassistant.components.recorder.Recorder._migrate_event_type_ids", + ), patch( + "homeassistant.components.recorder.Recorder._migrate_entity_ids", + ), patch( + "homeassistant.components.recorder.Recorder._post_migrate_entity_ids" + ), patch( + "homeassistant.components.recorder.Recorder._cleanup_legacy_states_event_ids" + ): + hass = await async_test_home_assistant(asyncio.get_running_loop()) + recorder_helper.async_initialize_recorder(hass) + assert await async_setup_component( + hass, "recorder", {"recorder": {"db_url": dburl}} + ) + await hass.async_block_till_done() + await async_wait_recording_done(hass) + await async_wait_recording_done(hass) + + def _add_data(): + with session_scope(hass=hass) as session: + session.add(old_db_schema.Events.from_event(custom_event)) + session.add(old_db_schema.States.from_event(state_changed_event)) + + await recorder.get_instance(hass).async_add_executor_job(_add_data) + await hass.async_block_till_done() + await recorder.get_instance(hass).async_block_till_done() + + states_indexes = await recorder.get_instance(hass).async_add_executor_job( + _get_states_index_names + ) + states_index_names = {index["name"] for index in states_indexes} + assert recorder.get_instance(hass).use_legacy_events_index is True + + await hass.async_stop() + await hass.async_block_till_done() + + assert "ix_states_event_id" in states_index_names + assert "ix_states_entity_id_last_updated_ts" in states_index_names + + with patch("homeassistant.components.recorder.Recorder._post_migrate_entity_ids"): + hass = await async_test_home_assistant(asyncio.get_running_loop()) + recorder_helper.async_initialize_recorder(hass) + assert await async_setup_component( + hass, "recorder", {"recorder": {"db_url": dburl}} + ) + await hass.async_block_till_done() + + # We need to wait for all the migration tasks to complete + # before we can check the database. + for _ in range(number_of_migrations): + await recorder.get_instance(hass).async_block_till_done() + await async_wait_recording_done(hass) + + states_indexes = await recorder.get_instance(hass).async_add_executor_job( + _get_states_index_names + ) + states_index_names = {index["name"] for index in states_indexes} + await hass.async_stop() + await hass.async_block_till_done() + + assert "ix_states_entity_id_last_updated_ts" in states_index_names + + hass = await async_test_home_assistant(asyncio.get_running_loop()) + recorder_helper.async_initialize_recorder(hass) + assert await async_setup_component( + hass, "recorder", {"recorder": {"db_url": dburl}} + ) + await hass.async_block_till_done() + + # We need to wait for all the migration tasks to complete + # before we can check the database. + for _ in range(number_of_migrations): + await recorder.get_instance(hass).async_block_till_done() + await async_wait_recording_done(hass) + + states_indexes = await recorder.get_instance(hass).async_add_executor_job( + _get_states_index_names + ) + states_index_names = {index["name"] for index in states_indexes} + assert "ix_states_entity_id_last_updated_ts" not in states_index_names + + await hass.async_stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ