diff --git a/tests/components/recorder/models_schema_23_with_newer_columns.py b/tests/components/recorder/models_schema_23_with_newer_columns.py new file mode 100644 index 00000000000..a086aa588d4 --- /dev/null +++ b/tests/components/recorder/models_schema_23_with_newer_columns.py @@ -0,0 +1,613 @@ +"""Models for SQLAlchemy. + +This file contains the model definitions for schema version 23 +used by Home Assistant Core 2021.11.0, which adds the name column +to statistics_meta. + +The v23 schema as been slightly modified to add the EventData table to +allow the recorder to startup successfully. + +It is used to test the schema migration logic. +""" +from __future__ import annotations + +from datetime import datetime, timedelta +import json +import logging +from typing import TypedDict, overload + +from sqlalchemy import ( + BigInteger, + Boolean, + Column, + DateTime, + Float, + ForeignKey, + Identity, + Index, + Integer, + SmallInteger, + String, + Text, + distinct, +) +from sqlalchemy.dialects import mysql, oracle, postgresql +from sqlalchemy.ext.declarative import declared_attr +from sqlalchemy.orm import declarative_base, relationship +from sqlalchemy.orm.session import Session + +from homeassistant.const import ( + MAX_LENGTH_EVENT_CONTEXT_ID, + MAX_LENGTH_EVENT_EVENT_TYPE, + MAX_LENGTH_EVENT_ORIGIN, + MAX_LENGTH_STATE_DOMAIN, + MAX_LENGTH_STATE_ENTITY_ID, + MAX_LENGTH_STATE_STATE, +) +from homeassistant.core import Context, Event, EventOrigin, State, split_entity_id +from homeassistant.helpers.json import JSONEncoder +import homeassistant.util.dt as dt_util + +# SQLAlchemy Schema +# pylint: disable=invalid-name +Base = declarative_base() + +SCHEMA_VERSION = 23 + +_LOGGER = logging.getLogger(__name__) + +DB_TIMEZONE = "+00:00" + +TABLE_EVENTS = "events" +TABLE_STATES = "states" +TABLE_RECORDER_RUNS = "recorder_runs" +TABLE_SCHEMA_CHANGES = "schema_changes" +TABLE_STATISTICS = "statistics" +TABLE_STATISTICS_META = "statistics_meta" +TABLE_STATISTICS_RUNS = "statistics_runs" +TABLE_STATISTICS_SHORT_TERM = "statistics_short_term" +TABLE_EVENT_DATA = "event_data" + +ALL_TABLES = [ + TABLE_STATES, + TABLE_EVENTS, + TABLE_RECORDER_RUNS, + TABLE_SCHEMA_CHANGES, + TABLE_STATISTICS, + TABLE_STATISTICS_META, + TABLE_STATISTICS_RUNS, + TABLE_STATISTICS_SHORT_TERM, +] + +DATETIME_TYPE = DateTime(timezone=True).with_variant( + mysql.DATETIME(timezone=True, fsp=6), "mysql" +) +DOUBLE_TYPE = ( + Float() + .with_variant(mysql.DOUBLE(asdecimal=False), "mysql") + .with_variant(oracle.DOUBLE_PRECISION(), "oracle") + .with_variant(postgresql.DOUBLE_PRECISION(), "postgresql") +) + + +class Events(Base): # type: ignore + """Event history data.""" + + __table_args__ = ( + # Used for fetching events at a specific time + # see logbook + Index("ix_events_event_type_time_fired", "event_type", "time_fired"), + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_EVENTS + event_id = Column(Integer, Identity(), primary_key=True) + event_type = Column(String(MAX_LENGTH_EVENT_EVENT_TYPE)) + event_data = Column(Text().with_variant(mysql.LONGTEXT, "mysql")) + origin = Column(String(MAX_LENGTH_EVENT_ORIGIN)) + origin_idx = Column( + SmallInteger + ) # *** Not originally in v23, only added for recorder to startup ok + time_fired = Column(DATETIME_TYPE, index=True) + created = Column(DATETIME_TYPE, default=dt_util.utcnow) + context_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True) + context_user_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True) + context_parent_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True) + data_id = Column( + Integer, ForeignKey("event_data.data_id"), index=True + ) # *** Not originally in v23, only added for recorder to startup ok + event_data_rel = relationship( + "EventData" + ) # *** Not originally in v23, only added for recorder to startup ok + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + f"" + ) + + @staticmethod + def from_event(event, event_data=None): + """Create an event database object from a native event.""" + return Events( + event_type=event.event_type, + event_data=event_data + or json.dumps(event.data, cls=JSONEncoder, separators=(",", ":")), + origin=str(event.origin.value), + time_fired=event.time_fired, + context_id=event.context.id, + context_user_id=event.context.user_id, + context_parent_id=event.context.parent_id, + ) + + def to_native(self, validate_entity_id=True): + """Convert to a native HA Event.""" + context = Context( + id=self.context_id, + user_id=self.context_user_id, + parent_id=self.context_parent_id, + ) + try: + return Event( + self.event_type, + json.loads(self.event_data), + EventOrigin(self.origin), + process_timestamp(self.time_fired), + context=context, + ) + except ValueError: + # When json.loads fails + _LOGGER.exception("Error converting to event: %s", self) + return None + + +# *** Not originally in v23, only added for recorder to startup ok +# This is not being tested by the v23 statistics migration tests +class EventData(Base): # type: ignore[misc,valid-type] + """Event data history.""" + + __table_args__ = ( + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_EVENT_DATA + data_id = Column(Integer, Identity(), primary_key=True) + hash = Column(BigInteger, index=True) + # Note that this is not named attributes to avoid confusion with the states table + shared_data = Column(Text().with_variant(mysql.LONGTEXT, "mysql")) + + +class States(Base): # type: ignore + """State change history.""" + + __table_args__ = ( + # Used for fetching the state of entities at a specific time + # (get_states in history.py) + Index("ix_states_entity_id_last_updated", "entity_id", "last_updated"), + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_STATES + state_id = Column(Integer, Identity(), primary_key=True) + domain = Column(String(MAX_LENGTH_STATE_DOMAIN)) + entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID)) + state = Column(String(MAX_LENGTH_STATE_STATE)) + attributes = Column(Text().with_variant(mysql.LONGTEXT, "mysql")) + event_id = Column( + Integer, ForeignKey("events.event_id", ondelete="CASCADE"), index=True + ) + last_changed = Column(DATETIME_TYPE, default=dt_util.utcnow) + last_updated = Column(DATETIME_TYPE, default=dt_util.utcnow, index=True) + created = Column(DATETIME_TYPE, default=dt_util.utcnow) + old_state_id = Column(Integer, ForeignKey("states.state_id"), index=True) + event = relationship("Events", uselist=False) + old_state = relationship("States", remote_side=[state_id]) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + f"" + ) + + @staticmethod + def from_event(event): + """Create object from a state_changed event.""" + entity_id = event.data["entity_id"] + state = event.data.get("new_state") + + dbstate = States(entity_id=entity_id) + + # State got deleted + if state is None: + dbstate.state = "" + dbstate.domain = split_entity_id(entity_id)[0] + dbstate.attributes = "{}" + dbstate.last_changed = event.time_fired + dbstate.last_updated = event.time_fired + else: + dbstate.domain = state.domain + dbstate.state = state.state + dbstate.attributes = json.dumps( + dict(state.attributes), cls=JSONEncoder, separators=(",", ":") + ) + dbstate.last_changed = state.last_changed + dbstate.last_updated = state.last_updated + + return dbstate + + def to_native(self, validate_entity_id=True): + """Convert to an HA state object.""" + try: + return State( + self.entity_id, + self.state, + json.loads(self.attributes), + process_timestamp(self.last_changed), + process_timestamp(self.last_updated), + # Join the events table on event_id to get the context instead + # as it will always be there for state_changed events + context=Context(id=None), + validate_entity_id=validate_entity_id, + ) + except ValueError: + # When json.loads fails + _LOGGER.exception("Error converting row to state: %s", self) + return None + + +class StatisticResult(TypedDict): + """Statistic result data class. + + Allows multiple datapoints for the same statistic_id. + """ + + meta: StatisticMetaData + stat: StatisticData + + +class StatisticDataBase(TypedDict): + """Mandatory fields for statistic data class.""" + + start: datetime + + +class StatisticData(StatisticDataBase, total=False): + """Statistic data class.""" + + mean: float + min: float + max: float + last_reset: datetime | None + state: float + sum: float + + +class StatisticsBase: + """Statistics base class.""" + + id = Column(Integer, Identity(), primary_key=True) + created = Column(DATETIME_TYPE, default=dt_util.utcnow) + + @declared_attr + def metadata_id(self): + """Define the metadata_id column for sub classes.""" + return Column( + Integer, + ForeignKey(f"{TABLE_STATISTICS_META}.id", ondelete="CASCADE"), + index=True, + ) + + start = Column(DATETIME_TYPE, index=True) + mean = Column(DOUBLE_TYPE) + min = Column(DOUBLE_TYPE) + max = Column(DOUBLE_TYPE) + last_reset = Column(DATETIME_TYPE) + state = Column(DOUBLE_TYPE) + sum = Column(DOUBLE_TYPE) + + @classmethod + def from_stats(cls, metadata_id: int, stats: StatisticData): + """Create object from a statistics.""" + return cls( # type: ignore + metadata_id=metadata_id, + **stats, + ) + + +class Statistics(Base, StatisticsBase): # type: ignore + """Long term statistics.""" + + duration = timedelta(hours=1) + + __table_args__ = ( + # Used for fetching statistics for a certain entity at a specific time + Index("ix_statistics_statistic_id_start", "metadata_id", "start"), + ) + __tablename__ = TABLE_STATISTICS + + +class StatisticsShortTerm(Base, StatisticsBase): # type: ignore + """Short term statistics.""" + + duration = timedelta(minutes=5) + + __table_args__ = ( + # Used for fetching statistics for a certain entity at a specific time + Index("ix_statistics_short_term_statistic_id_start", "metadata_id", "start"), + ) + __tablename__ = TABLE_STATISTICS_SHORT_TERM + + +class StatisticMetaData(TypedDict): + """Statistic meta data class.""" + + has_mean: bool + has_sum: bool + name: str | None + source: str + statistic_id: str + unit_of_measurement: str | None + + +class StatisticsMeta(Base): # type: ignore + """Statistics meta data.""" + + __table_args__ = ( + {"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"}, + ) + __tablename__ = TABLE_STATISTICS_META + id = Column(Integer, Identity(), primary_key=True) + statistic_id = Column(String(255), index=True) + source = Column(String(32)) + unit_of_measurement = Column(String(255)) + has_mean = Column(Boolean) + has_sum = Column(Boolean) + name = Column(String(255)) + + @staticmethod + def from_meta(meta: StatisticMetaData) -> StatisticsMeta: + """Create object from meta data.""" + return StatisticsMeta(**meta) + + +class RecorderRuns(Base): # type: ignore + """Representation of recorder run.""" + + __table_args__ = (Index("ix_recorder_runs_start_end", "start", "end"),) + __tablename__ = TABLE_RECORDER_RUNS + run_id = Column(Integer, Identity(), primary_key=True) + start = Column(DateTime(timezone=True), default=dt_util.utcnow) + end = Column(DateTime(timezone=True)) + closed_incorrect = Column(Boolean, default=False) + created = Column(DateTime(timezone=True), default=dt_util.utcnow) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + end = ( + f"'{self.end.isoformat(sep=' ', timespec='seconds')}'" if self.end else None + ) + return ( + f"" + ) + + def entity_ids(self, point_in_time=None): + """Return the entity ids that existed in this run. + + Specify point_in_time if you want to know which existed at that point + in time inside the run. + """ + session = Session.object_session(self) + + assert session is not None, "RecorderRuns need to be persisted" + + query = session.query(distinct(States.entity_id)).filter( + States.last_updated >= self.start + ) + + if point_in_time is not None: + query = query.filter(States.last_updated < point_in_time) + elif self.end is not None: + query = query.filter(States.last_updated < self.end) + + return [row[0] for row in query] + + def to_native(self, validate_entity_id=True): + """Return self, native format is this model.""" + return self + + +class SchemaChanges(Base): # type: ignore + """Representation of schema version changes.""" + + __tablename__ = TABLE_SCHEMA_CHANGES + change_id = Column(Integer, Identity(), primary_key=True) + schema_version = Column(Integer) + changed = Column(DateTime(timezone=True), default=dt_util.utcnow) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + f"" + ) + + +class StatisticsRuns(Base): # type: ignore + """Representation of statistics run.""" + + __tablename__ = TABLE_STATISTICS_RUNS + run_id = Column(Integer, Identity(), primary_key=True) + start = Column(DateTime(timezone=True)) + + def __repr__(self) -> str: + """Return string representation of instance for debugging.""" + return ( + f"" + ) + + +@overload +def process_timestamp(ts: None) -> None: + ... + + +@overload +def process_timestamp(ts: datetime) -> datetime: + ... + + +def process_timestamp(ts: datetime | None) -> datetime | None: + """Process a timestamp into datetime object.""" + if ts is None: + return None + if ts.tzinfo is None: + return ts.replace(tzinfo=dt_util.UTC) + + return dt_util.as_utc(ts) + + +@overload +def process_timestamp_to_utc_isoformat(ts: None) -> None: + ... + + +@overload +def process_timestamp_to_utc_isoformat(ts: datetime) -> str: + ... + + +def process_timestamp_to_utc_isoformat(ts: datetime | None) -> str | None: + """Process a timestamp into UTC isotime.""" + if ts is None: + return None + if ts.tzinfo == dt_util.UTC: + return ts.isoformat() + if ts.tzinfo is None: + return f"{ts.isoformat()}{DB_TIMEZONE}" + return ts.astimezone(dt_util.UTC).isoformat() + + +class LazyState(State): + """A lazy version of core State.""" + + __slots__ = [ + "_row", + "entity_id", + "state", + "_attributes", + "_last_changed", + "_last_updated", + "_context", + ] + + def __init__(self, row): # pylint: disable=super-init-not-called + """Init the lazy state.""" + self._row = row + self.entity_id = self._row.entity_id + self.state = self._row.state or "" + self._attributes = None + self._last_changed = None + self._last_updated = None + self._context = None + + @property # type: ignore + def attributes(self): + """State attributes.""" + if not self._attributes: + try: + self._attributes = json.loads(self._row.attributes) + except ValueError: + # When json.loads fails + _LOGGER.exception("Error converting row to state: %s", self._row) + self._attributes = {} + return self._attributes + + @attributes.setter + def attributes(self, value): + """Set attributes.""" + self._attributes = value + + @property # type: ignore + def context(self): + """State context.""" + if not self._context: + self._context = Context(id=None) + return self._context + + @context.setter + def context(self, value): + """Set context.""" + self._context = value + + @property # type: ignore + def last_changed(self): + """Last changed datetime.""" + if not self._last_changed: + self._last_changed = process_timestamp(self._row.last_changed) + return self._last_changed + + @last_changed.setter + def last_changed(self, value): + """Set last changed datetime.""" + self._last_changed = value + + @property # type: ignore + def last_updated(self): + """Last updated datetime.""" + if not self._last_updated: + self._last_updated = process_timestamp(self._row.last_updated) + return self._last_updated + + @last_updated.setter + def last_updated(self, value): + """Set last updated datetime.""" + self._last_updated = value + + def as_dict(self): + """Return a dict representation of the LazyState. + + Async friendly. + + To be used for JSON serialization. + """ + if self._last_changed: + last_changed_isoformat = self._last_changed.isoformat() + else: + last_changed_isoformat = process_timestamp_to_utc_isoformat( + self._row.last_changed + ) + if self._last_updated: + last_updated_isoformat = self._last_updated.isoformat() + else: + last_updated_isoformat = process_timestamp_to_utc_isoformat( + self._row.last_updated + ) + return { + "entity_id": self.entity_id, + "state": self.state, + "attributes": self._attributes or self.attributes, + "last_changed": last_changed_isoformat, + "last_updated": last_updated_isoformat, + } + + def __eq__(self, other): + """Return the comparison.""" + return ( + other.__class__ in [self.__class__, State] + and self.entity_id == other.entity_id + and self.state == other.state + and self.attributes == other.attributes + ) diff --git a/tests/components/recorder/test_statistics_v23_migration.py b/tests/components/recorder/test_statistics_v23_migration.py new file mode 100644 index 00000000000..d487743a87f --- /dev/null +++ b/tests/components/recorder/test_statistics_v23_migration.py @@ -0,0 +1,618 @@ +"""The tests for sensor recorder platform migrating statistics from v23. + +The v23 schema used for these tests has been slightly modified to add the +EventData table to allow the recorder to startup successfully. +""" +# pylint: disable=protected-access,invalid-name +import importlib +import json +import sys +from unittest.mock import patch + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import Session + +from homeassistant.components import recorder +from homeassistant.components.recorder import SQLITE_URL_PREFIX, statistics +from homeassistant.components.recorder.util import session_scope +from homeassistant.setup import setup_component +import homeassistant.util.dt as dt_util + +from tests.common import get_test_home_assistant +from tests.components.recorder.common import wait_recording_done + +ORIG_TZ = dt_util.DEFAULT_TIME_ZONE + +CREATE_ENGINE_TARGET = "homeassistant.components.recorder.core.create_engine" +SCHEMA_MODULE = "tests.components.recorder.models_schema_23_with_newer_columns" + + +def _create_engine_test(*args, **kwargs): + """Test version of create_engine that initializes with old schema. + + This simulates an existing db with the old schema. + """ + importlib.import_module(SCHEMA_MODULE) + old_models = sys.modules[SCHEMA_MODULE] + engine = create_engine(*args, **kwargs) + old_models.Base.metadata.create_all(engine) + with Session(engine) as session: + session.add(recorder.models.StatisticsRuns(start=statistics.get_start_time())) + session.add( + recorder.models.SchemaChanges(schema_version=old_models.SCHEMA_VERSION) + ) + session.commit() + return engine + + +def test_delete_duplicates(caplog, tmpdir): + """Test removal of duplicated statistics.""" + test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db") + dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}" + + importlib.import_module(SCHEMA_MODULE) + old_models = sys.modules[SCHEMA_MODULE] + + period1 = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) + period2 = dt_util.as_utc(dt_util.parse_datetime("2021-09-30 23:00:00")) + period3 = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) + period4 = dt_util.as_utc(dt_util.parse_datetime("2021-10-31 23:00:00")) + + external_energy_statistics_1 = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 2, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 3, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 4, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 5, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 5, + }, + ) + external_energy_metadata_1 = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import_tariff_1", + "unit_of_measurement": "kWh", + } + external_energy_statistics_2 = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 20, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 30, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 40, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 50, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 50, + }, + ) + external_energy_metadata_2 = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import_tariff_2", + "unit_of_measurement": "kWh", + } + external_co2_statistics = ( + { + "start": period1, + "last_reset": None, + "mean": 10, + }, + { + "start": period2, + "last_reset": None, + "mean": 30, + }, + { + "start": period3, + "last_reset": None, + "mean": 60, + }, + { + "start": period4, + "last_reset": None, + "mean": 90, + }, + ) + external_co2_metadata = { + "has_mean": True, + "has_sum": False, + "name": "Fossil percentage", + "source": "test", + "statistic_id": "test:fossil_percentage", + "unit_of_measurement": "%", + } + + # Create some duplicated statistics with schema version 23 + with patch.object(recorder, "models", old_models), patch.object( + recorder.migration, "SCHEMA_VERSION", old_models.SCHEMA_VERSION + ), patch(CREATE_ENGINE_TARGET, new=_create_engine_test): + hass = get_test_home_assistant() + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + wait_recording_done(hass) + wait_recording_done(hass) + + with session_scope(hass=hass) as session: + session.add( + recorder.models.StatisticsMeta.from_meta(external_energy_metadata_1) + ) + session.add( + recorder.models.StatisticsMeta.from_meta(external_energy_metadata_2) + ) + session.add(recorder.models.StatisticsMeta.from_meta(external_co2_metadata)) + with session_scope(hass=hass) as session: + for stat in external_energy_statistics_1: + session.add(recorder.models.Statistics.from_stats(1, stat)) + for stat in external_energy_statistics_2: + session.add(recorder.models.Statistics.from_stats(2, stat)) + for stat in external_co2_statistics: + session.add(recorder.models.Statistics.from_stats(3, stat)) + + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + # Test that the duplicates are removed during migration from schema 23 + hass = get_test_home_assistant() + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + hass.start() + wait_recording_done(hass) + wait_recording_done(hass) + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + assert "Deleted 2 duplicated statistics rows" in caplog.text + assert "Found non identical" not in caplog.text + assert "Found duplicated" not in caplog.text + + +def test_delete_duplicates_many(caplog, tmpdir): + """Test removal of duplicated statistics.""" + test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db") + dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}" + + importlib.import_module(SCHEMA_MODULE) + old_models = sys.modules[SCHEMA_MODULE] + + period1 = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) + period2 = dt_util.as_utc(dt_util.parse_datetime("2021-09-30 23:00:00")) + period3 = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) + period4 = dt_util.as_utc(dt_util.parse_datetime("2021-10-31 23:00:00")) + + external_energy_statistics_1 = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 2, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 3, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 4, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 5, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 5, + }, + ) + external_energy_metadata_1 = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import_tariff_1", + "unit_of_measurement": "kWh", + } + external_energy_statistics_2 = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 20, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 30, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 40, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 50, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 50, + }, + ) + external_energy_metadata_2 = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import_tariff_2", + "unit_of_measurement": "kWh", + } + external_co2_statistics = ( + { + "start": period1, + "last_reset": None, + "mean": 10, + }, + { + "start": period2, + "last_reset": None, + "mean": 30, + }, + { + "start": period3, + "last_reset": None, + "mean": 60, + }, + { + "start": period4, + "last_reset": None, + "mean": 90, + }, + ) + external_co2_metadata = { + "has_mean": True, + "has_sum": False, + "name": "Fossil percentage", + "source": "test", + "statistic_id": "test:fossil_percentage", + "unit_of_measurement": "%", + } + + # Create some duplicated statistics with schema version 23 + with patch.object(recorder, "models", old_models), patch.object( + recorder.migration, "SCHEMA_VERSION", old_models.SCHEMA_VERSION + ), patch(CREATE_ENGINE_TARGET, new=_create_engine_test): + hass = get_test_home_assistant() + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + wait_recording_done(hass) + wait_recording_done(hass) + + with session_scope(hass=hass) as session: + session.add( + recorder.models.StatisticsMeta.from_meta(external_energy_metadata_1) + ) + session.add( + recorder.models.StatisticsMeta.from_meta(external_energy_metadata_2) + ) + session.add(recorder.models.StatisticsMeta.from_meta(external_co2_metadata)) + with session_scope(hass=hass) as session: + for stat in external_energy_statistics_1: + session.add(recorder.models.Statistics.from_stats(1, stat)) + for _ in range(3000): + session.add( + recorder.models.Statistics.from_stats( + 1, external_energy_statistics_1[-1] + ) + ) + for stat in external_energy_statistics_2: + session.add(recorder.models.Statistics.from_stats(2, stat)) + for stat in external_co2_statistics: + session.add(recorder.models.Statistics.from_stats(3, stat)) + + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + # Test that the duplicates are removed during migration from schema 23 + hass = get_test_home_assistant() + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + hass.start() + wait_recording_done(hass) + wait_recording_done(hass) + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + assert "Deleted 3002 duplicated statistics rows" in caplog.text + assert "Found non identical" not in caplog.text + assert "Found duplicated" not in caplog.text + + +@pytest.mark.freeze_time("2021-08-01 00:00:00+00:00") +def test_delete_duplicates_non_identical(caplog, tmpdir): + """Test removal of duplicated statistics.""" + test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db") + dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}" + + importlib.import_module(SCHEMA_MODULE) + old_models = sys.modules[SCHEMA_MODULE] + + period1 = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) + period2 = dt_util.as_utc(dt_util.parse_datetime("2021-09-30 23:00:00")) + period3 = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) + period4 = dt_util.as_utc(dt_util.parse_datetime("2021-10-31 23:00:00")) + + external_energy_statistics_1 = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 2, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 3, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 4, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 5, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 6, + }, + ) + external_energy_metadata_1 = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import_tariff_1", + "unit_of_measurement": "kWh", + } + external_energy_statistics_2 = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 20, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 30, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 40, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 50, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 50, + }, + ) + external_energy_metadata_2 = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import_tariff_2", + "unit_of_measurement": "kWh", + } + + # Create some duplicated statistics with schema version 23 + with patch.object(recorder, "models", old_models), patch.object( + recorder.migration, "SCHEMA_VERSION", old_models.SCHEMA_VERSION + ), patch(CREATE_ENGINE_TARGET, new=_create_engine_test): + hass = get_test_home_assistant() + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + wait_recording_done(hass) + wait_recording_done(hass) + + with session_scope(hass=hass) as session: + session.add( + recorder.models.StatisticsMeta.from_meta(external_energy_metadata_1) + ) + session.add( + recorder.models.StatisticsMeta.from_meta(external_energy_metadata_2) + ) + with session_scope(hass=hass) as session: + for stat in external_energy_statistics_1: + session.add(recorder.models.Statistics.from_stats(1, stat)) + for stat in external_energy_statistics_2: + session.add(recorder.models.Statistics.from_stats(2, stat)) + + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + # Test that the duplicates are removed during migration from schema 23 + hass = get_test_home_assistant() + hass.config.config_dir = tmpdir + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + hass.start() + wait_recording_done(hass) + wait_recording_done(hass) + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + assert "Deleted 2 duplicated statistics rows" in caplog.text + assert "Deleted 1 non identical" in caplog.text + assert "Found duplicated" not in caplog.text + + isotime = dt_util.utcnow().isoformat() + backup_file_name = f".storage/deleted_statistics.{isotime}.json" + + with open(hass.config.path(backup_file_name)) as backup_file: + backup = json.load(backup_file) + + assert backup == [ + { + "duplicate": { + "created": "2021-08-01T00:00:00", + "id": 4, + "last_reset": None, + "max": None, + "mean": None, + "metadata_id": 1, + "min": None, + "start": "2021-10-31T23:00:00", + "state": 3.0, + "sum": 5.0, + }, + "original": { + "created": "2021-08-01T00:00:00", + "id": 5, + "last_reset": None, + "max": None, + "mean": None, + "metadata_id": 1, + "min": None, + "start": "2021-10-31T23:00:00", + "state": 3.0, + "sum": 6.0, + }, + } + ] + + +def test_delete_duplicates_short_term(caplog, tmpdir): + """Test removal of duplicated statistics.""" + test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db") + dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}" + + importlib.import_module(SCHEMA_MODULE) + old_models = sys.modules[SCHEMA_MODULE] + + period4 = dt_util.as_utc(dt_util.parse_datetime("2021-10-31 23:00:00")) + + external_energy_metadata_1 = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import_tariff_1", + "unit_of_measurement": "kWh", + } + statistic_row = { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 5, + } + + # Create some duplicated statistics with schema version 23 + with patch.object(recorder, "models", old_models), patch.object( + recorder.migration, "SCHEMA_VERSION", old_models.SCHEMA_VERSION + ), patch(CREATE_ENGINE_TARGET, new=_create_engine_test): + hass = get_test_home_assistant() + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + wait_recording_done(hass) + wait_recording_done(hass) + + with session_scope(hass=hass) as session: + session.add( + recorder.models.StatisticsMeta.from_meta(external_energy_metadata_1) + ) + with session_scope(hass=hass) as session: + session.add( + recorder.models.StatisticsShortTerm.from_stats(1, statistic_row) + ) + session.add( + recorder.models.StatisticsShortTerm.from_stats(1, statistic_row) + ) + + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + # Test that the duplicates are removed during migration from schema 23 + hass = get_test_home_assistant() + hass.config.config_dir = tmpdir + setup_component(hass, "recorder", {"recorder": {"db_url": dburl}}) + hass.start() + wait_recording_done(hass) + wait_recording_done(hass) + hass.stop() + dt_util.DEFAULT_TIME_ZONE = ORIG_TZ + + assert "duplicated statistics rows" not in caplog.text + assert "Found non identical" not in caplog.text + assert "Deleted duplicated short term statistic" in caplog.text