From e798c30b8b2332361951742dc4df12cc355ae596 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Mar 2023 18:06:23 -1000 Subject: [PATCH] Fix statistics schema auto repair when there is bad data (#89903) - If the user had previously duplicated data we could end up picking the next metadata_id and there could be stale rows in the database that have that metadata_id. This can only happen from bad manual migrations (which is what this function is validating in the first place). To solve this we now insert data with a future date and look at the latest inserted row instead of the first. Example ``` ['stored_statistics', defaultdict(<class 'list'>, {'recorder.db_test_schema': [{'end': 948589200.0, 'last_reset': None, 'max': None, 'mean': 2021.0, 'min': None, 'start': 948585600.0, 'state': None, 'sum': 394.5068}, {'end': 1601946000.000001, 'last_reset': 1601942400.000001, 'max': 1.000000000000001, 'mean': 1.000000000000001, 'min': 1.000000000000001, 'start': 1601942400.000001, 'state': 1.000000000000001, 'sum': 1.000000000000001}]})] ``` --- .../components/recorder/statistics.py | 28 +++++++++++++++---- tests/components/recorder/test_statistics.py | 7 ++++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py index 2f93a8a833e..b117556b0af 100644 --- a/homeassistant/components/recorder/statistics.py +++ b/homeassistant/components/recorder/statistics.py @@ -2527,6 +2527,11 @@ def _validate_db_schema_utf8( return schema_errors +def _get_future_year() -> int: + """Get a year in the future.""" + return datetime.now().year + 1 + + def _validate_db_schema( hass: HomeAssistant, instance: Recorder, session_maker: Callable[[], Session] ) -> set[str]: @@ -2544,9 +2549,16 @@ def _validate_db_schema( # This number can't be accurately represented as a 32-bit float precise_number = 1.000000000000001 # This time can't be accurately represented unless datetimes have µs precision - 
precise_time = datetime(2020, 10, 6, microsecond=1, tzinfo=dt_util.UTC) - - start_time = datetime(2020, 10, 6, tzinfo=dt_util.UTC) + # + # We want to insert statistics for a time in the future, in case they + # have conflicting metadata_id's with existing statistics that were + # never cleaned up. By inserting in the future, we can be sure that + # that by selecting the last inserted row, we will get the one we + # just inserted. + # + future_year = _get_future_year() + precise_time = datetime(future_year, 10, 6, microsecond=1, tzinfo=dt_util.UTC) + start_time = datetime(future_year, 10, 6, tzinfo=dt_util.UTC) statistic_id = f"{DOMAIN}.db_test" metadata: StatisticMetaData = { @@ -2614,9 +2626,15 @@ def _validate_db_schema( ) continue + # We want to look at the last inserted row to make sure there + # is not previous garbage data in the table that would cause + # the test to produce an incorrect result. To achieve this, + # we inserted a row in the future, and now we select the last + # inserted row back. 
+ last_stored_statistic = stored_statistic[-1] check_columns( schema_errors, - stored_statistic[0], + last_stored_statistic, statistics, ("max", "mean", "min", "state", "sum"), table.__tablename__, @@ -2625,7 +2643,7 @@ def _validate_db_schema( assert statistics["last_reset"] check_columns( schema_errors, - stored_statistic[0], + last_stored_statistic, { "last_reset": datetime_to_timestamp_or_none( statistics["last_reset"] diff --git a/tests/components/recorder/test_statistics.py b/tests/components/recorder/test_statistics.py index 4863c6c0547..ad4d0de410e 100644 --- a/tests/components/recorder/test_statistics.py +++ b/tests/components/recorder/test_statistics.py @@ -26,6 +26,7 @@ from homeassistant.components.recorder.statistics import ( _generate_max_mean_min_statistic_in_sub_period_stmt, _generate_statistics_at_time_stmt, _generate_statistics_during_period_stmt, + _get_future_year, _statistics_during_period_with_session, async_add_external_statistics, async_import_statistics, @@ -1633,7 +1634,8 @@ async def test_validate_db_schema_fix_float_issue( orig_error = MagicMock() orig_error.args = [1366] precise_number = 1.000000000000001 - precise_time = datetime(2020, 10, 6, microsecond=1, tzinfo=dt_util.UTC) + fixed_future_year = _get_future_year() + precise_time = datetime(fixed_future_year, 10, 6, microsecond=1, tzinfo=dt_util.UTC) statistics = { "recorder.db_test": [ { @@ -1653,6 +1655,9 @@ async def test_validate_db_schema_fix_float_issue( with patch( "homeassistant.components.recorder.core.Recorder.dialect_name", db_engine + ), patch( + "homeassistant.components.recorder.statistics._get_future_year", + return_value=fixed_future_year, ), patch( "homeassistant.components.recorder.statistics._statistics_during_period_with_session", side_effect=fake_statistics,