Fix statistics schema auto repair when there is bad data (#89903)

- If the user had previously duplicated data we could end up
  picking the next metadata_id and there could be stale rows
  in the database that have that metadata_id. This can only happen
  from bad manual migrations (which is what this function
  is validating in the first place). To solve this we now insert
  data with a future date and look at the latest inserted row
  instead of the first.

Example
```
['stored_statistics',
  defaultdict(<class 'list'>,
              {'recorder.db_test_schema': [{'end': 948589200.0,
                                            'last_reset': None,
                                            'max': None,
                                            'mean': 2021.0,
                                            'min': None,
                                            'start': 948585600.0,
                                            'state': None,
                                            'sum': 394.5068},
                                          {'end': 1601946000.000001,
                                            'last_reset': 1601942400.000001,
                                            'max': 1.000000000000001,
                                            'mean': 1.000000000000001,
                                            'min': 1.000000000000001,
                                            'start': 1601942400.000001,
                                            'state': 1.000000000000001,
                                            'sum': 1.000000000000001}]})]
```
This commit is contained in:
J. Nick Koston 2023-03-19 18:06:23 -10:00 committed by GitHub
parent 9721ba59b6
commit e798c30b8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 6 deletions

View File

@ -2527,6 +2527,11 @@ def _validate_db_schema_utf8(
return schema_errors
def _get_future_year() -> int:
"""Get a year in the future."""
return datetime.now().year + 1
def _validate_db_schema(
hass: HomeAssistant, instance: Recorder, session_maker: Callable[[], Session]
) -> set[str]:
@ -2544,9 +2549,16 @@ def _validate_db_schema(
# This number can't be accurately represented as a 32-bit float
precise_number = 1.000000000000001
# This time can't be accurately represented unless datetimes have µs precision
precise_time = datetime(2020, 10, 6, microsecond=1, tzinfo=dt_util.UTC)
start_time = datetime(2020, 10, 6, tzinfo=dt_util.UTC)
#
# We want to insert statistics for a time in the future, in case they
# have conflicting metadata_id's with existing statistics that were
# never cleaned up. By inserting in the future, we can be sure
# that by selecting the last inserted row, we will get the one we
# just inserted.
#
future_year = _get_future_year()
precise_time = datetime(future_year, 10, 6, microsecond=1, tzinfo=dt_util.UTC)
start_time = datetime(future_year, 10, 6, tzinfo=dt_util.UTC)
statistic_id = f"{DOMAIN}.db_test"
metadata: StatisticMetaData = {
@ -2614,9 +2626,15 @@ def _validate_db_schema(
)
continue
# We want to look at the last inserted row to make sure there
# is no previous garbage data in the table that would cause
# the test to produce an incorrect result. To achieve this,
# we inserted a row in the future, and now we select the last
# inserted row back.
last_stored_statistic = stored_statistic[-1]
check_columns(
schema_errors,
stored_statistic[0],
last_stored_statistic,
statistics,
("max", "mean", "min", "state", "sum"),
table.__tablename__,
@ -2625,7 +2643,7 @@ def _validate_db_schema(
assert statistics["last_reset"]
check_columns(
schema_errors,
stored_statistic[0],
last_stored_statistic,
{
"last_reset": datetime_to_timestamp_or_none(
statistics["last_reset"]

View File

@ -26,6 +26,7 @@ from homeassistant.components.recorder.statistics import (
_generate_max_mean_min_statistic_in_sub_period_stmt,
_generate_statistics_at_time_stmt,
_generate_statistics_during_period_stmt,
_get_future_year,
_statistics_during_period_with_session,
async_add_external_statistics,
async_import_statistics,
@ -1633,7 +1634,8 @@ async def test_validate_db_schema_fix_float_issue(
orig_error = MagicMock()
orig_error.args = [1366]
precise_number = 1.000000000000001
precise_time = datetime(2020, 10, 6, microsecond=1, tzinfo=dt_util.UTC)
fixed_future_year = _get_future_year()
precise_time = datetime(fixed_future_year, 10, 6, microsecond=1, tzinfo=dt_util.UTC)
statistics = {
"recorder.db_test": [
{
@ -1653,6 +1655,9 @@ async def test_validate_db_schema_fix_float_issue(
with patch(
"homeassistant.components.recorder.core.Recorder.dialect_name", db_engine
), patch(
"homeassistant.components.recorder.statistics._get_future_year",
return_value=fixed_future_year,
), patch(
"homeassistant.components.recorder.statistics._statistics_during_period_with_session",
side_effect=fake_statistics,