Remove limit of amount of duplicated statistics (#65641)

Erik Montnemery 2022-02-04 14:49:45 +01:00 committed by Paulus Schoutsen
parent 67a9932c5c
commit 4e3cd1471a
2 changed files with 0 additions and 153 deletions


@@ -119,8 +119,6 @@ QUERY_STATISTIC_META_ID = [
     StatisticsMeta.statistic_id,
 ]
 
-MAX_DUPLICATES = 1000000
-
 STATISTICS_BAKERY = "recorder_statistics_bakery"
 STATISTICS_META_BAKERY = "recorder_statistics_meta_bakery"
 STATISTICS_SHORT_TERM_BAKERY = "recorder_statistics_short_term_bakery"
@@ -351,8 +349,6 @@ def _delete_duplicates_from_table(
             .delete(synchronize_session=False)
         )
         total_deleted_rows += deleted_rows
-        if total_deleted_rows >= MAX_DUPLICATES:
-            break
 
     return (total_deleted_rows, all_non_identical_duplicates)
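For readers skimming the hunk above: with the early break gone, the deletion loop always drains the full list of duplicate row ids instead of stopping at MAX_DUPLICATES. A minimal sketch of that pattern, assuming a SQLAlchemy session and a list of ids; the helper name, the batch-size constant, and the bare table parameter are illustrative assumptions, not the recorder's exact internals:

from sqlalchemy.orm import Session

# Assumed batch size: SQLite caps the number of bind parameters per query.
SQLITE_MAX_BIND_VARS = 999


def _purge_duplicate_ids(session: Session, table, duplicate_ids: list[int]) -> int:
    """Delete the given rows in batches, with no upper cap on the total."""
    total_deleted_rows = 0
    for index in range(0, len(duplicate_ids), SQLITE_MAX_BIND_VARS):
        batch = duplicate_ids[index : index + SQLITE_MAX_BIND_VARS]
        total_deleted_rows += (
            session.query(table)
            .filter(table.id.in_(batch))
            .delete(synchronize_session=False)
        )
        # Before this commit, the loop broke out here once
        # total_deleted_rows reached MAX_DUPLICATES (1000000).
    return total_deleted_rows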
@@ -389,13 +385,6 @@ def delete_duplicates(instance: Recorder, session: scoped_session) -> None:
             backup_path,
         )
 
-    if deleted_statistics_rows >= MAX_DUPLICATES:
-        _LOGGER.warning(
-            "Found more than %s duplicated statistic rows, please report at "
-            'https://github.com/home-assistant/core/issues?q=is%%3Aissue+label%%3A"integration%%3A+recorder"+',
-            MAX_DUPLICATES - 1,
-        )
-
     deleted_short_term_statistics_rows, _ = _delete_duplicates_from_table(
         session, StatisticsShortTerm
     )


@@ -852,7 +852,6 @@ def test_delete_duplicates(caplog, tmpdir):
     assert "Deleted 2 duplicated statistics rows" in caplog.text
     assert "Found non identical" not in caplog.text
-    assert "Found more than" not in caplog.text
     assert "Found duplicated" not in caplog.text
@@ -989,7 +988,6 @@ def test_delete_duplicates_non_identical(caplog, tmpdir):
     assert "Deleted 2 duplicated statistics rows" in caplog.text
     assert "Deleted 1 non identical" in caplog.text
-    assert "Found more than" not in caplog.text
     assert "Found duplicated" not in caplog.text
 
     isotime = dt_util.utcnow().isoformat()
@@ -1028,144 +1026,6 @@ def test_delete_duplicates_non_identical(caplog, tmpdir):
     ]
 
 
-@patch.object(statistics, "MAX_DUPLICATES", 2)
-def test_delete_duplicates_too_many(caplog, tmpdir):
-    """Test removal of duplicated statistics."""
-    test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db")
-    dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}"
-
-    module = "tests.components.recorder.models_schema_23"
-    importlib.import_module(module)
-    old_models = sys.modules[module]
-
-    period1 = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00"))
-    period2 = dt_util.as_utc(dt_util.parse_datetime("2021-09-30 23:00:00"))
-    period3 = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00"))
-    period4 = dt_util.as_utc(dt_util.parse_datetime("2021-10-31 23:00:00"))
-
-    external_energy_statistics_1 = (
-        {
-            "start": period1,
-            "last_reset": None,
-            "state": 0,
-            "sum": 2,
-        },
-        {
-            "start": period2,
-            "last_reset": None,
-            "state": 1,
-            "sum": 3,
-        },
-        {
-            "start": period3,
-            "last_reset": None,
-            "state": 2,
-            "sum": 4,
-        },
-        {
-            "start": period4,
-            "last_reset": None,
-            "state": 3,
-            "sum": 5,
-        },
-        {
-            "start": period4,
-            "last_reset": None,
-            "state": 3,
-            "sum": 5,
-        },
-    )
-    external_energy_metadata_1 = {
-        "has_mean": False,
-        "has_sum": True,
-        "name": "Total imported energy",
-        "source": "test",
-        "statistic_id": "test:total_energy_import_tariff_1",
-        "unit_of_measurement": "kWh",
-    }
-    external_energy_statistics_2 = (
-        {
-            "start": period1,
-            "last_reset": None,
-            "state": 0,
-            "sum": 20,
-        },
-        {
-            "start": period2,
-            "last_reset": None,
-            "state": 1,
-            "sum": 30,
-        },
-        {
-            "start": period3,
-            "last_reset": None,
-            "state": 2,
-            "sum": 40,
-        },
-        {
-            "start": period4,
-            "last_reset": None,
-            "state": 3,
-            "sum": 50,
-        },
-        {
-            "start": period4,
-            "last_reset": None,
-            "state": 3,
-            "sum": 50,
-        },
-    )
-    external_energy_metadata_2 = {
-        "has_mean": False,
-        "has_sum": True,
-        "name": "Total imported energy",
-        "source": "test",
-        "statistic_id": "test:total_energy_import_tariff_2",
-        "unit_of_measurement": "kWh",
-    }
-
-    # Create some duplicated statistics with schema version 23
-    with patch.object(recorder, "models", old_models), patch.object(
-        recorder.migration, "SCHEMA_VERSION", old_models.SCHEMA_VERSION
-    ), patch(
-        "homeassistant.components.recorder.create_engine", new=_create_engine_test
-    ):
-        hass = get_test_home_assistant()
-        setup_component(hass, "recorder", {"recorder": {"db_url": dburl}})
-        wait_recording_done(hass)
-        wait_recording_done(hass)
-
-        with session_scope(hass=hass) as session:
-            session.add(
-                recorder.models.StatisticsMeta.from_meta(external_energy_metadata_1)
-            )
-            session.add(
-                recorder.models.StatisticsMeta.from_meta(external_energy_metadata_2)
-            )
-        with session_scope(hass=hass) as session:
-            for stat in external_energy_statistics_1:
-                session.add(recorder.models.Statistics.from_stats(1, stat))
-            for stat in external_energy_statistics_2:
-                session.add(recorder.models.Statistics.from_stats(2, stat))
-
-        hass.stop()
-
-    # Test that the duplicates are removed during migration from schema 23
-    hass = get_test_home_assistant()
-    hass.config.config_dir = tmpdir
-    setup_component(hass, "recorder", {"recorder": {"db_url": dburl}})
-    hass.start()
-    wait_recording_done(hass)
-    wait_recording_done(hass)
-    hass.stop()
-
-    assert "Deleted 2 duplicated statistics rows" in caplog.text
-    assert "Found non identical" not in caplog.text
-    assert "Found more than 1 duplicated statistic rows" in caplog.text
-    assert "Found duplicated" not in caplog.text
-
-
-@patch.object(statistics, "MAX_DUPLICATES", 2)
 def test_delete_duplicates_short_term(caplog, tmpdir):
     """Test removal of duplicated statistics."""
     test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db")
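The deleted test above leaned on unittest.mock.patch.object to shrink the limit, so the warning path was reachable with only five rows per series. A self-contained sketch of that patching pattern; the statistics namespace and would_warn helper here are stand-ins, not the real recorder module:

import types
from unittest.mock import patch

# Stand-in for homeassistant.components.recorder.statistics, which defined
# MAX_DUPLICATES before this commit.
statistics = types.SimpleNamespace(MAX_DUPLICATES=1000000)


def would_warn(total_deleted_rows: int) -> bool:
    """Mirror the removed check: warn once the delete loop hit the cap."""
    return total_deleted_rows >= statistics.MAX_DUPLICATES


with patch.object(statistics, "MAX_DUPLICATES", 2):
    # Inside the patch, two deletions already count as "too many", so the
    # removed test needed only a handful of duplicate rows to trigger it.
    assert would_warn(2)
assert not would_warn(2)  # the original limit is restored on exit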
@@ -1228,7 +1088,6 @@ def test_delete_duplicates_short_term(caplog, tmpdir):
     assert "duplicated statistics rows" not in caplog.text
     assert "Found non identical" not in caplog.text
-    assert "Found more than" not in caplog.text
     assert "Deleted duplicated short term statistic" in caplog.text
@@ -1240,7 +1099,6 @@ def test_delete_duplicates_no_duplicates(hass_recorder, caplog):
         delete_duplicates(hass.data[DATA_INSTANCE], session)
     assert "duplicated statistics rows" not in caplog.text
     assert "Found non identical" not in caplog.text
-    assert "Found more than" not in caplog.text
     assert "Found duplicated" not in caplog.text