Cache the latest short term stat id for each metadata_id on each run (#100535)

Parent: 4b39bf7e5b
Commit: c9a55c7f84
@@ -24,6 +24,7 @@ import voluptuous as vol
 from homeassistant.const import ATTR_UNIT_OF_MEASUREMENT
 from homeassistant.core import HomeAssistant, callback, valid_entity_id
 from homeassistant.exceptions import HomeAssistantError
+from homeassistant.helpers.singleton import singleton
 from homeassistant.helpers.typing import UNDEFINED, UndefinedType
 from homeassistant.util import dt as dt_util
 from homeassistant.util.unit_conversion import (
@@ -141,10 +142,39 @@ STATISTIC_UNIT_TO_UNIT_CONVERTER: dict[str | None, type[BaseUnitConverter]] = {
     **{unit: VolumeConverter for unit in VolumeConverter.VALID_UNITS},
 }
 
 
+DATA_SHORT_TERM_STATISTICS_RUN_CACHE = "recorder_short_term_statistics_run_cache"
+
 
 _LOGGER = logging.getLogger(__name__)
 
 
+@dataclasses.dataclass(slots=True)
+class ShortTermStatisticsRunCache:
+    """Cache for short term statistics runs."""
+
+    # This is a mapping of metadata_id:id of the last short term
+    # statistics run for each metadata_id
+    _latest_id_by_metadata_id: dict[int, int] = dataclasses.field(default_factory=dict)
+
+    def get_latest_ids(self, metadata_ids: set[int]) -> dict[int, int]:
+        """Return the latest short term statistics ids for the metadata_ids."""
+        return {
+            metadata_id: id_
+            for metadata_id, id_ in self._latest_id_by_metadata_id.items()
+            if metadata_id in metadata_ids
+        }
+
+    def set_latest_id_for_metadata_id(self, metadata_id: int, id_: int) -> None:
+        """Cache the latest id for the metadata_id."""
+        self._latest_id_by_metadata_id[metadata_id] = id_
+
+    def set_latest_ids_for_metadata_ids(
+        self, metadata_id_to_id: dict[int, int]
+    ) -> None:
+        """Cache the latest id for each metadata_id."""
+        self._latest_id_by_metadata_id.update(metadata_id_to_id)
+
+
 class BaseStatisticsRow(TypedDict, total=False):
     """A processed row of statistic data."""
 
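A minimal usage sketch of the new run cache, assuming the ShortTermStatisticsRunCache class from the hunk above is in scope; the metadata_id and id values are invented for illustration only:

# Hypothetical values; in the recorder these come from the database rows.
cache = ShortTermStatisticsRunCache()
cache.set_latest_id_for_metadata_id(metadata_id=1, id_=1001)
cache.set_latest_ids_for_metadata_ids({2: 2002, 3: 3003})

# Only the requested metadata_ids are returned; unknown ids are simply absent.
assert cache.get_latest_ids({1, 2, 99}) == {1: 1001, 2: 2002}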
@@ -508,6 +538,8 @@ def _compile_statistics(
         platform_stats.extend(compiled.platform_stats)
         current_metadata.update(compiled.current_metadata)
 
+    new_short_term_stats: list[StatisticsBase] = []
+    updated_metadata_ids: set[int] = set()
     # Insert collected statistics in the database
     for stats in platform_stats:
         modified_statistic_id, metadata_id = statistics_meta_manager.update_or_add(
@@ -515,12 +547,14 @@ def _compile_statistics(
         )
         if modified_statistic_id is not None:
             modified_statistic_ids.add(modified_statistic_id)
-        _insert_statistics(
+        updated_metadata_ids.add(metadata_id)
+        if new_stat := _insert_statistics(
             session,
             StatisticsShortTerm,
             metadata_id,
             stats["stat"],
-        )
+        ):
+            new_short_term_stats.append(new_stat)
 
     if start.minute == 55:
         # A full hour is ready, summarize it
@@ -533,6 +567,23 @@ def _compile_statistics(
     if start.minute == 55:
         instance.hass.bus.fire(EVENT_RECORDER_HOURLY_STATISTICS_GENERATED)
 
+    if updated_metadata_ids:
+        # These are always the newest statistics, so we can update
+        # the run cache without having to check the start_ts.
+        session.flush()  # populate the ids of the new StatisticsShortTerm rows
+        run_cache = get_short_term_statistics_run_cache(instance.hass)
+        # metadata_id is typed to allow None, but we know it's not None here
+        # so we can safely cast it to int.
+        run_cache.set_latest_ids_for_metadata_ids(
+            cast(
+                dict[int, int],
+                {
+                    new_stat.metadata_id: new_stat.id
+                    for new_stat in new_short_term_stats
+                },
+            )
+        )
+
     return modified_statistic_ids
 
 
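The session.flush() call is what makes new_stat.id usable before the transaction commits. A minimal SQLAlchemy sketch, independent of the recorder code (the Demo model, table name, and engine URL are invented for this example), showing that flush() emits the pending INSERTs so autoincrement primary keys are populated on the ORM objects:

from sqlalchemy import Column, Integer, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Demo(Base):
    __tablename__ = "demo"  # hypothetical table for illustration
    id = Column(Integer, primary_key=True)


engine = create_engine("sqlite://")  # in-memory database
Base.metadata.create_all(engine)

with Session(engine) as session:
    row = Demo()
    session.add(row)
    assert row.id is None  # pending object, no primary key yet
    session.flush()        # INSERT is sent; the autoincrement id is now set
    assert row.id is not None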
@@ -566,16 +617,19 @@ def _insert_statistics(
     table: type[StatisticsBase],
     metadata_id: int,
     statistic: StatisticData,
-) -> None:
+) -> StatisticsBase | None:
     """Insert statistics in the database."""
     try:
-        session.add(table.from_stats(metadata_id, statistic))
+        stat = table.from_stats(metadata_id, statistic)
+        session.add(stat)
+        return stat
     except SQLAlchemyError:
         _LOGGER.exception(
             "Unexpected exception when inserting statistics %s:%s ",
             metadata_id,
             statistic,
         )
+    return None
 
 
 def _update_statistics(
@@ -1809,24 +1863,26 @@ def get_last_short_term_statistics(
     )
 
 
-def _latest_short_term_statistics_stmt(
-    metadata_ids: list[int],
+def get_latest_short_term_statistics_by_ids(
+    session: Session, ids: Iterable[int]
+) -> list[Row]:
+    """Return the latest short term statistics for a list of ids."""
+    stmt = _latest_short_term_statistics_by_ids_stmt(ids)
+    return list(
+        cast(
+            Sequence[Row],
+            execute_stmt_lambda_element(session, stmt, orm_rows=False),
+        )
+    )
+
+
+def _latest_short_term_statistics_by_ids_stmt(
+    ids: Iterable[int],
 ) -> StatementLambdaElement:
-    """Create the statement for finding the latest short term stat rows."""
+    """Create the statement for finding the latest short term stat rows by id."""
     return lambda_stmt(
-        lambda: select(*QUERY_STATISTICS_SHORT_TERM).join(
-            (
-                most_recent_statistic_row := (
-                    select(
-                        StatisticsShortTerm.metadata_id,
-                        func.max(StatisticsShortTerm.start_ts).label("start_max"),
-                    )
-                    .where(StatisticsShortTerm.metadata_id.in_(metadata_ids))
-                    .group_by(StatisticsShortTerm.metadata_id)
-                ).subquery()
-            ),
-            (StatisticsShortTerm.metadata_id == most_recent_statistic_row.c.metadata_id)
-            & (StatisticsShortTerm.start_ts == most_recent_statistic_row.c.start_max),
+        lambda: select(*QUERY_STATISTICS_SHORT_TERM).filter(
+            StatisticsShortTerm.id.in_(ids)
         )
     )
 
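For orientation, the removed grouped statement and the new id lookup compare roughly as follows. This is approximate SQL written by hand as a Python comment, not taken verbatim from the queries SQLAlchemy generates, and column lists are abbreviated:

# -- old: one grouped scan across all requested metadata_ids
# SELECT s.* FROM statistics_short_term AS s
# JOIN (SELECT metadata_id, MAX(start_ts) AS start_max
#       FROM statistics_short_term
#       WHERE metadata_id IN (...)
#       GROUP BY metadata_id) AS latest
#   ON s.metadata_id = latest.metadata_id AND s.start_ts = latest.start_max
#
# -- new: a plain primary-key lookup using the ids held in the run cache
# SELECT s.* FROM statistics_short_term AS s WHERE s.id IN (...)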
@@ -1846,11 +1902,38 @@ def get_latest_short_term_statistics(
     )
     if not metadata:
         return {}
-    metadata_ids = _extract_metadata_and_discard_impossible_columns(metadata, types)
-    stmt = _latest_short_term_statistics_stmt(metadata_ids)
-    stats = cast(
-        Sequence[Row], execute_stmt_lambda_element(session, stmt, orm_rows=False)
+    metadata_ids = set(
+        _extract_metadata_and_discard_impossible_columns(metadata, types)
     )
+    run_cache = get_short_term_statistics_run_cache(hass)
+    # Try to find the latest short term statistics ids for the metadata_ids
+    # from the run cache first if we have it. If the run cache references
+    # a non-existent id because of a purge, we will detect it missing in the
+    # next step and run a query to re-populate the cache.
+    stats: list[Row] = []
+    if metadata_id_to_id := run_cache.get_latest_ids(metadata_ids):
+        stats = get_latest_short_term_statistics_by_ids(
+            session, metadata_id_to_id.values()
+        )
+    # If we are missing some metadata_ids in the run cache, we need to run a query
+    # to populate the cache for each metadata_id, and then run another query
+    # to get the latest short term statistics for the missing metadata_ids.
+    if (missing_metadata_ids := metadata_ids - set(metadata_id_to_id)) and (
+        found_latest_ids := {
+            latest_id
+            for metadata_id in missing_metadata_ids
+            if (
+                latest_id := cache_latest_short_term_statistic_id_for_metadata_id(
+                    run_cache, session, metadata_id
+                )
+            )
+            is not None
+        }
+    ):
+        stats.extend(
+            get_latest_short_term_statistics_by_ids(session, found_latest_ids)
+        )
 
     if not stats:
         return {}
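A small sketch of the hit/miss split this hunk performs, assuming the ShortTermStatisticsRunCache class from the earlier hunk; the ids are illustrative only:

run_cache = ShortTermStatisticsRunCache()
run_cache.set_latest_ids_for_metadata_ids({1: 1001, 2: 2002})

metadata_ids = {1, 2, 3}
# Cache hits are looked up directly by id.
metadata_id_to_id = run_cache.get_latest_ids(metadata_ids)    # {1: 1001, 2: 2002}
# Cache misses fall back to one query per metadata_id, which also re-populates the cache.
missing_metadata_ids = metadata_ids - set(metadata_id_to_id)  # {3}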
@@ -2221,9 +2304,77 @@ def _import_statistics_with_session(
         else:
             _insert_statistics(session, table, metadata_id, stat)
 
+    if table != StatisticsShortTerm:
+        return True
+
+    # We just inserted new short term statistics, so we need to update the
+    # ShortTermStatisticsRunCache with the latest id for the metadata_id
+    run_cache = get_short_term_statistics_run_cache(instance.hass)
+    cache_latest_short_term_statistic_id_for_metadata_id(
+        run_cache, session, metadata_id
+    )
+
     return True
 
 
+@singleton(DATA_SHORT_TERM_STATISTICS_RUN_CACHE)
+def get_short_term_statistics_run_cache(
+    hass: HomeAssistant,
+) -> ShortTermStatisticsRunCache:
+    """Get the short term statistics run cache."""
+    return ShortTermStatisticsRunCache()
+
+
+def cache_latest_short_term_statistic_id_for_metadata_id(
+    run_cache: ShortTermStatisticsRunCache, session: Session, metadata_id: int
+) -> int | None:
+    """Cache the latest short term statistic for a given metadata_id.
+
+    Returns the id of the latest short term statistic for the metadata_id
+    that was added to the cache, or None if no latest short term statistic
+    was found for the metadata_id.
+    """
+    if latest := cast(
+        Sequence[Row],
+        execute_stmt_lambda_element(
+            session,
+            _find_latest_short_term_statistic_for_metadata_id_stmt(metadata_id),
+            orm_rows=False,
+        ),
+    ):
+        id_: int = latest[0].id
+        run_cache.set_latest_id_for_metadata_id(metadata_id, id_)
+        return id_
+    return None
+
+
+def _find_latest_short_term_statistic_for_metadata_id_stmt(
+    metadata_id: int,
+) -> StatementLambdaElement:
+    """Create a statement to find the latest short term statistics for a metadata_id."""
+    #
+    # This code only looks up one row, and should not be refactored to
+    # look up multiple rows using func.max
+    # or similar, as that will cause the query to be significantly slower
+    # for DBMs such as PostgreSQL that will have to do a full scan
+    #
+    # For PostgreSQL a combined query plan looks like:
+    # (actual time=2.218..893.909 rows=170531 loops=1)
+    #
+    # For PostgreSQL a separate query plan looks like:
+    # (actual time=0.301..0.301 rows=1 loops=1)
+    #
+    #
+    return lambda_stmt(
+        lambda: select(
+            StatisticsShortTerm.id,
+        )
+        .where(StatisticsShortTerm.metadata_id == metadata_id)
+        .order_by(StatisticsShortTerm.start_ts.desc())
+        .limit(1)
+    )
+
+
 @retryable_database_job("statistics")
 def import_statistics(
     instance: Recorder,
@@ -24,6 +24,7 @@ from homeassistant.components.recorder.statistics import (
     get_last_statistics,
     get_latest_short_term_statistics,
     get_metadata,
+    get_short_term_statistics_run_cache,
     list_statistic_ids,
 )
 from homeassistant.components.recorder.table_managers.statistics_meta import (
@@ -176,6 +177,15 @@ def test_compile_hourly_statistics(hass_recorder: Callable[..., HomeAssistant])
     )
     assert stats == {"sensor.test1": [expected_2]}
 
+    # Now wipe the latest_short_term_statistics_ids table and test again
+    # to make sure we can rebuild the missing data
+    run_cache = get_short_term_statistics_run_cache(instance.hass)
+    run_cache._latest_id_by_metadata_id = {}
+    stats = get_latest_short_term_statistics(
+        hass, {"sensor.test1"}, {"last_reset", "max", "mean", "min", "state", "sum"}
+    )
+    assert stats == {"sensor.test1": [expected_2]}
+
     metadata = get_metadata(hass, statistic_ids={"sensor.test1"})
 
     stats = get_latest_short_term_statistics(
@@ -220,6 +230,17 @@ def test_compile_hourly_statistics(hass_recorder: Callable[..., HomeAssistant])
     )
     assert stats == {}
 
+    # Delete again, and manually wipe the cache since we deleted all the data
+    instance.get_session().query(StatisticsShortTerm).delete()
+    run_cache = get_short_term_statistics_run_cache(instance.hass)
+    run_cache._latest_id_by_metadata_id = {}
+
+    # And test again to make sure there is no data
+    stats = get_latest_short_term_statistics(
+        hass, {"sensor.test1"}, {"last_reset", "max", "mean", "min", "state", "sum"}
+    )
+    assert stats == {}
+
 
 @pytest.fixture
 def mock_sensor_statistics():
@@ -15,6 +15,7 @@ from homeassistant.components.recorder.statistics import (
     async_add_external_statistics,
     get_last_statistics,
     get_metadata,
+    get_short_term_statistics_run_cache,
     list_statistic_ids,
 )
 from homeassistant.components.recorder.websocket_api import UNIT_SCHEMA
@@ -302,6 +303,13 @@ async def test_statistic_during_period(
     )
     await async_wait_recording_done(hass)
 
+    metadata = get_metadata(hass, statistic_ids={"sensor.test"})
+    metadata_id = metadata["sensor.test"][0]
+    run_cache = get_short_term_statistics_run_cache(hass)
+    # Verify the import of the short term statistics
+    # also updates the run cache
+    assert run_cache.get_latest_ids({metadata_id}) is not None
+
     # No data for this period yet
     await client.send_json(
         {
|
Loading…
x
Reference in New Issue
Block a user