Add support for daily and monthly statistics (#57576)

* Add support for daily and monthly statistics

* Remove debug code

* Format code

* Don't use dateutil package

* Remove 2 TODOs

* Remove TODO

* Add comments
Erik Montnemery 2021-10-19 08:29:23 +02:00 committed by GitHub
parent 6576225c48
commit 9a26a8cfd8
2 changed files with 243 additions and 17 deletions
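The core of the change is a generic reduction pass: hourly statistics rows are walked in order and flushed into a result bucket each time the period (day or month) changes, with a fake trailing entry appended so the final bucket is also flushed. A minimal standalone sketch of that pattern, with toy data — the names (hourly, same_day) and the tuple layout are illustrative, not from the commit:

from datetime import datetime, timedelta
from itertools import chain
from statistics import mean

# Toy hourly rows: (start, mean) tuples spanning a day boundary.
hourly = [
    (datetime(2021, 8, 31, 22), 20.0),
    (datetime(2021, 8, 31, 23), 22.0),
    (datetime(2021, 9, 1, 0), 24.0),
    (datetime(2021, 9, 1, 1), 26.0),
]

def same_day(t1: datetime, t2: datetime) -> bool:
    """Return True if both datetimes fall on the same calendar date."""
    return t1.date() == t2.date()

daily = []
values: list[float] = []
prev_start = hourly[0][0]
# A sentinel entry one period past the last row forces the final bucket to flush.
for start, value in chain(hourly, [(hourly[-1][0] + timedelta(days=1), None)]):
    if not same_day(prev_start, start):
        daily.append({"day": prev_start.date().isoformat(), "mean": mean(values)})
        values = []
    if value is not None:
        values.append(value)
    prev_start = start

print(daily)
# [{'day': '2021-08-31', 'mean': 21.0}, {'day': '2021-09-01', 'mean': 25.0}]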

homeassistant/components/recorder/statistics.py

@@ -7,6 +7,7 @@ import dataclasses
 from datetime import datetime, timedelta
 from itertools import chain, groupby
 import logging
+from statistics import mean
 from typing import TYPE_CHECKING, Any, Literal

 from sqlalchemy import bindparam, func
@@ -583,13 +584,107 @@ def _statistics_during_period_query(
     return baked_query  # type: ignore[no-any-return]


+def _reduce_statistics(
+    stats: dict[str, list[dict[str, Any]]],
+    same_period: Callable[[datetime, datetime], bool],
+    period_start_end: Callable[[datetime], tuple[datetime, datetime]],
+    period: timedelta,
+) -> dict[str, list[dict[str, Any]]]:
+    """Reduce hourly statistics to daily or monthly statistics."""
+    result: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    for statistic_id, stat_list in stats.items():
+        max_values: list[float] = []
+        mean_values: list[float] = []
+        min_values: list[float] = []
+        prev_stat: dict[str, Any] = stat_list[0]
+
+        # Loop over the hourly statistics + a fake entry to end the period
+        for statistic in chain(
+            stat_list, ({"start": stat_list[-1]["start"] + period},)
+        ):
+            if not same_period(prev_stat["start"], statistic["start"]):
+                start, end = period_start_end(prev_stat["start"])
+                # The previous statistic was the last entry of the period
+                result[statistic_id].append(
+                    {
+                        "statistic_id": statistic_id,
+                        "start": start.isoformat(),
+                        "end": end.isoformat(),
+                        "mean": mean(mean_values) if mean_values else None,
+                        "min": min(min_values) if min_values else None,
+                        "max": max(max_values) if max_values else None,
+                        "last_reset": prev_stat["last_reset"],
+                        "state": prev_stat["state"],
+                        "sum": prev_stat["sum"],
+                    }
+                )
+                max_values = []
+                mean_values = []
+                min_values = []
+            if statistic.get("max") is not None:
+                max_values.append(statistic["max"])
+            if statistic.get("mean") is not None:
+                mean_values.append(statistic["mean"])
+            if statistic.get("min") is not None:
+                min_values.append(statistic["min"])
+            prev_stat = statistic
+
+    return result
+
+
+def _reduce_statistics_per_day(
+    stats: dict[str, list[dict[str, Any]]]
+) -> dict[str, list[dict[str, Any]]]:
+    """Reduce hourly statistics to daily statistics."""
+
+    def same_period(time1: datetime, time2: datetime) -> bool:
+        """Return True if time1 and time2 are in the same date."""
+        date1 = dt_util.as_local(time1).date()
+        date2 = dt_util.as_local(time2).date()
+        return date1 == date2
+
+    def period_start_end(time: datetime) -> tuple[datetime, datetime]:
+        """Return the start and end of the period (day) time is within."""
+        start = dt_util.as_utc(
+            dt_util.as_local(time).replace(hour=0, minute=0, second=0, microsecond=0)
+        )
+        end = start + timedelta(days=1)
+        return (start, end)
+
+    return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=1))
+
+
+def _reduce_statistics_per_month(
+    stats: dict[str, list[dict[str, Any]]]
+) -> dict[str, list[dict[str, Any]]]:
+    """Reduce hourly statistics to monthly statistics."""
+
+    def same_period(time1: datetime, time2: datetime) -> bool:
+        """Return True if time1 and time2 are in the same year and month."""
+        date1 = dt_util.as_local(time1).date()
+        date2 = dt_util.as_local(time2).date()
+        return (date1.year, date1.month) == (date2.year, date2.month)
+
+    def period_start_end(time: datetime) -> tuple[datetime, datetime]:
+        """Return the start and end of the period (month) time is within."""
+        start = dt_util.as_utc(
+            dt_util.as_local(time).replace(
+                day=1, hour=0, minute=0, second=0, microsecond=0
+            )
+        )
+        end = (start + timedelta(days=31)).replace(day=1)
+        return (start, end)
+
+    return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=31))
+
+
 def statistics_during_period(
     hass: HomeAssistant,
     start_time: datetime,
     end_time: datetime | None = None,
     statistic_ids: list[str] | None = None,
-    period: Literal["hour"] | Literal["5minute"] = "hour",
-) -> dict[str, list[dict[str, str]]]:
+    period: Literal["5minute", "day", "hour", "month"] = "hour",
+) -> dict[str, list[dict[str, Any]]]:
     """Return statistics during UTC period start_time - end_time for the statistic_ids.

     If end_time is omitted, returns statistics newer than or equal to start_time.
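The month helpers above avoid a dateutil dependency (per the "Don't use dateutil package" note in the commit message) using plain timedelta arithmetic: no month is longer than 31 days, so adding 31 days to the first of a month always lands somewhere in the following month, and replace(day=1) then snaps to that month's first day. A quick standalone check of the trick, not part of the commit:

from datetime import datetime, timedelta

# Verify the pure-timedelta month rollover for all twelve months,
# including the December -> January year boundary and short February.
for month in range(1, 13):
    start = datetime(2021, month, 1)
    next_start = (start + timedelta(days=31)).replace(day=1)
    expected_year = start.year + 1 if month == 12 else start.year
    assert (next_start.year, next_start.month) == (expected_year, month % 12 + 1)
    print(start.date(), "->", next_start.date())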
@@ -606,14 +701,14 @@ def statistics_during_period(
         if statistic_ids is not None:
             metadata_ids = [metadata_id for metadata_id, _ in metadata.values()]

-        if period == "hour":
-            bakery = STATISTICS_BAKERY
-            base_query = QUERY_STATISTICS
-            table = Statistics
-        else:
+        if period == "5minute":
             bakery = STATISTICS_SHORT_TERM_BAKERY
             base_query = QUERY_STATISTICS_SHORT_TERM
             table = StatisticsShortTerm
+        else:
+            bakery = STATISTICS_BAKERY
+            base_query = QUERY_STATISTICS
+            table = Statistics

         baked_query = _statistics_during_period_query(
             hass, end_time, statistic_ids, bakery, base_query, table
@@ -627,10 +722,20 @@ def statistics_during_period(
         if not stats:
             return {}

         # Return statistics combined with metadata
-        return _sorted_statistics_to_dict(
-            hass, session, stats, statistic_ids, metadata, True, table, start_time
-        )
+        if period not in ("day", "month"):
+            return _sorted_statistics_to_dict(
+                hass, session, stats, statistic_ids, metadata, True, table, start_time
+            )
+
+        result = _sorted_statistics_to_dict(
+            hass, session, stats, statistic_ids, metadata, True, table, start_time, True
+        )
+
+        if period == "day":
+            return _reduce_statistics_per_day(result)
+
+        return _reduce_statistics_per_month(result)


 def get_last_statistics(
     hass: HomeAssistant, number_of_stats: int, statistic_id: str, convert_units: bool
@@ -718,6 +823,7 @@ def _sorted_statistics_to_dict(
     convert_units: bool,
     table: type[Statistics | StatisticsShortTerm],
     start_time: datetime | None,
+    start_time_as_datetime: bool = False,
 ) -> dict[str, list[dict]]:
     """Convert SQL results into JSON friendly data structure."""
     result: dict = defaultdict(list)
@@ -765,7 +871,7 @@ def _sorted_statistics_to_dict(
             ent_results.append(
                 {
                     "statistic_id": statistic_id,
-                    "start": start.isoformat(),
+                    "start": start if start_time_as_datetime else start.isoformat(),
                     "end": end.isoformat(),
                     "mean": convert(db_state.mean, units),
                     "min": convert(db_state.min, units),

tests/components/recorder/test_statistics.py

@@ -2017,16 +2017,19 @@ def test_compile_hourly_statistics_changing_statistics(
     "db_supports_row_number,in_log,not_in_log",
     [(True, "row_number", None), (False, None, "row_number")],
 )
-def test_compile_statistics_hourly_summary(
+def test_compile_statistics_hourly_daily_monthly_summary(
     hass_recorder, caplog, db_supports_row_number, in_log, not_in_log
 ):
-    """Test compiling hourly statistics."""
+    """Test compiling hourly statistics + monthly and daily summary."""
     zero = dt_util.utcnow()
-    zero = zero.replace(minute=0, second=0, microsecond=0)
-    # Travel to the future, recorder gets confused otherwise because states are added
-    # before the start of the recorder_run
-    zero += timedelta(hours=1)
-    hass = hass_recorder()
+    # August 31st, 23:00 local time
+    zero = zero.replace(
+        year=2021, month=9, day=1, hour=5, minute=0, second=0, microsecond=0
+    )
+    with patch(
+        "homeassistant.components.recorder.models.dt_util.utcnow", return_value=zero
+    ):
+        hass = hass_recorder()
     recorder = hass.data[DATA_INSTANCE]
     recorder._db_supports_row_number = db_supports_row_number
     setup_component(hass, "sensor", {})
@@ -2265,6 +2268,123 @@ def test_compile_statistics_hourly_summary(
         start += timedelta(hours=1)
         end += timedelta(hours=1)
     assert stats == expected_stats
+
+    stats = statistics_during_period(hass, zero, period="day")
+    expected_stats = {
+        "sensor.test1": [],
+        "sensor.test2": [],
+        "sensor.test3": [],
+        "sensor.test4": [],
+    }
+    start = dt_util.parse_datetime("2021-08-31T06:00:00+00:00")
+    end = start + timedelta(days=1)
+    for i in range(2):
+        for entity_id in [
+            "sensor.test1",
+            "sensor.test2",
+            "sensor.test3",
+            "sensor.test4",
+        ]:
+            expected_average = (
+                mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_averages
+                else None
+            )
+            expected_minimum = (
+                min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_minima
+                else None
+            )
+            expected_maximum = (
+                max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_maxima
+                else None
+            )
+            expected_state = (
+                expected_states[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_states
+                else None
+            )
+            expected_sum = (
+                expected_sums[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_sums
+                else None
+            )
+            expected_stats[entity_id].append(
+                {
+                    "statistic_id": entity_id,
+                    "start": process_timestamp_to_utc_isoformat(start),
+                    "end": process_timestamp_to_utc_isoformat(end),
+                    "mean": approx(expected_average),
+                    "min": approx(expected_minimum),
+                    "max": approx(expected_maximum),
+                    "last_reset": None,
+                    "state": expected_state,
+                    "sum": expected_sum,
+                }
+            )
+        start += timedelta(days=1)
+        end += timedelta(days=1)
+    assert stats == expected_stats
+
+    stats = statistics_during_period(hass, zero, period="month")
+    expected_stats = {
+        "sensor.test1": [],
+        "sensor.test2": [],
+        "sensor.test3": [],
+        "sensor.test4": [],
+    }
+    start = dt_util.parse_datetime("2021-08-01T06:00:00+00:00")
+    end = dt_util.parse_datetime("2021-09-01T06:00:00+00:00")
+    for i in range(2):
+        for entity_id in [
+            "sensor.test1",
+            "sensor.test2",
+            "sensor.test3",
+            "sensor.test4",
+        ]:
+            expected_average = (
+                mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_averages
+                else None
+            )
+            expected_minimum = (
+                min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_minima
+                else None
+            )
+            expected_maximum = (
+                max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_maxima
+                else None
+            )
+            expected_state = (
+                expected_states[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_states
+                else None
+            )
+            expected_sum = (
+                expected_sums[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_sums
+                else None
+            )
+            expected_stats[entity_id].append(
+                {
+                    "statistic_id": entity_id,
+                    "start": process_timestamp_to_utc_isoformat(start),
+                    "end": process_timestamp_to_utc_isoformat(end),
+                    "mean": approx(expected_average),
+                    "min": approx(expected_minimum),
+                    "max": approx(expected_maximum),
+                    "last_reset": None,
+                    "state": expected_state,
+                    "sum": expected_sum,
+                }
+            )
+        start = (start + timedelta(days=31)).replace(day=1)
+        end = (end + timedelta(days=31)).replace(day=1)
+    assert stats == expected_stats
assert "Error while processing event StatisticsTask" not in caplog.text assert "Error while processing event StatisticsTask" not in caplog.text
if in_log: if in_log:
assert in_log in caplog.text assert in_log in caplog.text