Add support for daily and monthly statistics (#57576)

* Add support for daily and monthly statistics

* Remove debug code

* Format code

* Don't use dateutil package

* Remove 2 TODOs

* Remove TODO

* Add comments
Erik Montnemery 2021-10-19 08:29:23 +02:00 committed by GitHub
parent 6576225c48
commit 9a26a8cfd8
2 changed files with 243 additions and 17 deletions
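The core of the change is a generic reduction pass: hourly statistics rows are walked in order and flushed into a result bucket each time the period (day or month) changes, with a fake trailing entry appended so the final bucket is also flushed. A minimal standalone sketch of that pattern, with toy data — the names (hourly, same_day) and the tuple layout are illustrative, not from the commit:

from datetime import datetime, timedelta
from itertools import chain
from statistics import mean

# Toy hourly rows: (start, mean) tuples spanning a day boundary.
hourly = [
    (datetime(2021, 8, 31, 22), 20.0),
    (datetime(2021, 8, 31, 23), 22.0),
    (datetime(2021, 9, 1, 0), 24.0),
    (datetime(2021, 9, 1, 1), 26.0),
]

def same_day(t1: datetime, t2: datetime) -> bool:
    """Return True if both datetimes fall on the same calendar date."""
    return t1.date() == t2.date()

daily = []
values: list[float] = []
prev_start = hourly[0][0]
# A sentinel entry one period past the last row forces the final bucket to flush.
for start, value in chain(hourly, [(hourly[-1][0] + timedelta(days=1), None)]):
    if not same_day(prev_start, start):
        daily.append({"day": prev_start.date().isoformat(), "mean": mean(values)})
        values = []
    if value is not None:
        values.append(value)
    prev_start = start

print(daily)
# [{'day': '2021-08-31', 'mean': 21.0}, {'day': '2021-09-01', 'mean': 25.0}]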

homeassistant/components/recorder/statistics.py

@@ -7,6 +7,7 @@ import dataclasses
 from datetime import datetime, timedelta
 from itertools import chain, groupby
 import logging
+from statistics import mean
 from typing import TYPE_CHECKING, Any, Literal

 from sqlalchemy import bindparam, func
@@ -583,13 +584,107 @@ def _statistics_during_period_query(
     return baked_query  # type: ignore[no-any-return]


+def _reduce_statistics(
+    stats: dict[str, list[dict[str, Any]]],
+    same_period: Callable[[datetime, datetime], bool],
+    period_start_end: Callable[[datetime], tuple[datetime, datetime]],
+    period: timedelta,
+) -> dict[str, list[dict[str, Any]]]:
+    """Reduce hourly statistics to daily or monthly statistics."""
+    result: dict[str, list[dict[str, Any]]] = defaultdict(list)
+    for statistic_id, stat_list in stats.items():
+        max_values: list[float] = []
+        mean_values: list[float] = []
+        min_values: list[float] = []
+        prev_stat: dict[str, Any] = stat_list[0]
+
+        # Loop over the hourly statistics + a fake entry to end the period
+        for statistic in chain(
+            stat_list, ({"start": stat_list[-1]["start"] + period},)
+        ):
+            if not same_period(prev_stat["start"], statistic["start"]):
+                start, end = period_start_end(prev_stat["start"])
+                # The previous statistic was the last entry of the period
+                result[statistic_id].append(
+                    {
+                        "statistic_id": statistic_id,
+                        "start": start.isoformat(),
+                        "end": end.isoformat(),
+                        "mean": mean(mean_values) if mean_values else None,
+                        "min": min(min_values) if min_values else None,
+                        "max": max(max_values) if max_values else None,
+                        "last_reset": prev_stat["last_reset"],
+                        "state": prev_stat["state"],
+                        "sum": prev_stat["sum"],
+                    }
+                )
+                max_values = []
+                mean_values = []
+                min_values = []
+            if statistic.get("max") is not None:
+                max_values.append(statistic["max"])
+            if statistic.get("mean") is not None:
+                mean_values.append(statistic["mean"])
+            if statistic.get("min") is not None:
+                min_values.append(statistic["min"])
+            prev_stat = statistic
+
+    return result
+
+
+def _reduce_statistics_per_day(
+    stats: dict[str, list[dict[str, Any]]]
+) -> dict[str, list[dict[str, Any]]]:
+    """Reduce hourly statistics to daily statistics."""
+
+    def same_period(time1: datetime, time2: datetime) -> bool:
+        """Return True if time1 and time2 are in the same date."""
+        date1 = dt_util.as_local(time1).date()
+        date2 = dt_util.as_local(time2).date()
+        return date1 == date2
+
+    def period_start_end(time: datetime) -> tuple[datetime, datetime]:
+        """Return the start and end of the period (day) time is within."""
+        start = dt_util.as_utc(
+            dt_util.as_local(time).replace(hour=0, minute=0, second=0, microsecond=0)
+        )
+        end = start + timedelta(days=1)
+        return (start, end)
+
+    return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=1))
+
+
+def _reduce_statistics_per_month(
+    stats: dict[str, list[dict[str, Any]]]
+) -> dict[str, list[dict[str, Any]]]:
+    """Reduce hourly statistics to monthly statistics."""
+
+    def same_period(time1: datetime, time2: datetime) -> bool:
+        """Return True if time1 and time2 are in the same year and month."""
+        date1 = dt_util.as_local(time1).date()
+        date2 = dt_util.as_local(time2).date()
+        return (date1.year, date1.month) == (date2.year, date2.month)
+
+    def period_start_end(time: datetime) -> tuple[datetime, datetime]:
+        """Return the start and end of the period (month) time is within."""
+        start = dt_util.as_utc(
+            dt_util.as_local(time).replace(
+                day=1, hour=0, minute=0, second=0, microsecond=0
+            )
+        )
+        end = (start + timedelta(days=31)).replace(day=1)
+        return (start, end)
+
+    return _reduce_statistics(stats, same_period, period_start_end, timedelta(days=31))
+
+
 def statistics_during_period(
     hass: HomeAssistant,
     start_time: datetime,
     end_time: datetime | None = None,
     statistic_ids: list[str] | None = None,
-    period: Literal["hour"] | Literal["5minute"] = "hour",
-) -> dict[str, list[dict[str, str]]]:
+    period: Literal["5minute", "day", "hour", "month"] = "hour",
+) -> dict[str, list[dict[str, Any]]]:
     """Return statistics during UTC period start_time - end_time for the statistic_ids.

     If end_time is omitted, returns statistics newer than or equal to start_time.
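The month helpers above avoid a dateutil dependency (per the "Don't use dateutil package" note in the commit message) using plain timedelta arithmetic: no month is longer than 31 days, so adding 31 days to the first of a month always lands somewhere in the following month, and replace(day=1) then snaps to that month's first day. A quick standalone check of the trick, not part of the commit:

from datetime import datetime, timedelta

# Verify the pure-timedelta month rollover for all twelve months,
# including the December -> January year boundary and short February.
for month in range(1, 13):
    start = datetime(2021, month, 1)
    next_start = (start + timedelta(days=31)).replace(day=1)
    expected_year = start.year + 1 if month == 12 else start.year
    assert (next_start.year, next_start.month) == (expected_year, month % 12 + 1)
    print(start.date(), "->", next_start.date())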
@@ -606,14 +701,14 @@ def statistics_during_period(
         if statistic_ids is not None:
             metadata_ids = [metadata_id for metadata_id, _ in metadata.values()]

-        if period == "hour":
-            bakery = STATISTICS_BAKERY
-            base_query = QUERY_STATISTICS
-            table = Statistics
-        else:
+        if period == "5minute":
             bakery = STATISTICS_SHORT_TERM_BAKERY
             base_query = QUERY_STATISTICS_SHORT_TERM
             table = StatisticsShortTerm
+        else:
+            bakery = STATISTICS_BAKERY
+            base_query = QUERY_STATISTICS
+            table = Statistics

         baked_query = _statistics_during_period_query(
             hass, end_time, statistic_ids, bakery, base_query, table
@@ -627,10 +722,20 @@ def statistics_during_period(
         if not stats:
             return {}

         # Return statistics combined with metadata
-        return _sorted_statistics_to_dict(
-            hass, session, stats, statistic_ids, metadata, True, table, start_time
-        )
+        if period not in ("day", "month"):
+            return _sorted_statistics_to_dict(
+                hass, session, stats, statistic_ids, metadata, True, table, start_time
+            )
+
+        result = _sorted_statistics_to_dict(
+            hass, session, stats, statistic_ids, metadata, True, table, start_time, True
+        )
+
+        if period == "day":
+            return _reduce_statistics_per_day(result)
+
+        return _reduce_statistics_per_month(result)


 def get_last_statistics(
     hass: HomeAssistant, number_of_stats: int, statistic_id: str, convert_units: bool
@@ -718,6 +823,7 @@ def _sorted_statistics_to_dict(
     convert_units: bool,
     table: type[Statistics | StatisticsShortTerm],
     start_time: datetime | None,
+    start_time_as_datetime: bool = False,
 ) -> dict[str, list[dict]]:
     """Convert SQL results into JSON friendly data structure."""
     result: dict = defaultdict(list)
@@ -765,7 +871,7 @@ def _sorted_statistics_to_dict(
             ent_results.append(
                 {
                     "statistic_id": statistic_id,
-                    "start": start.isoformat(),
+                    "start": start if start_time_as_datetime else start.isoformat(),
                     "end": end.isoformat(),
                     "mean": convert(db_state.mean, units),
                     "min": convert(db_state.min, units),

tests/components/recorder/test_statistics.py

@@ -2017,16 +2017,19 @@ def test_compile_hourly_statistics_changing_statistics(
     "db_supports_row_number,in_log,not_in_log",
     [(True, "row_number", None), (False, None, "row_number")],
 )
-def test_compile_statistics_hourly_summary(
+def test_compile_statistics_hourly_daily_monthly_summary(
     hass_recorder, caplog, db_supports_row_number, in_log, not_in_log
 ):
-    """Test compiling hourly statistics."""
+    """Test compiling hourly statistics + monthly and daily summary."""
     zero = dt_util.utcnow()
-    zero = zero.replace(minute=0, second=0, microsecond=0)
-    # Travel to the future, recorder gets confused otherwise because states are added
-    # before the start of the recorder_run
-    zero += timedelta(hours=1)
-    hass = hass_recorder()
+    # August 31st, 23:00 local time
+    zero = zero.replace(
+        year=2021, month=9, day=1, hour=5, minute=0, second=0, microsecond=0
+    )
+    with patch(
+        "homeassistant.components.recorder.models.dt_util.utcnow", return_value=zero
+    ):
+        hass = hass_recorder()
     recorder = hass.data[DATA_INSTANCE]
     recorder._db_supports_row_number = db_supports_row_number
     setup_component(hass, "sensor", {})
@@ -2265,6 +2268,123 @@ def test_compile_statistics_hourly_summary(
         start += timedelta(hours=1)
         end += timedelta(hours=1)
     assert stats == expected_stats
+
+    stats = statistics_during_period(hass, zero, period="day")
+    expected_stats = {
+        "sensor.test1": [],
+        "sensor.test2": [],
+        "sensor.test3": [],
+        "sensor.test4": [],
+    }
+    start = dt_util.parse_datetime("2021-08-31T06:00:00+00:00")
+    end = start + timedelta(days=1)
+    for i in range(2):
+        for entity_id in [
+            "sensor.test1",
+            "sensor.test2",
+            "sensor.test3",
+            "sensor.test4",
+        ]:
+            expected_average = (
+                mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_averages
+                else None
+            )
+            expected_minimum = (
+                min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_minima
+                else None
+            )
+            expected_maximum = (
+                max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_maxima
+                else None
+            )
+            expected_state = (
+                expected_states[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_states
+                else None
+            )
+            expected_sum = (
+                expected_sums[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_sums
+                else None
+            )
+            expected_stats[entity_id].append(
+                {
+                    "statistic_id": entity_id,
+                    "start": process_timestamp_to_utc_isoformat(start),
+                    "end": process_timestamp_to_utc_isoformat(end),
+                    "mean": approx(expected_average),
+                    "min": approx(expected_minimum),
+                    "max": approx(expected_maximum),
+                    "last_reset": None,
+                    "state": expected_state,
+                    "sum": expected_sum,
+                }
+            )
+        start += timedelta(days=1)
+        end += timedelta(days=1)
+    assert stats == expected_stats
+
+    stats = statistics_during_period(hass, zero, period="month")
+    expected_stats = {
+        "sensor.test1": [],
+        "sensor.test2": [],
+        "sensor.test3": [],
+        "sensor.test4": [],
+    }
+    start = dt_util.parse_datetime("2021-08-01T06:00:00+00:00")
+    end = dt_util.parse_datetime("2021-09-01T06:00:00+00:00")
+    for i in range(2):
+        for entity_id in [
+            "sensor.test1",
+            "sensor.test2",
+            "sensor.test3",
+            "sensor.test4",
+        ]:
+            expected_average = (
+                mean(expected_averages[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_averages
+                else None
+            )
+            expected_minimum = (
+                min(expected_minima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_minima
+                else None
+            )
+            expected_maximum = (
+                max(expected_maxima[entity_id][i * 12 : (i + 1) * 12])
+                if entity_id in expected_maxima
+                else None
+            )
+            expected_state = (
+                expected_states[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_states
+                else None
+            )
+            expected_sum = (
+                expected_sums[entity_id][(i + 1) * 12 - 1]
+                if entity_id in expected_sums
+                else None
+            )
+            expected_stats[entity_id].append(
+                {
+                    "statistic_id": entity_id,
+                    "start": process_timestamp_to_utc_isoformat(start),
+                    "end": process_timestamp_to_utc_isoformat(end),
+                    "mean": approx(expected_average),
+                    "min": approx(expected_minimum),
+                    "max": approx(expected_maximum),
+                    "last_reset": None,
+                    "state": expected_state,
+                    "sum": expected_sum,
+                }
+            )
+        start = (start + timedelta(days=31)).replace(day=1)
+        end = (end + timedelta(days=31)).replace(day=1)
+    assert stats == expected_stats
assert "Error while processing event StatisticsTask" not in caplog.text assert "Error while processing event StatisticsTask" not in caplog.text
if in_log: if in_log:
assert in_log in caplog.text assert in_log in caplog.text