From e1a5ad069c07753b4fbc0215c04d8c71c2960961 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 13 Apr 2023 11:52:38 -1000 Subject: [PATCH] Improve performance of sums in the energy dashboard (#91342) --- .../components/recorder/statistics.py | 72 ++++++++++++++----- tests/components/recorder/test_statistics.py | 53 ++++++++++++++ 2 files changed, 106 insertions(+), 19 deletions(-) diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py index 70e82fad5d7..bd496680fa7 100644 --- a/homeassistant/components/recorder/statistics.py +++ b/homeassistant/components/recorder/statistics.py @@ -857,10 +857,13 @@ def _reduce_statistics( } if _want_mean: row["mean"] = mean(mean_values) if mean_values else None + mean_values.clear() if _want_min: row["min"] = min(min_values) if min_values else None + min_values.clear() if _want_max: row["max"] = max(max_values) if max_values else None + max_values.clear() if _want_last_reset: row["last_reset"] = prev_stat.get("last_reset") if _want_state: @@ -868,10 +871,6 @@ def _reduce_statistics( if _want_sum: row["sum"] = prev_stat["sum"] result[statistic_id].append(row) - - max_values = [] - mean_values = [] - min_values = [] if _want_max and (_max := statistic.get("max")) is not None: max_values.append(_max) if _want_mean and (_mean := statistic.get("mean")) is not None: @@ -1560,20 +1559,6 @@ def _statistics_during_period_with_session( if not stats: return {} - # Return statistics combined with metadata - if period not in ("day", "week", "month"): - return _sorted_statistics_to_dict( - hass, - session, - stats, - statistic_ids, - metadata, - True, - table, - start_time, - units, - types, - ) result = _sorted_statistics_to_dict( hass, @@ -1588,6 +1573,10 @@ def _statistics_during_period_with_session( types, ) + # Return statistics combined with metadata + if period not in ("day", "week", "month"): + return result + if period == "day": return _reduce_statistics_per_day(result, types) @@ -1829,7 +1818,34 @@ def _statistics_at_time( return cast(Sequence[Row], execute_stmt_lambda_element(session, stmt)) -def _sorted_statistics_to_dict( +def _fast_build_sum_list( + stats_list: list[Row], + table_duration_seconds: float, + convert: Callable | None, + start_ts_idx: int, + sum_idx: int, +) -> list[StatisticsRow]: + """Build a list of sum statistics.""" + if convert: + return [ + { + "start": (start_ts := db_state[start_ts_idx]), + "end": start_ts + table_duration_seconds, + "sum": convert(db_state[sum_idx]), + } + for db_state in stats_list + ] + return [ + { + "start": (start_ts := db_state[start_ts_idx]), + "end": start_ts + table_duration_seconds, + "sum": db_state[sum_idx], + } + for db_state in stats_list + ] + + +def _sorted_statistics_to_dict( # noqa: C901 hass: HomeAssistant, session: Session, stats: Sequence[Row[Any]], @@ -1888,6 +1904,7 @@ def _sorted_statistics_to_dict( last_reset_ts_idx = field_map["last_reset_ts"] if "last_reset" in types else None state_idx = field_map["state"] if "state" in types else None sum_idx = field_map["sum"] if "sum" in types else None + sum_only = len(types) == 1 and sum_idx is not None # Append all statistic entries, and optionally do unit conversion table_duration_seconds = table.duration.total_seconds() for meta_id, stats_list in stats_by_meta_id.items(): @@ -1900,6 +1917,23 @@ def _sorted_statistics_to_dict( convert = _get_statistic_to_display_unit_converter(unit, state_unit, units) else: convert = None + + if sum_only: + # This function is extremely flexible and can handle all types of + # statistics, but in practice we only ever use a few combinations. + # + # For energy, we only need sum statistics, so we can optimize + # this path to avoid the overhead of the more generic function. + assert sum_idx is not None + result[statistic_id] = _fast_build_sum_list( + stats_list, + table_duration_seconds, + convert, + start_ts_idx, + sum_idx, + ) + continue + ent_results_append = result[statistic_id].append # # The below loop is a red hot path for energy, and every diff --git a/tests/components/recorder/test_statistics.py b/tests/components/recorder/test_statistics.py index e36db0ce53d..59178f52c8b 100644 --- a/tests/components/recorder/test_statistics.py +++ b/tests/components/recorder/test_statistics.py @@ -1223,6 +1223,59 @@ def test_monthly_statistics( ] } + stats = statistics_during_period( + hass, + start_time=zero, + statistic_ids=["not", "the", "same", "test:total_energy_import"], + period="month", + types={"sum"}, + ) + sep_start = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) + sep_end = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) + oct_start = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) + oct_end = dt_util.as_utc(dt_util.parse_datetime("2021-11-01 00:00:00")) + assert stats == { + "test:total_energy_import": [ + { + "start": sep_start.timestamp(), + "end": sep_end.timestamp(), + "sum": pytest.approx(3.0), + }, + { + "start": oct_start.timestamp(), + "end": oct_end.timestamp(), + "sum": pytest.approx(5.0), + }, + ] + } + + stats = statistics_during_period( + hass, + start_time=zero, + statistic_ids=["not", "the", "same", "test:total_energy_import"], + period="month", + types={"sum"}, + units={"energy": "Wh"}, + ) + sep_start = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) + sep_end = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) + oct_start = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) + oct_end = dt_util.as_utc(dt_util.parse_datetime("2021-11-01 00:00:00")) + assert stats == { + "test:total_energy_import": [ + { + "start": sep_start.timestamp(), + "end": sep_end.timestamp(), + "sum": pytest.approx(3000.0), + }, + { + "start": oct_start.timestamp(), + "end": oct_end.timestamp(), + "sum": pytest.approx(5000.0), + }, + ] + } + # Use 5minute to ensure table switch works stats = statistics_during_period( hass,