From aa419686cb0cd5d4550bdc7082ba20ab5c25aa2f Mon Sep 17 00:00:00 2001 From: karwosts <32912880+karwosts@users.noreply.github.com> Date: Mon, 10 Jun 2024 11:23:21 -0700 Subject: [PATCH] Fix statistic_during_period wrongly prioritizing ST statistics over LT (#115291) * Fix statistic_during_period wrongly prioritizing ST statistics over LT * comment * start of a test * more testcases * fix sts insertion range * update from review * remove unneeded comments * update logic * min/mean/max testing --- .../components/recorder/statistics.py | 20 +- .../components/recorder/test_websocket_api.py | 328 ++++++++++++++++++ 2 files changed, 343 insertions(+), 5 deletions(-) diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py index 84c82f35264..8d077e19344 100644 --- a/homeassistant/components/recorder/statistics.py +++ b/homeassistant/components/recorder/statistics.py @@ -1262,6 +1262,7 @@ def _get_oldest_sum_statistic( main_start_time: datetime | None, tail_start_time: datetime | None, oldest_stat: datetime | None, + oldest_5_min_stat: datetime | None, tail_only: bool, metadata_id: int, ) -> float | None: @@ -1306,6 +1307,15 @@ def _get_oldest_sum_statistic( if ( head_start_time is not None + and oldest_5_min_stat is not None + and ( + # If we want stats older than the short term purge window, don't lookup + # the oldest sum in the short term table, as it would be prioritized + # over older LongTermStats. + (oldest_stat is None) + or (oldest_5_min_stat < oldest_stat) + or (oldest_5_min_stat <= head_start_time) + ) and ( oldest_sum := _get_oldest_sum_statistic_in_sub_period( session, head_start_time, StatisticsShortTerm, metadata_id @@ -1477,12 +1487,11 @@ def statistic_during_period( tail_end_time: datetime | None = None if end_time is None: tail_start_time = now.replace(minute=0, second=0, microsecond=0) + elif tail_only: + tail_start_time = start_time + tail_end_time = end_time elif end_time.minute: - tail_start_time = ( - start_time - if tail_only - else end_time.replace(minute=0, second=0, microsecond=0) - ) + tail_start_time = end_time.replace(minute=0, second=0, microsecond=0) tail_end_time = end_time # Calculate the main period @@ -1517,6 +1526,7 @@ def statistic_during_period( main_start_time, tail_start_time, oldest_stat, + oldest_5_min_stat, tail_only, metadata_id, ) diff --git a/tests/components/recorder/test_websocket_api.py b/tests/components/recorder/test_websocket_api.py index 3d35aafb2b3..0dd9241776d 100644 --- a/tests/components/recorder/test_websocket_api.py +++ b/tests/components/recorder/test_websocket_api.py @@ -794,6 +794,334 @@ async def test_statistic_during_period_hole( } +@pytest.mark.freeze_time(datetime.datetime(2022, 10, 21, 6, 31, tzinfo=datetime.UTC)) +async def test_statistic_during_period_partial_overlap( + recorder_mock: Recorder, + hass: HomeAssistant, + hass_ws_client: WebSocketGenerator, +) -> None: + """Test statistic_during_period.""" + now = dt_util.utcnow() + + await async_recorder_block_till_done(hass) + client = await hass_ws_client() + + zero = now + start = zero.replace(hour=0, minute=0, second=0, microsecond=0) + + # Sum shall be tracking a hypothetical sensor that is 0 at midnight, and grows by 1 per minute. + # The test will have 4 hours of LTS-only data (0:00-3:59:59), followed by 2 hours of overlapping STS/LTS (4:00-5:59:59), followed by 30 minutes of STS only (6:00-6:29:59) + # similar to how a real recorder might look after purging STS. + + # The datapoint at i=0 (start = 0:00) will be 60 as that is the growth during the hour starting at the start period + imported_stats_hours = [ + { + "start": (start + timedelta(hours=i)), + "min": i * 60, + "max": i * 60 + 60, + "mean": i * 60 + 30, + "sum": (i + 1) * 60, + } + for i in range(6) + ] + + # The datapoint at i=0 (start = 4:00) would be the sensor's value at t=4:05, or 245 + imported_stats_5min = [ + { + "start": (start + timedelta(hours=4, minutes=5 * i)), + "min": 4 * 60 + i * 5, + "max": 4 * 60 + i * 5 + 5, + "mean": 4 * 60 + i * 5 + 2.5, + "sum": 4 * 60 + (i + 1) * 5, + } + for i in range(30) + ] + + assert imported_stats_hours[-1]["sum"] == 360 + assert imported_stats_hours[-1]["start"] == start.replace( + hour=5, minute=0, second=0, microsecond=0 + ) + assert imported_stats_5min[-1]["sum"] == 390 + assert imported_stats_5min[-1]["start"] == start.replace( + hour=6, minute=25, second=0, microsecond=0 + ) + + statId = "sensor.test_overlapping" + imported_metadata = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy overlapping", + "source": "recorder", + "statistic_id": statId, + "unit_of_measurement": "kWh", + } + + recorder.get_instance(hass).async_import_statistics( + imported_metadata, + imported_stats_hours, + Statistics, + ) + recorder.get_instance(hass).async_import_statistics( + imported_metadata, + imported_stats_5min, + StatisticsShortTerm, + ) + await async_wait_recording_done(hass) + + metadata = get_metadata(hass, statistic_ids={statId}) + metadata_id = metadata[statId][0] + run_cache = get_short_term_statistics_run_cache(hass) + # Verify the import of the short term statistics + # also updates the run cache + assert run_cache.get_latest_ids({metadata_id}) is not None + + # Get all the stats, should consider all hours and 5mins + await client.send_json_auto_id( + { + "type": "recorder/statistic_during_period", + "statistic_id": statId, + } + ) + response = await client.receive_json() + assert response["success"] + assert response["result"] == { + "change": 390, + "max": 390, + "min": 0, + "mean": 195, + } + + async def assert_stat_during_fixed(client, start_time, end_time, expect): + json = { + "type": "recorder/statistic_during_period", + "types": list(expect.keys()), + "statistic_id": statId, + "fixed_period": {}, + } + if start_time: + json["fixed_period"]["start_time"] = start_time.isoformat() + if end_time: + json["fixed_period"]["end_time"] = end_time.isoformat() + + await client.send_json_auto_id(json) + response = await client.receive_json() + assert response["success"] + assert response["result"] == expect + + # One hours worth of growth in LTS-only + start_time = start.replace(hour=1) + end_time = start.replace(hour=2) + await assert_stat_during_fixed( + client, start_time, end_time, {"change": 60, "min": 60, "max": 120, "mean": 90} + ) + + # Five minutes of growth in STS-only + start_time = start.replace(hour=6, minute=15) + end_time = start.replace(hour=6, minute=20) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 5, + "min": 6 * 60 + 15, + "max": 6 * 60 + 20, + "mean": 6 * 60 + (15 + 20) / 2, + }, + ) + + # Six minutes of growth in STS-only + start_time = start.replace(hour=6, minute=14) + end_time = start.replace(hour=6, minute=20) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 5, + "min": 6 * 60 + 15, + "max": 6 * 60 + 20, + "mean": 6 * 60 + (15 + 20) / 2, + }, + ) + + # Six minutes of growth in STS-only + # 5-minute Change includes start times exactly on or before a statistics start, but end times are not counted unless they are greater than start. + start_time = start.replace(hour=6, minute=15) + end_time = start.replace(hour=6, minute=21) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 10, + "min": 6 * 60 + 15, + "max": 6 * 60 + 25, + "mean": 6 * 60 + (15 + 25) / 2, + }, + ) + + # Five minutes of growth in overlapping LTS+STS + start_time = start.replace(hour=5, minute=15) + end_time = start.replace(hour=5, minute=20) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 5, + "min": 5 * 60 + 15, + "max": 5 * 60 + 20, + "mean": 5 * 60 + (15 + 20) / 2, + }, + ) + + # Five minutes of growth in overlapping LTS+STS (start of hour) + start_time = start.replace(hour=5, minute=0) + end_time = start.replace(hour=5, minute=5) + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": 5, "min": 5 * 60, "max": 5 * 60 + 5, "mean": 5 * 60 + (5) / 2}, + ) + + # Five minutes of growth in overlapping LTS+STS (end of hour) + start_time = start.replace(hour=4, minute=55) + end_time = start.replace(hour=5, minute=0) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 5, + "min": 4 * 60 + 55, + "max": 5 * 60, + "mean": 4 * 60 + (55 + 60) / 2, + }, + ) + + # Five minutes of growth in STS-only, with a minute offset. Despite that this does not cover the full period, result is still 5 + start_time = start.replace(hour=6, minute=16) + end_time = start.replace(hour=6, minute=21) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 5, + "min": 6 * 60 + 20, + "max": 6 * 60 + 25, + "mean": 6 * 60 + (20 + 25) / 2, + }, + ) + + # 7 minutes of growth in STS-only, spanning two intervals + start_time = start.replace(hour=6, minute=14) + end_time = start.replace(hour=6, minute=21) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 10, + "min": 6 * 60 + 15, + "max": 6 * 60 + 25, + "mean": 6 * 60 + (15 + 25) / 2, + }, + ) + + # One hours worth of growth in LTS-only, with arbitrary minute offsets + # Since this does not fully cover the hour, result is None? + start_time = start.replace(hour=1, minute=40) + end_time = start.replace(hour=2, minute=12) + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": None, "min": None, "max": None, "mean": None}, + ) + + # One hours worth of growth in LTS-only, with arbitrary minute offsets, covering a whole 1-hour period + start_time = start.replace(hour=1, minute=40) + end_time = start.replace(hour=3, minute=12) + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": 60, "min": 120, "max": 180, "mean": 150}, + ) + + # 90 minutes of growth in window overlapping LTS+STS/STS-only (4:41 - 6:11) + start_time = start.replace(hour=4, minute=41) + end_time = start_time + timedelta(minutes=90) + await assert_stat_during_fixed( + client, + start_time, + end_time, + { + "change": 90, + "min": 4 * 60 + 45, + "max": 4 * 60 + 45 + 90, + "mean": 4 * 60 + 45 + 45, + }, + ) + + # 4 hours of growth in overlapping LTS-only/LTS+STS (2:01-6:01) + start_time = start.replace(hour=2, minute=1) + end_time = start_time + timedelta(minutes=240) + # 60 from LTS (3:00-3:59), 125 from STS (25 intervals) (4:00-6:01) + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": 185, "min": 3 * 60, "max": 3 * 60 + 185, "mean": 3 * 60 + 185 / 2}, + ) + + # 4 hours of growth in overlapping LTS-only/LTS+STS (1:31-5:31) + start_time = start.replace(hour=1, minute=31) + end_time = start_time + timedelta(minutes=240) + # 120 from LTS (2:00-3:59), 95 from STS (19 intervals) 4:00-5:31 + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": 215, "min": 2 * 60, "max": 2 * 60 + 215, "mean": 2 * 60 + 215 / 2}, + ) + + # 5 hours of growth, start time only (1:31-end) + start_time = start.replace(hour=1, minute=31) + end_time = None + # will be actually 2:00 - end + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": 4 * 60 + 30, "min": 120, "max": 390, "mean": (390 + 120) / 2}, + ) + + # 5 hours of growth, end_time_only (0:00-5:00) + start_time = None + end_time = start.replace(hour=5) + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": 5 * 60, "min": 0, "max": 5 * 60, "mean": (5 * 60) / 2}, + ) + + # 5 hours 1 minute of growth, end_time_only (0:00-5:01) + start_time = None + end_time = start.replace(hour=5, minute=1) + # 4 hours LTS, 1 hour and 5 minutes STS (4:00-5:01) + await assert_stat_during_fixed( + client, + start_time, + end_time, + {"change": 5 * 60 + 5, "min": 0, "max": 5 * 60 + 5, "mean": (5 * 60 + 5) / 2}, + ) + + @pytest.mark.freeze_time(datetime.datetime(2022, 10, 21, 7, 25, tzinfo=datetime.UTC)) @pytest.mark.parametrize( ("calendar_period", "start_time", "end_time"),