From 4568207f9bcc915b5d5ce92e900a0c62be67c688 Mon Sep 17 00:00:00 2001 From: Erik Montnemery Date: Thu, 11 May 2023 10:05:58 +0200 Subject: [PATCH] Support calculating changes between consecutive sum statistics (#92823) * Support calculating changes between consecutive sum statistics * Add support for unit conversion when calculating change * Don't include sum in WS response unless requested * Improve tests * Break out calculating change to its own function * Improve test coverage --- .../components/recorder/statistics.py | 97 +- .../components/recorder/websocket_api.py | 6 +- tests/components/recorder/test_statistics.py | 826 +++++++++++++++++- .../components/recorder/test_websocket_api.py | 2 + tests/components/sensor/test_recorder.py | 8 +- 5 files changed, 863 insertions(+), 76 deletions(-) diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py index 0e49c09e17c..2ef790024fd 100644 --- a/homeassistant/components/recorder/statistics.py +++ b/homeassistant/components/recorder/statistics.py @@ -165,6 +165,7 @@ class StatisticsRow(BaseStatisticsRow, total=False): min: float | None max: float | None mean: float | None + change: float | None def _get_unit_class(unit: str | None) -> str | None: @@ -1540,6 +1541,57 @@ def _extract_metadata_and_discard_impossible_columns( return metadata_ids +def _augment_result_with_change( + hass: HomeAssistant, + session: Session, + start_time: datetime, + units: dict[str, str] | None, + _types: set[Literal["change", "last_reset", "max", "mean", "min", "state", "sum"]], + table: type[Statistics | StatisticsShortTerm], + metadata: dict[str, tuple[int, StatisticMetaData]], + result: dict[str, list[StatisticsRow]], +) -> None: + """Add change to the result.""" + drop_sum = "sum" not in _types + prev_sums = {} + if tmp := _statistics_at_time( + session, + {metadata[statistic_id][0] for statistic_id in result}, + table, + start_time, + {"sum"}, + ): + _metadata = dict(metadata.values()) + for row in tmp: + metadata_by_id = _metadata[row.metadata_id] + statistic_id = metadata_by_id["statistic_id"] + + state_unit = unit = metadata_by_id["unit_of_measurement"] + if state := hass.states.get(statistic_id): + state_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT) + convert = _get_statistic_to_display_unit_converter(unit, state_unit, units) + + if convert is not None: + prev_sums[statistic_id] = convert(row.sum) + else: + prev_sums[statistic_id] = row.sum + + for statistic_id, rows in result.items(): + prev_sum = prev_sums.get(statistic_id) or 0 + for statistics_row in rows: + if "sum" not in statistics_row: + continue + if drop_sum: + _sum = statistics_row.pop("sum") + else: + _sum = statistics_row["sum"] + if _sum is None: + statistics_row["change"] = None + continue + statistics_row["change"] = _sum - prev_sum + prev_sum = _sum + + def _statistics_during_period_with_session( hass: HomeAssistant, session: Session, @@ -1548,7 +1600,7 @@ def _statistics_during_period_with_session( statistic_ids: set[str] | None, period: Literal["5minute", "day", "hour", "week", "month"], units: dict[str, str] | None, - types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]], + _types: set[Literal["change", "last_reset", "max", "mean", "min", "state", "sum"]], ) -> dict[str, list[StatisticsRow]]: """Return statistic data points during UTC period start_time - end_time. 
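
The core of _augment_result_with_change above is a running-sum delta: each row's change is its sum minus the previous row's sum, seeded by the last sum recorded before start_time. A minimal, self-contained sketch of that logic (illustrative names only; no database session, unit conversion, or sum-dropping):

from typing import TypedDict

class Row(TypedDict, total=False):
    sum: float | None
    change: float | None

def derive_change(rows: list[Row], prev_sum: float | None) -> None:
    """Attach per-period change: the delta between consecutive sums."""
    running = prev_sum or 0.0
    for row in rows:
        if "sum" not in row:
            continue  # nothing to derive from
        value = row["sum"]
        if value is None:
            row["change"] = None  # keep the gap instead of raising
            continue
        row["change"] = value - running
        running = value

rows: list[Row] = [{"sum": 2.0}, {"sum": 3.0}, {"sum": None}, {"sum": 8.0}]
derive_change(rows, prev_sum=None)
assert [r["change"] for r in rows] == [2.0, 1.0, None, 5.0]

Note how a None sum yields a None change but does not advance the running baseline, matching the `_sum is None` branch in the hunk above.
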
@@ -1559,7 +1611,6 @@ def _statistics_during_period_with_session( # This is for backwards compatibility to avoid a breaking change # for custom integrations that call this method. statistic_ids = set(statistic_ids) # type: ignore[unreachable] - metadata = None # Fetch metadata for the given (or all) statistic_ids metadata = get_instance(hass).statistics_meta_manager.get_many( session, statistic_ids=statistic_ids @@ -1567,6 +1618,13 @@ def _statistics_during_period_with_session( if not metadata: return {} + types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]] = set() + for stat_type in _types: + if stat_type == "change": + types.add("sum") + continue + types.add(stat_type) + metadata_ids = None if statistic_ids is not None: metadata_ids = _extract_metadata_and_discard_impossible_columns(metadata, types) @@ -1597,17 +1655,22 @@ def _statistics_during_period_with_session( types, ) - # Return statistics combined with metadata - if period not in ("day", "week", "month"): - return result - if period == "day": - return _reduce_statistics_per_day(result, types) + result = _reduce_statistics_per_day(result, types) if period == "week": - return _reduce_statistics_per_week(result, types) + result = _reduce_statistics_per_week(result, types) - return _reduce_statistics_per_month(result, types) + if period == "month": + result = _reduce_statistics_per_month(result, types) + + if "change" in _types: + _augment_result_with_change( + hass, session, start_time, units, _types, table, metadata, result + ) + + # Return statistics combined with metadata + return result def statistics_during_period( @@ -1617,7 +1680,7 @@ def statistics_during_period( statistic_ids: set[str] | None, period: Literal["5minute", "day", "hour", "week", "month"], units: dict[str, str] | None, - types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]], + types: set[Literal["change", "last_reset", "max", "mean", "min", "state", "sum"]], ) -> dict[str, list[StatisticsRow]]: """Return statistic data points during UTC period start_time - end_time. 
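
Two details of the flow above are easy to miss: "change" is never read from the database — requesting it triggers a "sum" fetch instead — and the delta is derived only after the day/week/month reduction, so each reduced bucket gets exactly one change value. The type translation loop amounts to a one-line rewrite (illustrative):

requested = {"change", "mean"}
db_types = {"sum" if t == "change" else t for t in requested}
assert db_types == {"mean", "sum"}
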
@@ -1886,8 +1949,6 @@ def _sorted_statistics_to_dict( # noqa: C901 assert stats, "stats must not be empty" # Guard against implementation error result: dict[str, list[StatisticsRow]] = defaultdict(list) metadata = dict(_metadata.values()) - need_stat_at_start_time: set[int] = set() - start_time_ts = start_time.timestamp() if start_time else None # Identify metadata IDs for which no data was available at the requested start time field_map: dict[str, int] = {key: idx for idx, key in enumerate(stats[0]._fields)} metadata_id_idx = field_map["metadata_id"] @@ -1898,9 +1959,6 @@ def _sorted_statistics_to_dict( # noqa: C901 for meta_id, group in groupby(stats, key_func): stats_list = stats_by_meta_id[meta_id] = list(group) seen_statistic_ids.add(metadata[meta_id]["statistic_id"]) - first_start_time_ts = stats_list[0][start_ts_idx] - if start_time_ts and first_start_time_ts > start_time_ts: - need_stat_at_start_time.add(meta_id) # Set all statistic IDs to empty lists in result set to maintain the order if statistic_ids is not None: @@ -1911,15 +1969,6 @@ def _sorted_statistics_to_dict( # noqa: C901 if stat_id in seen_statistic_ids: result[stat_id] = [] - # Fetch last known statistics for the needed metadata IDs - if need_stat_at_start_time: - assert start_time # Can not be None if need_stat_at_start_time is not empty - if tmp := _statistics_at_time( - session, need_stat_at_start_time, table, start_time, types - ): - for stat in tmp: - stats_by_meta_id[stat[metadata_id_idx]].insert(0, stat) - # Figure out which fields we need to extract from the SQL result # and which indices they have in the result so we can avoid the overhead # of doing a dict lookup for each row diff --git a/homeassistant/components/recorder/websocket_api.py b/homeassistant/components/recorder/websocket_api.py index c52df1b25e3..733dafeba27 100644 --- a/homeassistant/components/recorder/websocket_api.py +++ b/homeassistant/components/recorder/websocket_api.py @@ -154,7 +154,7 @@ def _ws_get_statistics_during_period( statistic_ids: set[str] | None, period: Literal["5minute", "day", "hour", "week", "month"], units: dict[str, str], - types: set[Literal["last_reset", "max", "mean", "min", "state", "sum"]], + types: set[Literal["change", "last_reset", "max", "mean", "min", "state", "sum"]], ) -> str: """Fetch statistics and convert them to json in the executor.""" result = statistics_during_period( @@ -200,7 +200,7 @@ async def ws_handle_get_statistics_during_period( end_time = None if (types := msg.get("types")) is None: - types = {"last_reset", "max", "mean", "min", "state", "sum"} + types = {"change", "last_reset", "max", "mean", "min", "state", "sum"} connection.send_message( await get_instance(hass).async_add_executor_job( _ws_get_statistics_during_period, @@ -225,7 +225,7 @@ async def ws_handle_get_statistics_during_period( vol.Required("period"): vol.Any("5minute", "hour", "day", "week", "month"), vol.Optional("units"): UNIT_SCHEMA, vol.Optional("types"): vol.All( - [vol.Any("last_reset", "max", "mean", "min", "state", "sum")], + [vol.Any("change", "last_reset", "max", "mean", "min", "state", "sum")], vol.Coerce(set), ), } diff --git a/tests/components/recorder/test_statistics.py b/tests/components/recorder/test_statistics.py index 329ceba6e77..d90b9a64eef 100644 --- a/tests/components/recorder/test_statistics.py +++ b/tests/components/recorder/test_statistics.py @@ -959,6 +959,151 @@ def test_import_statistics_errors( assert get_metadata(hass, statistic_ids={"sensor.total_energy_import"}) == {} 
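
With the websocket_api.py changes above, a client can request only the new "change" type. A hypothetical test-style sketch (assumes Home Assistant's hass_ws_client fixture and a recorder setup, as in tests/components/recorder/test_websocket_api.py; not part of this PR):

async def test_change_over_websocket(hass, hass_ws_client) -> None:
    """Request only per-period change for one statistic."""
    client = await hass_ws_client(hass)
    await client.send_json(
        {
            "id": 1,
            "type": "recorder/statistics_during_period",
            "start_time": "2023-05-08T00:00:00+00:00",
            "statistic_ids": ["sensor.total_energy_import"],
            "period": "hour",
            "types": ["change"],  # omitting "types" now returns change as well
        }
    )
    response = await client.receive_json()
    assert response["success"]
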
+@pytest.mark.parametrize("timezone", ["America/Regina", "Europe/Vienna", "UTC"]) +@pytest.mark.freeze_time("2022-10-01 00:00:00+00:00") +def test_daily_statistics( + hass_recorder: Callable[..., HomeAssistant], + caplog: pytest.LogCaptureFixture, + timezone, +) -> None: + """Test daily statistics.""" + dt_util.set_default_time_zone(dt_util.get_time_zone(timezone)) + + hass = hass_recorder() + wait_recording_done(hass) + assert "Compiling statistics for" not in caplog.text + assert "Statistics already compiled" not in caplog.text + + zero = dt_util.utcnow() + period1 = dt_util.as_utc(dt_util.parse_datetime("2022-10-03 00:00:00")) + period2 = dt_util.as_utc(dt_util.parse_datetime("2022-10-03 23:00:00")) + period3 = dt_util.as_utc(dt_util.parse_datetime("2022-10-04 00:00:00")) + period4 = dt_util.as_utc(dt_util.parse_datetime("2022-10-04 23:00:00")) + + external_statistics = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 2, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 3, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 4, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 5, + }, + ) + external_metadata = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import", + "unit_of_measurement": "kWh", + } + + async_add_external_statistics(hass, external_metadata, external_statistics) + wait_recording_done(hass) + stats = statistics_during_period( + hass, zero, period="day", statistic_ids={"test:total_energy_import"} + ) + day1_start = dt_util.as_utc(dt_util.parse_datetime("2022-10-03 00:00:00")) + day1_end = dt_util.as_utc(dt_util.parse_datetime("2022-10-04 00:00:00")) + day2_start = dt_util.as_utc(dt_util.parse_datetime("2022-10-04 00:00:00")) + day2_end = dt_util.as_utc(dt_util.parse_datetime("2022-10-05 00:00:00")) + expected_stats = { + "test:total_energy_import": [ + { + "start": day1_start.timestamp(), + "end": day1_end.timestamp(), + "last_reset": None, + "state": 1.0, + "sum": 3.0, + }, + { + "start": day2_start.timestamp(), + "end": day2_end.timestamp(), + "last_reset": None, + "state": 3.0, + "sum": 5.0, + }, + ] + } + assert stats == expected_stats + + # Get change + stats = statistics_during_period( + hass, + start_time=period1, + statistic_ids={"test:total_energy_import"}, + period="day", + types={"change"}, + ) + assert stats == { + "test:total_energy_import": [ + { + "start": day1_start.timestamp(), + "end": day1_end.timestamp(), + "change": 3.0, + }, + { + "start": day2_start.timestamp(), + "end": day2_end.timestamp(), + "change": 2.0, + }, + ] + } + + # Get data with start during the first period + stats = statistics_during_period( + hass, + start_time=period1 + timedelta(hours=1), + statistic_ids={"test:total_energy_import"}, + period="day", + ) + assert stats == expected_stats + + # Try to get data for entities which do not exist + stats = statistics_during_period( + hass, + start_time=zero, + statistic_ids={"not", "the", "same", "test:total_energy_import"}, + period="day", + ) + assert stats == expected_stats + + # Use 5minute to ensure table switch works + stats = statistics_during_period( + hass, + start_time=zero, + statistic_ids=["test:total_energy_import", "with_other"], + period="5minute", + ) + assert stats == {} + + # Ensure future date has not data + future = dt_util.as_utc(dt_util.parse_datetime("2221-11-01 00:00:00")) + stats = statistics_during_period( + hass, start_time=future, 
end_time=future, period="day" + ) + assert stats == {} + + dt_util.set_default_time_zone(dt_util.get_time_zone("UTC")) + + @pytest.mark.parametrize("timezone", ["America/Regina", "Europe/Vienna", "UTC"]) @pytest.mark.freeze_time("2022-10-01 00:00:00+00:00") def test_weekly_statistics( @@ -1024,7 +1169,7 @@ def test_weekly_statistics( week1_end = dt_util.as_utc(dt_util.parse_datetime("2022-10-10 00:00:00")) week2_start = dt_util.as_utc(dt_util.parse_datetime("2022-10-10 00:00:00")) week2_end = dt_util.as_utc(dt_util.parse_datetime("2022-10-17 00:00:00")) - assert stats == { + expected_stats = { "test:total_energy_import": [ { "start": week1_start.timestamp(), @@ -1042,32 +1187,49 @@ def test_weekly_statistics( }, ] } + assert stats == expected_stats + # Get change stats = statistics_during_period( hass, - start_time=zero, - statistic_ids=["not", "the", "same", "test:total_energy_import"], + start_time=period1, + statistic_ids={"test:total_energy_import"}, period="week", + types={"change"}, ) assert stats == { "test:total_energy_import": [ { "start": week1_start.timestamp(), "end": week1_end.timestamp(), - "last_reset": None, - "state": 1.0, - "sum": 3.0, + "change": 3.0, }, { "start": week2_start.timestamp(), "end": week2_end.timestamp(), - "last_reset": None, - "state": 3.0, - "sum": 5.0, + "change": 2.0, }, ] } + # Get data with start during the first period + stats = statistics_during_period( + hass, + start_time=period1 + timedelta(days=1), + statistic_ids={"test:total_energy_import"}, + period="week", + ) + assert stats == expected_stats + + # Try to get data for entities which do not exist + stats = statistics_during_period( + hass, + start_time=zero, + statistic_ids={"not", "the", "same", "test:total_energy_import"}, + period="week", + ) + assert stats == expected_stats + # Use 5minute to ensure table switch works stats = statistics_during_period( hass, @@ -1080,7 +1242,7 @@ def test_weekly_statistics( # Ensure future date has not data future = dt_util.as_utc(dt_util.parse_datetime("2221-11-01 00:00:00")) stats = statistics_during_period( - hass, start_time=future, end_time=future, period="month" + hass, start_time=future, end_time=future, period="week" ) assert stats == {} @@ -1152,7 +1314,7 @@ def test_monthly_statistics( sep_end = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) oct_start = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) oct_end = dt_util.as_utc(dt_util.parse_datetime("2021-11-01 00:00:00")) - assert stats == { + expected_stats = { "test:total_energy_import": [ { "start": sep_start.timestamp(), @@ -1170,47 +1332,56 @@ def test_monthly_statistics( }, ] } + assert stats == expected_stats + # Get change stats = statistics_during_period( hass, - start_time=zero, - statistic_ids=["not", "the", "same", "test:total_energy_import"], + start_time=period1, + statistic_ids={"test:total_energy_import"}, + period="month", + types={"change"}, + ) + assert stats == { + "test:total_energy_import": [ + { + "start": sep_start.timestamp(), + "end": sep_end.timestamp(), + "change": 3.0, + }, + { + "start": oct_start.timestamp(), + "end": oct_end.timestamp(), + "change": 2.0, + }, + ] + } + # Get data with start during the first period + stats = statistics_during_period( + hass, + start_time=period1 + timedelta(days=1), + statistic_ids={"test:total_energy_import"}, period="month", ) - sep_start = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) - sep_end = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) - oct_start = 
dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) - oct_end = dt_util.as_utc(dt_util.parse_datetime("2021-11-01 00:00:00")) - assert stats == { - "test:total_energy_import": [ - { - "start": sep_start.timestamp(), - "end": sep_end.timestamp(), - "last_reset": None, - "state": pytest.approx(1.0), - "sum": pytest.approx(3.0), - }, - { - "start": oct_start.timestamp(), - "end": oct_end.timestamp(), - "last_reset": None, - "state": pytest.approx(3.0), - "sum": pytest.approx(5.0), - }, - ] - } + assert stats == expected_stats + # Try to get data for entities which do not exist stats = statistics_during_period( hass, start_time=zero, - statistic_ids=["not", "the", "same", "test:total_energy_import"], + statistic_ids={"not", "the", "same", "test:total_energy_import"}, + period="month", + ) + assert stats == expected_stats + + # Get only sum + stats = statistics_during_period( + hass, + start_time=zero, + statistic_ids={"not", "the", "same", "test:total_energy_import"}, period="month", types={"sum"}, ) - sep_start = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) - sep_end = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) - oct_start = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) - oct_end = dt_util.as_utc(dt_util.parse_datetime("2021-11-01 00:00:00")) assert stats == { "test:total_energy_import": [ { @@ -1226,18 +1397,15 @@ def test_monthly_statistics( ] } + # Get only sum + convert units stats = statistics_during_period( hass, start_time=zero, - statistic_ids=["not", "the", "same", "test:total_energy_import"], + statistic_ids={"not", "the", "same", "test:total_energy_import"}, period="month", types={"sum"}, units={"energy": "Wh"}, ) - sep_start = dt_util.as_utc(dt_util.parse_datetime("2021-09-01 00:00:00")) - sep_end = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) - oct_start = dt_util.as_utc(dt_util.parse_datetime("2021-10-01 00:00:00")) - oct_end = dt_util.as_utc(dt_util.parse_datetime("2021-11-01 00:00:00")) assert stats == { "test:total_energy_import": [ { @@ -1354,3 +1522,571 @@ def test_cache_key_for_generate_statistics_at_time_stmt() -> None: ) cache_key_3 = stmt3._generate_cache_key() assert cache_key_1 != cache_key_3 + + +@pytest.mark.parametrize("timezone", ["America/Regina", "Europe/Vienna", "UTC"]) +@pytest.mark.freeze_time("2022-10-01 00:00:00+00:00") +def test_change( + hass_recorder: Callable[..., HomeAssistant], + caplog: pytest.LogCaptureFixture, + timezone, +) -> None: + """Test deriving change from sum statistic.""" + dt_util.set_default_time_zone(dt_util.get_time_zone(timezone)) + + hass = hass_recorder() + wait_recording_done(hass) + assert "Compiling statistics for" not in caplog.text + assert "Statistics already compiled" not in caplog.text + + zero = dt_util.utcnow() + period1 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 00:00:00")) + period2 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 01:00:00")) + period3 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 02:00:00")) + period4 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 03:00:00")) + + external_statistics = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 2, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 3, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": 5, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 8, + }, + ) + external_metadata = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": 
"recorder", + "statistic_id": "sensor.total_energy_import", + "unit_of_measurement": "kWh", + } + + async_import_statistics(hass, external_metadata, external_statistics) + wait_recording_done(hass) + # Get change from far in the past + stats = statistics_during_period( + hass, + zero, + period="hour", + statistic_ids={"sensor.total_energy_import"}, + types={"change"}, + ) + hour1_start = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 00:00:00")) + hour1_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 01:00:00")) + hour2_start = hour1_end + hour2_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 02:00:00")) + hour3_start = hour2_end + hour3_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 03:00:00")) + hour4_start = hour3_end + hour4_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 04:00:00")) + expected_stats = { + "sensor.total_energy_import": [ + { + "start": hour1_start.timestamp(), + "end": hour1_end.timestamp(), + "change": 2.0, + }, + { + "start": hour2_start.timestamp(), + "end": hour2_end.timestamp(), + "change": 1.0, + }, + { + "start": hour3_start.timestamp(), + "end": hour3_end.timestamp(), + "change": 2.0, + }, + { + "start": hour4_start.timestamp(), + "end": hour4_end.timestamp(), + "change": 3.0, + }, + ] + } + assert stats == expected_stats + + # Get change + sum from far in the past + stats = statistics_during_period( + hass, + zero, + period="hour", + statistic_ids={"sensor.total_energy_import"}, + types={"change", "sum"}, + ) + hour1_start = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 00:00:00")) + hour1_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 01:00:00")) + hour2_start = hour1_end + hour2_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 02:00:00")) + hour3_start = hour2_end + hour3_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 03:00:00")) + hour4_start = hour3_end + hour4_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 04:00:00")) + expected_stats_change_sum = { + "sensor.total_energy_import": [ + { + "start": hour1_start.timestamp(), + "end": hour1_end.timestamp(), + "change": 2.0, + "sum": 2.0, + }, + { + "start": hour2_start.timestamp(), + "end": hour2_end.timestamp(), + "change": 1.0, + "sum": 3.0, + }, + { + "start": hour3_start.timestamp(), + "end": hour3_end.timestamp(), + "change": 2.0, + "sum": 5.0, + }, + { + "start": hour4_start.timestamp(), + "end": hour4_end.timestamp(), + "change": 3.0, + "sum": 8.0, + }, + ] + } + assert stats == expected_stats_change_sum + + # Get change from far in the past with unit conversion + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + units={"energy": "Wh"}, + ) + expected_stats_wh = { + "sensor.total_energy_import": [ + { + "start": hour1_start.timestamp(), + "end": hour1_end.timestamp(), + "change": 2.0 * 1000, + }, + { + "start": hour2_start.timestamp(), + "end": hour2_end.timestamp(), + "change": 1.0 * 1000, + }, + { + "start": hour3_start.timestamp(), + "end": hour3_end.timestamp(), + "change": 2.0 * 1000, + }, + { + "start": hour4_start.timestamp(), + "end": hour4_end.timestamp(), + "change": 3.0 * 1000, + }, + ] + } + assert stats == expected_stats_wh + + # Get change from far in the past with implicit unit conversion + hass.states.async_set( + "sensor.total_energy_import", "unknown", {"unit_of_measurement": "MWh"} + ) + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"sensor.total_energy_import"}, + 
period="hour", + types={"change"}, + ) + expected_stats_mwh = { + "sensor.total_energy_import": [ + { + "start": hour1_start.timestamp(), + "end": hour1_end.timestamp(), + "change": 2.0 / 1000, + }, + { + "start": hour2_start.timestamp(), + "end": hour2_end.timestamp(), + "change": 1.0 / 1000, + }, + { + "start": hour3_start.timestamp(), + "end": hour3_end.timestamp(), + "change": 2.0 / 1000, + }, + { + "start": hour4_start.timestamp(), + "end": hour4_end.timestamp(), + "change": 3.0 / 1000, + }, + ] + } + assert stats == expected_stats_mwh + hass.states.async_remove("sensor.total_energy_import") + + # Get change from the first recorded hour + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == expected_stats + + # Get change from the first recorded hour with unit conversion + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + units={"energy": "Wh"}, + ) + assert stats == expected_stats_wh + + # Get change from the first recorded hour with implicit unit conversion + hass.states.async_set( + "sensor.total_energy_import", "unknown", {"unit_of_measurement": "MWh"} + ) + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == expected_stats_mwh + hass.states.async_remove("sensor.total_energy_import") + + # Get change from the second recorded hour + stats = statistics_during_period( + hass, + start_time=hour2_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == { + "sensor.total_energy_import": expected_stats["sensor.total_energy_import"][1:4] + } + + # Get change from the second recorded hour with unit conversion + stats = statistics_during_period( + hass, + start_time=hour2_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + units={"energy": "Wh"}, + ) + assert stats == { + "sensor.total_energy_import": expected_stats_wh["sensor.total_energy_import"][ + 1:4 + ] + } + + # Get change from the second recorded hour with implicit unit conversion + hass.states.async_set( + "sensor.total_energy_import", "unknown", {"unit_of_measurement": "MWh"} + ) + stats = statistics_during_period( + hass, + start_time=hour2_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == { + "sensor.total_energy_import": expected_stats_mwh["sensor.total_energy_import"][ + 1:4 + ] + } + hass.states.async_remove("sensor.total_energy_import") + + # Get change from the second until the third recorded hour + stats = statistics_during_period( + hass, + start_time=hour2_start, + end_time=hour4_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == { + "sensor.total_energy_import": expected_stats["sensor.total_energy_import"][1:3] + } + + # Get change from the fourth recorded hour + stats = statistics_during_period( + hass, + start_time=hour4_start, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == { + "sensor.total_energy_import": expected_stats["sensor.total_energy_import"][3:4] + } + + # Test change with a far future start date + future = dt_util.as_utc(dt_util.parse_datetime("2221-11-01 00:00:00")) + stats = 
statistics_during_period( + hass, + start_time=future, + statistic_ids={"sensor.total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == {} + + dt_util.set_default_time_zone(dt_util.get_time_zone("UTC")) + + +@pytest.mark.parametrize("timezone", ["America/Regina", "Europe/Vienna", "UTC"]) +@pytest.mark.freeze_time("2022-10-01 00:00:00+00:00") +def test_change_with_none( + hass_recorder: Callable[..., HomeAssistant], + caplog: pytest.LogCaptureFixture, + timezone, +) -> None: + """Test deriving change from sum statistic. + + This tests the behavior when some record has None sum. The calculated change + is not expected to be correct, but we should not raise on this error. + """ + dt_util.set_default_time_zone(dt_util.get_time_zone(timezone)) + + hass = hass_recorder() + wait_recording_done(hass) + assert "Compiling statistics for" not in caplog.text + assert "Statistics already compiled" not in caplog.text + + zero = dt_util.utcnow() + period1 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 00:00:00")) + period2 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 01:00:00")) + period3 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 02:00:00")) + period4 = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 03:00:00")) + + external_statistics = ( + { + "start": period1, + "last_reset": None, + "state": 0, + "sum": 2, + }, + { + "start": period2, + "last_reset": None, + "state": 1, + "sum": 3, + }, + { + "start": period3, + "last_reset": None, + "state": 2, + "sum": None, + }, + { + "start": period4, + "last_reset": None, + "state": 3, + "sum": 8, + }, + ) + external_metadata = { + "has_mean": False, + "has_sum": True, + "name": "Total imported energy", + "source": "test", + "statistic_id": "test:total_energy_import", + "unit_of_measurement": "kWh", + } + + async_add_external_statistics(hass, external_metadata, external_statistics) + wait_recording_done(hass) + # Get change from far in the past + stats = statistics_during_period( + hass, + zero, + period="hour", + statistic_ids={"test:total_energy_import"}, + types={"change"}, + ) + hour1_start = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 00:00:00")) + hour1_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 01:00:00")) + hour2_start = hour1_end + hour2_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 02:00:00")) + hour3_start = hour2_end + hour3_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 03:00:00")) + hour4_start = hour3_end + hour4_end = dt_util.as_utc(dt_util.parse_datetime("2023-05-08 04:00:00")) + expected_stats = { + "test:total_energy_import": [ + { + "start": hour1_start.timestamp(), + "end": hour1_end.timestamp(), + "change": 2.0, + }, + { + "start": hour2_start.timestamp(), + "end": hour2_end.timestamp(), + "change": 1.0, + }, + { + "start": hour3_start.timestamp(), + "end": hour3_end.timestamp(), + "change": None, + }, + { + "start": hour4_start.timestamp(), + "end": hour4_end.timestamp(), + "change": 5.0, + }, + ] + } + assert stats == expected_stats + + # Get change from far in the past with unit conversion + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + units={"energy": "Wh"}, + ) + expected_stats_wh = { + "test:total_energy_import": [ + { + "start": hour1_start.timestamp(), + "end": hour1_end.timestamp(), + "change": 2.0 * 1000, + }, + { + "start": hour2_start.timestamp(), + "end": hour2_end.timestamp(), + "change": 1.0 * 1000, + }, + { + "start": 
hour3_start.timestamp(), + "end": hour3_end.timestamp(), + "change": None, + }, + { + "start": hour4_start.timestamp(), + "end": hour4_end.timestamp(), + "change": 5.0 * 1000, + }, + ] + } + assert stats == expected_stats_wh + + # Get change from the first recorded hour + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == expected_stats + + # Get change from the first recorded hour with unit conversion + stats = statistics_during_period( + hass, + start_time=hour1_start, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + units={"energy": "Wh"}, + ) + assert stats == expected_stats_wh + + # Get change from the second recorded hour + stats = statistics_during_period( + hass, + start_time=hour2_start, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == { + "test:total_energy_import": expected_stats["test:total_energy_import"][1:4] + } + + # Get change from the second recorded hour with unit conversion + stats = statistics_during_period( + hass, + start_time=hour2_start, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + units={"energy": "Wh"}, + ) + assert stats == { + "test:total_energy_import": expected_stats_wh["test:total_energy_import"][1:4] + } + + # Get change from the second until the third recorded hour + stats = statistics_during_period( + hass, + start_time=hour2_start, + end_time=hour4_start, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == { + "test:total_energy_import": expected_stats["test:total_energy_import"][1:3] + } + + # Get change from the fourth recorded hour + stats = statistics_during_period( + hass, + start_time=hour4_start, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == { + "test:total_energy_import": [ + { + "start": hour4_start.timestamp(), + "end": hour4_end.timestamp(), + "change": 8.0, # Assumed to be 8 because the previous hour has no data + }, + ] + } + + # Test change with a far future start date + future = dt_util.as_utc(dt_util.parse_datetime("2221-11-01 00:00:00")) + stats = statistics_during_period( + hass, + start_time=future, + statistic_ids={"test:total_energy_import"}, + period="hour", + types={"change"}, + ) + assert stats == {} + + dt_util.set_default_time_zone(dt_util.get_time_zone("UTC")) diff --git a/tests/components/recorder/test_websocket_api.py b/tests/components/recorder/test_websocket_api.py index fffeb019822..37f5dc77d00 100644 --- a/tests/components/recorder/test_websocket_api.py +++ b/tests/components/recorder/test_websocket_api.py @@ -1037,6 +1037,7 @@ async def test_sum_statistics_during_period_unit_conversion( { "start": int(now.timestamp() * 1000), "end": int((now + timedelta(minutes=5)).timestamp() * 1000), + "change": pytest.approx(value), "last_reset": None, "state": pytest.approx(value), "sum": pytest.approx(value), @@ -1062,6 +1063,7 @@ async def test_sum_statistics_during_period_unit_conversion( { "start": int(now.timestamp() * 1000), "end": int((now + timedelta(minutes=5)).timestamp() * 1000), + "change": pytest.approx(converted_value), "last_reset": None, "state": pytest.approx(converted_value), "sum": pytest.approx(converted_value), diff --git a/tests/components/sensor/test_recorder.py b/tests/components/sensor/test_recorder.py index 9b297bf884f..53fb2493d70 100644 --- 
a/tests/components/sensor/test_recorder.py +++ b/tests/components/sensor/test_recorder.py @@ -915,11 +915,11 @@ async def test_compile_hourly_sum_statistics_amount( } assert stats == expected_stats - # With an offset of 1 minute, we expect to get all periods + # With an offset of 1 minute, we expect to get the 2nd and 3rd periods stats = statistics_during_period( hass, period0 + timedelta(minutes=1), period="5minute" ) - assert stats == expected_stats + assert stats == {"sensor.test1": expected_stats["sensor.test1"][1:3]} # With an offset of 5 minutes, we expect to get the 2nd and 3rd periods stats = statistics_during_period( @@ -927,11 +927,11 @@ async def test_compile_hourly_sum_statistics_amount( ) assert stats == {"sensor.test1": expected_stats["sensor.test1"][1:3]} - # With an offset of 6 minutes, we expect to get the 2nd and 3rd periods + # With an offset of 6 minutes, we expect to get the 3rd period stats = statistics_during_period( hass, period0 + timedelta(minutes=6), period="5minute" ) - assert stats == {"sensor.test1": expected_stats["sensor.test1"][1:3]} + assert stats == {"sensor.test1": expected_stats["sensor.test1"][2:3]} assert "Error while processing event StatisticsTask" not in caplog.text assert "Detected new cycle for sensor.test1, last_reset set to" in caplog.text
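
Taken together, the new type can also be exercised directly from Python. An illustrative helper (not part of the PR; assumes a running recorder and execution in its database executor, since statistics_during_period queries the database):

from datetime import timedelta

import homeassistant.util.dt as dt_util
from homeassistant.components.recorder.statistics import statistics_during_period

def energy_change_last_day(hass):
    """Per-hour change of one statistic over the past 24 hours."""
    start = dt_util.utcnow() - timedelta(days=1)
    return statistics_during_period(
        hass,
        start,  # start_time
        None,  # end_time: open ended
        {"sensor.total_energy_import"},  # statistic_ids (hypothetical entity)
        "hour",  # period
        None,  # units: keep the statistic's native unit
        {"change"},  # types: only the derived change column
    )
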