Improve history api performance (#35822)

* Improve history api performance

A new option "minimal_response" reduces the amount of data
sent between the first and last history states to only the
"last_changed" and "state" fields.

Calling to_native is now avoided where possible and only
done at the end for rows that will be returned in the response.

When sending the `minimal_response` option, the history
api now returns a json response similar to the following
for an entity

Testing:

History API Response time for 1 day
Average of 10 runs with minimal_response

Before: 19.89s (content length: 3427428)
After: 8.44s (content length: 592199)

```
[{
	"attributes": {--TRUNCATED--},
	"context": {--TRUNCATED--},
	"entity_id": "binary_sensor.powerwall_status",
	"last_changed": "2020-05-18T23:20:03.213000+00:00",
	"last_updated": "2020-05-18T23:20:03.213000+00:00",
	"state": "on"
},
...
{
	"last_changed": "2020-05-19T00:41:08Z",
	"state": "unavailable"
},
...
{
	"attributes": {--TRUNCATED--},
	"context": {--TRUNCATED--},
	"entity_id": "binary_sensor.powerwall_status",
	"last_changed": "2020-05-19T00:42:08.069698+00:00",
	"last_updated": "2020-05-19T00:42:08.069698+00:00",
	"state": "on"
}]
```

* Remove impossible state check

* Remove another impossible state check

* Update homeassistant/components/history/__init__.py

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>

* Reorder to save some indent per review

* Make query response make sense with to_native=False

* Update test for 00:00 to Z change

* Update homeassistant/components/recorder/models.py

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
This commit is contained in:
J. Nick Koston 2020-05-26 21:53:56 -05:00 committed by GitHub
parent 6a06d648d7
commit 0a6deeb49b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 194 additions and 37 deletions

View File

@ -12,7 +12,7 @@ import voluptuous as vol
from homeassistant.components import recorder
from homeassistant.components.http import HomeAssistantView
from homeassistant.components.recorder.models import States
from homeassistant.components.recorder.models import DB_TIMEZONE, States
from homeassistant.components.recorder.util import execute, session_scope
from homeassistant.const import (
ATTR_HIDDEN,
@ -22,6 +22,7 @@ from homeassistant.const import (
CONF_INCLUDE,
HTTP_BAD_REQUEST,
)
from homeassistant.core import split_entity_id
import homeassistant.helpers.config_validation as cv
import homeassistant.util.dt as dt_util
@ -32,6 +33,9 @@ _LOGGER = logging.getLogger(__name__)
DOMAIN = "history"
CONF_ORDER = "use_include_order"
STATE_KEY = "state"
LAST_CHANGED_KEY = "last_changed"
CONFIG_SCHEMA = vol.Schema(
{
DOMAIN: recorder.FILTER_SCHEMA.extend(
@ -43,6 +47,9 @@ CONFIG_SCHEMA = vol.Schema(
SIGNIFICANT_DOMAINS = ("climate", "device_tracker", "thermostat", "water_heater")
IGNORE_DOMAINS = ("zone", "scene")
NEED_ATTRIBUTE_DOMAINS = {"climate", "water_heater", "thermostat", "script"}
SCRIPT_DOMAIN = "script"
ATTR_CAN_CANCEL = "can_cancel"
def get_significant_states(hass, *args, **kwargs):
@ -60,6 +67,7 @@ def _get_significant_states(
filters=None,
include_start_time_state=True,
significant_changes_only=True,
minimal_response=False,
):
"""
Return states changes during UTC period start_time - end_time.
@ -87,19 +95,15 @@ def _get_significant_states(
if end_time is not None:
query = query.filter(States.last_updated < end_time)
query = query.order_by(States.last_updated)
query = query.order_by(States.entity_id, States.last_updated)
states = (
state
for state in execute(query)
if (_is_significant(state) and not state.attributes.get(ATTR_HIDDEN, False))
)
states = execute(query, to_native=False)
if _LOGGER.isEnabledFor(logging.DEBUG):
elapsed = time.perf_counter() - timer_start
_LOGGER.debug("get_significant_states took %fs", elapsed)
return _states_to_json(
return _sorted_states_to_json(
hass,
session,
states,
@ -107,6 +111,7 @@ def _get_significant_states(
entity_ids,
filters,
include_start_time_state,
minimal_response,
)
@ -127,9 +132,11 @@ def state_changes_during_period(hass, start_time, end_time=None, entity_id=None)
entity_ids = [entity_id] if entity_id is not None else None
states = execute(query.order_by(States.last_updated))
states = execute(
query.order_by(States.entity_id, States.last_updated), to_native=False
)
return _states_to_json(hass, session, states, start_time, entity_ids)
return _sorted_states_to_json(hass, session, states, start_time, entity_ids)
def get_last_state_changes(hass, number_of_states, entity_id):
@ -146,10 +153,13 @@ def get_last_state_changes(hass, number_of_states, entity_id):
entity_ids = [entity_id] if entity_id is not None else None
states = execute(
query.order_by(States.last_updated.desc()).limit(number_of_states)
query.order_by(States.entity_id, States.last_updated.desc()).limit(
number_of_states
),
to_native=False,
)
return _states_to_json(
return _sorted_states_to_json(
hass,
session,
reversed(states),
@ -252,7 +262,7 @@ def _get_states_with_session(
]
def _states_to_json(
def _sorted_states_to_json(
hass,
session,
states,
@ -260,12 +270,15 @@ def _states_to_json(
entity_ids,
filters=None,
include_start_time_state=True,
minimal_response=False,
):
"""Convert SQL results into JSON friendly data structure.
This takes our state list and turns it into a JSON friendly data
structure {'entity_id': [list of states], 'entity_id2': [list of states]}
States must be sorted by entity_id and last_updated
We also need to go back and create a synthetic zero data point for
each list of states, otherwise our graphs won't start on the Y
axis correctly.
@ -293,7 +306,61 @@ def _states_to_json(
# Append all changes to it
for ent_id, group in groupby(states, lambda state: state.entity_id):
result[ent_id].extend(group)
domain = split_entity_id(ent_id)[0]
ent_results = result[ent_id]
if not minimal_response or domain in NEED_ATTRIBUTE_DOMAINS:
ent_results.extend(
[
native_state
for native_state in (db_state.to_native() for db_state in group)
if (
domain != SCRIPT_DOMAIN
or native_state.attributes.get(ATTR_CAN_CANCEL)
)
and not native_state.attributes.get(ATTR_HIDDEN, False)
]
)
continue
# With minimal response we only provide a native
# State for the first and last response. All the states
# in-between only provide the "state" and the
# "last_changed".
if not ent_results:
ent_results.append(next(group).to_native())
initial_state = ent_results[-1]
prev_state = ent_results[-1]
initial_state_count = len(ent_results)
for db_state in group:
if ATTR_HIDDEN in db_state.attributes and db_state.to_native().attributes.get(
ATTR_HIDDEN, False
):
continue
# With minimal response we do not care about attribute
# changes so we can filter out duplicate states
if db_state.state == prev_state.state:
continue
ent_results.append(
{
STATE_KEY: db_state.state,
LAST_CHANGED_KEY: f"{str(db_state.last_changed).replace(' ','T').split('.')[0]}{DB_TIMEZONE}",
}
)
prev_state = db_state
if (
prev_state
and prev_state != initial_state
and len(ent_results) != initial_state_count
):
# There was at least one state change
# replace the last minimal state with
# a full state
ent_results[-1] = prev_state.to_native()
# Filter out the empty lists if some states had 0 results.
return {key: val for key, val in result.items() if val}
@ -378,6 +445,8 @@ class HistoryPeriodView(HomeAssistantView):
request.query.get("significant_changes_only", "1") != "0"
)
minimal_response = "minimal_response" in request.query
hass = request.app["hass"]
return cast(
@ -390,6 +459,7 @@ class HistoryPeriodView(HomeAssistantView):
entity_ids,
include_start_time_state,
significant_changes_only,
minimal_response,
),
)
@ -401,6 +471,7 @@ class HistoryPeriodView(HomeAssistantView):
entity_ids,
include_start_time_state,
significant_changes_only,
minimal_response,
):
"""Fetch significant stats from the database as json."""
timer_start = time.perf_counter()
@ -415,6 +486,7 @@ class HistoryPeriodView(HomeAssistantView):
self.filters,
include_start_time_state,
significant_changes_only,
minimal_response,
)
result = list(result.values())
@ -500,12 +572,3 @@ class Filters:
if self.excluded_entities:
query = query.filter(~States.entity_id.in_(self.excluded_entities))
return query
def _is_significant(state):
"""Test if state is significant for history charts.
Will only test for things that are not filtered out in SQL.
"""
# scripts that are not cancellable will never change state
return state.domain != "script" or state.attributes.get("can_cancel")

View File

@ -28,6 +28,8 @@ SCHEMA_VERSION = 7
_LOGGER = logging.getLogger(__name__)
DB_TIMEZONE = "Z"
class Events(Base): # type: ignore
"""Event history data."""
@ -64,7 +66,7 @@ class Events(Base): # type: ignore
self.event_type,
json.loads(self.event_data),
EventOrigin(self.origin),
_process_timestamp(self.time_fired),
process_timestamp(self.time_fired),
context=context,
)
except ValueError:
@ -133,8 +135,8 @@ class States(Base): # type: ignore
self.entity_id,
self.state,
json.loads(self.attributes),
_process_timestamp(self.last_changed),
_process_timestamp(self.last_updated),
process_timestamp(self.last_changed),
process_timestamp(self.last_updated),
context=context,
# Temp, because database can still store invalid entity IDs
# Remove with 1.0 or in 2020.
@ -193,7 +195,7 @@ class SchemaChanges(Base): # type: ignore
changed = Column(DateTime(timezone=True), default=dt_util.utcnow)
def _process_timestamp(ts):
def process_timestamp(ts):
"""Process a timestamp into datetime object."""
if ts is None:
return None

View File

@ -54,7 +54,7 @@ def commit(session, work):
return False
def execute(qry):
def execute(qry, to_native=True):
"""Query the database and convert the objects to HA native form.
This method also retries a few times in the case of stale connections.
@ -62,17 +62,25 @@ def execute(qry):
for tryno in range(0, RETRIES):
try:
timer_start = time.perf_counter()
result = [
row for row in (row.to_native() for row in qry) if row is not None
]
if to_native:
result = [
row for row in (row.to_native() for row in qry) if row is not None
]
else:
result = list(qry)
if _LOGGER.isEnabledFor(logging.DEBUG):
elapsed = time.perf_counter() - timer_start
_LOGGER.debug(
"converting %d rows to native objects took %fs",
len(result),
elapsed,
)
if to_native:
_LOGGER.debug(
"converting %d rows to native objects took %fs",
len(result),
elapsed,
)
else:
_LOGGER.debug(
"querying %d rows took %fs", len(result), elapsed,
)
return result
except SQLAlchemyError as err:

View File

@ -1,10 +1,13 @@
"""The tests the History component."""
# pylint: disable=protected-access,invalid-name
from datetime import timedelta
import json
import unittest
from homeassistant.components import history, recorder
from homeassistant.components.recorder.models import process_timestamp
import homeassistant.core as ha
from homeassistant.helpers.json import JSONEncoder
from homeassistant.setup import async_setup_component, setup_component
import homeassistant.util.dt as dt_util
@ -197,6 +200,41 @@ class TestComponentHistory(unittest.TestCase):
)
assert states == hist
def test_get_significant_states_minimal_response(self):
    """Test that only significant states are returned.

    When minimal_response is set, only the first and last states
    return a complete state.

    We should get back every thermostat change that includes an
    attribute change, but only the state updates for media player
    (attribute changes are not significant and not returned).
    """
    zero, four, states = self.record_states()
    hist = history.get_significant_states(
        self.hass, zero, four, filters=history.Filters(), minimal_response=True
    )

    # With minimal_response, the second media_player.test state is
    # reduced to just last_changed and state.  We round-trip through
    # JSONEncoder so that our pre-encoded last_changed matches exactly
    # what encoding a native state would produce.
    second_state = states["media_player.test"][1]
    expected_last_changed = json.dumps(
        process_timestamp(second_state.last_changed.replace(microsecond=0)),
        cls=JSONEncoder,
    ).replace('"', "")
    if expected_last_changed.endswith("+00:00"):
        expected_last_changed = (
            f"{expected_last_changed[:-6]}{recorder.models.DB_TIMEZONE}"
        )
    states["media_player.test"][1] = {
        "last_changed": expected_last_changed,
        "state": second_state.state,
    }

    assert states == hist
def test_get_significant_states_with_initial(self):
"""Test that only significant states are returned.
@ -252,6 +290,7 @@ class TestComponentHistory(unittest.TestCase):
"""Test that only significant states are returned for one entity."""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -265,6 +304,7 @@ class TestComponentHistory(unittest.TestCase):
"""Test that only significant states are returned for one entity."""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -286,6 +326,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
config = history.CONFIG_SCHEMA(
{
@ -346,6 +387,7 @@ class TestComponentHistory(unittest.TestCase):
"""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -372,6 +414,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
config = history.CONFIG_SCHEMA(
{
@ -392,6 +435,7 @@ class TestComponentHistory(unittest.TestCase):
"""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -414,6 +458,7 @@ class TestComponentHistory(unittest.TestCase):
"""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["script.can_cancel_this_one"]
config = history.CONFIG_SCHEMA(
@ -438,6 +483,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -462,6 +508,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -607,6 +654,7 @@ class TestComponentHistory(unittest.TestCase):
self.init_recorder()
mp = "media_player.test"
mp2 = "media_player.test2"
mp3 = "media_player.test3"
therm = "thermostat.test"
therm2 = "thermostat.test2"
zone = "zone.home"
@ -625,7 +673,7 @@ class TestComponentHistory(unittest.TestCase):
three = two + timedelta(seconds=1)
four = three + timedelta(seconds=1)
states = {therm: [], therm2: [], mp: [], mp2: [], script_c: []}
states = {therm: [], therm2: [], mp: [], mp2: [], mp3: [], script_c: []}
with patch(
"homeassistant.components.recorder.dt_util.utcnow", return_value=one
):
@ -638,6 +686,9 @@ class TestComponentHistory(unittest.TestCase):
states[mp2].append(
set_state(mp2, "YouTube", attributes={"media_title": str(sentinel.mt2)})
)
states[mp3].append(
set_state(mp3, "idle", attributes={"media_title": str(sentinel.mt1)})
)
states[therm].append(
set_state(therm, 20, attributes={"current_temperature": 19.5})
)
@ -647,6 +698,12 @@ class TestComponentHistory(unittest.TestCase):
):
# This state will be skipped only different in time
set_state(mp, "YouTube", attributes={"media_title": str(sentinel.mt3)})
# This state will be skipped because it is hidden
set_state(
mp3,
"Apple TV",
attributes={"media_title": str(sentinel.mt2), "hidden": True},
)
# This state will be skipped because domain blacklisted
set_state(zone, "zoning")
set_state(script_nc, "off")
@ -666,6 +723,9 @@ class TestComponentHistory(unittest.TestCase):
states[mp].append(
set_state(mp, "Netflix", attributes={"media_title": str(sentinel.mt4)})
)
states[mp3].append(
set_state(mp3, "Netflix", attributes={"media_title": str(sentinel.mt3)})
)
# Attributes changed even though state is the same
states[therm].append(
set_state(therm, 21, attributes={"current_temperature": 20})
@ -686,6 +746,30 @@ async def test_fetch_period_api(hass, hass_client):
assert response.status == 200
async def test_fetch_period_api_with_use_include_order(hass, hass_client):
    """Test the fetch period view for history when use_include_order is on."""
    await hass.async_add_executor_job(init_recorder_component, hass)
    config = {history.DOMAIN: {history.CONF_ORDER: True}}
    await async_setup_component(hass, "history", config)
    await hass.async_add_job(hass.data[recorder.DATA_INSTANCE].block_till_done)

    client = await hass_client()
    url = f"/api/history/period/{dt_util.utcnow().isoformat()}"
    response = await client.get(url)
    assert response.status == 200
async def test_fetch_period_api_with_minimal_response(hass, hass_client):
    """Test the fetch period view for history when minimal_response is requested."""
    await hass.async_add_executor_job(init_recorder_component, hass)
    await async_setup_component(hass, "history", {})
    await hass.async_add_job(hass.data[recorder.DATA_INSTANCE].block_till_done)

    client = await hass_client()
    start = dt_util.utcnow().isoformat()
    response = await client.get(f"/api/history/period/{start}?minimal_response")
    assert response.status == 200
async def test_fetch_period_api_with_include_order(hass, hass_client):
"""Test the fetch period view for history."""
await hass.async_add_executor_job(init_recorder_component, hass)