Improve history api performance (#35822)

* Improve history api performance

A new option "minimal_response" reduces the amount of data
sent between the first and last history states to only the
"last_changed" and "state" fields.

Calling to_native is now avoided where possible and only
done at the end for rows that will be returned in the response.

When sending the `minimal_response` option, the history
api now returns a json response similar to the following
for an entity

Testing:

History API Response time for 1 day
Average of 10 runs with minimal_response

Before: 19.89s (content length: 3427428)
After: 8.44s (content length: 592199)

```
[{
	"attributes": {--TRUNCATED--},
	"context": {--TRUNCATED--},
	"entity_id": "binary_sensor.powerwall_status",
	"last_changed": "2020-05-18T23:20:03.213000+00:00",
	"last_updated": "2020-05-18T23:20:03.213000+00:00",
	"state": "on"
},
...
{
	"last_changed": "2020-05-19T00:41:08Z",
	"state": "unavailable"
},
...
{
	"attributes": {--TRUNCATED--},
	"context": {--TRUNCATED--},
	"entity_id": "binary_sensor.powerwall_status",
	"last_changed": "2020-05-19T00:42:08.069698+00:00",
	"last_updated": "2020-05-19T00:42:08.069698+00:00",
	"state": "on"
}]
```

* Remove impossible state check

* Remove another impossible state check

* Update homeassistant/components/history/__init__.py

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>

* Reorder to save some indent per review

* Make query response make sense with to_native=False

* Update test for 00:00 to Z change

* Update homeassistant/components/recorder/models.py

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>

Co-authored-by: Paulus Schoutsen <paulus@home-assistant.io>
This commit is contained in:
J. Nick Koston 2020-05-26 21:53:56 -05:00 committed by GitHub
parent 6a06d648d7
commit 0a6deeb49b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 194 additions and 37 deletions

View File

@ -12,7 +12,7 @@ import voluptuous as vol
from homeassistant.components import recorder
from homeassistant.components.http import HomeAssistantView
from homeassistant.components.recorder.models import States
from homeassistant.components.recorder.models import DB_TIMEZONE, States
from homeassistant.components.recorder.util import execute, session_scope
from homeassistant.const import (
ATTR_HIDDEN,
@ -22,6 +22,7 @@ from homeassistant.const import (
CONF_INCLUDE,
HTTP_BAD_REQUEST,
)
from homeassistant.core import split_entity_id
import homeassistant.helpers.config_validation as cv
import homeassistant.util.dt as dt_util
@ -32,6 +33,9 @@ _LOGGER = logging.getLogger(__name__)
DOMAIN = "history"
CONF_ORDER = "use_include_order"
STATE_KEY = "state"
LAST_CHANGED_KEY = "last_changed"
CONFIG_SCHEMA = vol.Schema(
{
DOMAIN: recorder.FILTER_SCHEMA.extend(
@ -43,6 +47,9 @@ CONFIG_SCHEMA = vol.Schema(
SIGNIFICANT_DOMAINS = ("climate", "device_tracker", "thermostat", "water_heater")
IGNORE_DOMAINS = ("zone", "scene")
NEED_ATTRIBUTE_DOMAINS = {"climate", "water_heater", "thermostat", "script"}
SCRIPT_DOMAIN = "script"
ATTR_CAN_CANCEL = "can_cancel"
def get_significant_states(hass, *args, **kwargs):
@ -60,6 +67,7 @@ def _get_significant_states(
filters=None,
include_start_time_state=True,
significant_changes_only=True,
minimal_response=False,
):
"""
Return states changes during UTC period start_time - end_time.
@ -87,19 +95,15 @@ def _get_significant_states(
if end_time is not None:
query = query.filter(States.last_updated < end_time)
query = query.order_by(States.last_updated)
query = query.order_by(States.entity_id, States.last_updated)
states = (
state
for state in execute(query)
if (_is_significant(state) and not state.attributes.get(ATTR_HIDDEN, False))
)
states = execute(query, to_native=False)
if _LOGGER.isEnabledFor(logging.DEBUG):
elapsed = time.perf_counter() - timer_start
_LOGGER.debug("get_significant_states took %fs", elapsed)
return _states_to_json(
return _sorted_states_to_json(
hass,
session,
states,
@ -107,6 +111,7 @@ def _get_significant_states(
entity_ids,
filters,
include_start_time_state,
minimal_response,
)
@ -127,9 +132,11 @@ def state_changes_during_period(hass, start_time, end_time=None, entity_id=None)
entity_ids = [entity_id] if entity_id is not None else None
states = execute(query.order_by(States.last_updated))
states = execute(
query.order_by(States.entity_id, States.last_updated), to_native=False
)
return _states_to_json(hass, session, states, start_time, entity_ids)
return _sorted_states_to_json(hass, session, states, start_time, entity_ids)
def get_last_state_changes(hass, number_of_states, entity_id):
@ -146,10 +153,13 @@ def get_last_state_changes(hass, number_of_states, entity_id):
entity_ids = [entity_id] if entity_id is not None else None
states = execute(
query.order_by(States.last_updated.desc()).limit(number_of_states)
query.order_by(States.entity_id, States.last_updated.desc()).limit(
number_of_states
),
to_native=False,
)
return _states_to_json(
return _sorted_states_to_json(
hass,
session,
reversed(states),
@ -252,7 +262,7 @@ def _get_states_with_session(
]
def _states_to_json(
def _sorted_states_to_json(
hass,
session,
states,
@ -260,12 +270,15 @@ def _states_to_json(
entity_ids,
filters=None,
include_start_time_state=True,
minimal_response=False,
):
"""Convert SQL results into JSON friendly data structure.
This takes our state list and turns it into a JSON friendly data
structure {'entity_id': [list of states], 'entity_id2': [list of states]}
States must be sorted by entity_id and last_updated
We also need to go back and create a synthetic zero data point for
each list of states, otherwise our graphs won't start on the Y
axis correctly.
@ -293,7 +306,61 @@ def _states_to_json(
# Append all changes to it
for ent_id, group in groupby(states, lambda state: state.entity_id):
result[ent_id].extend(group)
domain = split_entity_id(ent_id)[0]
ent_results = result[ent_id]
if not minimal_response or domain in NEED_ATTRIBUTE_DOMAINS:
ent_results.extend(
[
native_state
for native_state in (db_state.to_native() for db_state in group)
if (
domain != SCRIPT_DOMAIN
or native_state.attributes.get(ATTR_CAN_CANCEL)
)
and not native_state.attributes.get(ATTR_HIDDEN, False)
]
)
continue
# With minimal response we only provide a native
# State for the first and last response. All the states
# in-between only provide the "state" and the
# "last_changed".
if not ent_results:
ent_results.append(next(group).to_native())
initial_state = ent_results[-1]
prev_state = ent_results[-1]
initial_state_count = len(ent_results)
for db_state in group:
if ATTR_HIDDEN in db_state.attributes and db_state.to_native().attributes.get(
ATTR_HIDDEN, False
):
continue
# With minimal response we do not care about attribute
# changes so we can filter out duplicate states
if db_state.state == prev_state.state:
continue
ent_results.append(
{
STATE_KEY: db_state.state,
LAST_CHANGED_KEY: f"{str(db_state.last_changed).replace(' ','T').split('.')[0]}{DB_TIMEZONE}",
}
)
prev_state = db_state
if (
prev_state
and prev_state != initial_state
and len(ent_results) != initial_state_count
):
# There was at least one state change
# replace the last minimal state with
# a full state
ent_results[-1] = prev_state.to_native()
# Filter out the empty lists if some states had 0 results.
return {key: val for key, val in result.items() if val}
@ -378,6 +445,8 @@ class HistoryPeriodView(HomeAssistantView):
request.query.get("significant_changes_only", "1") != "0"
)
minimal_response = "minimal_response" in request.query
hass = request.app["hass"]
return cast(
@ -390,6 +459,7 @@ class HistoryPeriodView(HomeAssistantView):
entity_ids,
include_start_time_state,
significant_changes_only,
minimal_response,
),
)
@ -401,6 +471,7 @@ class HistoryPeriodView(HomeAssistantView):
entity_ids,
include_start_time_state,
significant_changes_only,
minimal_response,
):
"""Fetch significant stats from the database as json."""
timer_start = time.perf_counter()
@ -415,6 +486,7 @@ class HistoryPeriodView(HomeAssistantView):
self.filters,
include_start_time_state,
significant_changes_only,
minimal_response,
)
result = list(result.values())
@ -500,12 +572,3 @@ class Filters:
if self.excluded_entities:
query = query.filter(~States.entity_id.in_(self.excluded_entities))
return query
def _is_significant(state):
"""Test if state is significant for history charts.
Will only test for things that are not filtered out in SQL.
"""
# scripts that are not cancellable will never change state
return state.domain != "script" or state.attributes.get("can_cancel")

View File

@ -28,6 +28,8 @@ SCHEMA_VERSION = 7
_LOGGER = logging.getLogger(__name__)
DB_TIMEZONE = "Z"
class Events(Base): # type: ignore
"""Event history data."""
@ -64,7 +66,7 @@ class Events(Base): # type: ignore
self.event_type,
json.loads(self.event_data),
EventOrigin(self.origin),
_process_timestamp(self.time_fired),
process_timestamp(self.time_fired),
context=context,
)
except ValueError:
@ -133,8 +135,8 @@ class States(Base): # type: ignore
self.entity_id,
self.state,
json.loads(self.attributes),
_process_timestamp(self.last_changed),
_process_timestamp(self.last_updated),
process_timestamp(self.last_changed),
process_timestamp(self.last_updated),
context=context,
# Temp, because database can still store invalid entity IDs
# Remove with 1.0 or in 2020.
@ -193,7 +195,7 @@ class SchemaChanges(Base): # type: ignore
changed = Column(DateTime(timezone=True), default=dt_util.utcnow)
def _process_timestamp(ts):
def process_timestamp(ts):
"""Process a timestamp into datetime object."""
if ts is None:
return None

View File

@ -54,7 +54,7 @@ def commit(session, work):
return False
def execute(qry):
def execute(qry, to_native=True):
"""Query the database and convert the objects to HA native form.
This method also retries a few times in the case of stale connections.
@ -62,17 +62,25 @@ def execute(qry):
for tryno in range(0, RETRIES):
try:
timer_start = time.perf_counter()
result = [
row for row in (row.to_native() for row in qry) if row is not None
]
if to_native:
result = [
row for row in (row.to_native() for row in qry) if row is not None
]
else:
result = list(qry)
if _LOGGER.isEnabledFor(logging.DEBUG):
elapsed = time.perf_counter() - timer_start
_LOGGER.debug(
"converting %d rows to native objects took %fs",
len(result),
elapsed,
)
if to_native:
_LOGGER.debug(
"converting %d rows to native objects took %fs",
len(result),
elapsed,
)
else:
_LOGGER.debug(
"querying %d rows took %fs", len(result), elapsed,
)
return result
except SQLAlchemyError as err:

View File

@ -1,10 +1,13 @@
"""The tests the History component."""
# pylint: disable=protected-access,invalid-name
from datetime import timedelta
import json
import unittest
from homeassistant.components import history, recorder
from homeassistant.components.recorder.models import process_timestamp
import homeassistant.core as ha
from homeassistant.helpers.json import JSONEncoder
from homeassistant.setup import async_setup_component, setup_component
import homeassistant.util.dt as dt_util
@ -197,6 +200,41 @@ class TestComponentHistory(unittest.TestCase):
)
assert states == hist
def test_get_significant_states_minimal_response(self):
    """Test that only significant states are returned.

    When minimal_response is set, only the first and last states
    return a complete state.

    We should get back every thermostat change that includes an
    attribute change, but only the state updates for media player
    (attribute changes are not significant and not returned).
    """
    zero, four, states = self.record_states()
    hist = history.get_significant_states(
        self.hass, zero, four, filters=history.Filters(), minimal_response=True
    )

    # With minimal_response, the second media_player.test state is
    # reduced to just last_changed and state.  We round-trip through
    # JSONEncoder so that our pre-encoded last_changed matches exactly
    # what encoding a native state would produce.
    second_state = states["media_player.test"][1]
    expected_last_changed = json.dumps(
        process_timestamp(second_state.last_changed.replace(microsecond=0)),
        cls=JSONEncoder,
    ).replace('"', "")
    if expected_last_changed.endswith("+00:00"):
        expected_last_changed = (
            f"{expected_last_changed[:-6]}{recorder.models.DB_TIMEZONE}"
        )
    states["media_player.test"][1] = {
        "last_changed": expected_last_changed,
        "state": second_state.state,
    }

    assert states == hist
def test_get_significant_states_with_initial(self):
"""Test that only significant states are returned.
@ -252,6 +290,7 @@ class TestComponentHistory(unittest.TestCase):
"""Test that only significant states are returned for one entity."""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -265,6 +304,7 @@ class TestComponentHistory(unittest.TestCase):
"""Test that only significant states are returned for one entity."""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -286,6 +326,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
config = history.CONFIG_SCHEMA(
{
@ -346,6 +387,7 @@ class TestComponentHistory(unittest.TestCase):
"""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -372,6 +414,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
config = history.CONFIG_SCHEMA(
{
@ -392,6 +435,7 @@ class TestComponentHistory(unittest.TestCase):
"""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -414,6 +458,7 @@ class TestComponentHistory(unittest.TestCase):
"""
zero, four, states = self.record_states()
del states["media_player.test2"]
del states["media_player.test3"]
del states["script.can_cancel_this_one"]
config = history.CONFIG_SCHEMA(
@ -438,6 +483,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -462,6 +508,7 @@ class TestComponentHistory(unittest.TestCase):
zero, four, states = self.record_states()
del states["media_player.test"]
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
@ -607,6 +654,7 @@ class TestComponentHistory(unittest.TestCase):
self.init_recorder()
mp = "media_player.test"
mp2 = "media_player.test2"
mp3 = "media_player.test3"
therm = "thermostat.test"
therm2 = "thermostat.test2"
zone = "zone.home"
@ -625,7 +673,7 @@ class TestComponentHistory(unittest.TestCase):
three = two + timedelta(seconds=1)
four = three + timedelta(seconds=1)
states = {therm: [], therm2: [], mp: [], mp2: [], script_c: []}
states = {therm: [], therm2: [], mp: [], mp2: [], mp3: [], script_c: []}
with patch(
"homeassistant.components.recorder.dt_util.utcnow", return_value=one
):
@ -638,6 +686,9 @@ class TestComponentHistory(unittest.TestCase):
states[mp2].append(
set_state(mp2, "YouTube", attributes={"media_title": str(sentinel.mt2)})
)
states[mp3].append(
set_state(mp3, "idle", attributes={"media_title": str(sentinel.mt1)})
)
states[therm].append(
set_state(therm, 20, attributes={"current_temperature": 19.5})
)
@ -647,6 +698,12 @@ class TestComponentHistory(unittest.TestCase):
):
# This state will be skipped only different in time
set_state(mp, "YouTube", attributes={"media_title": str(sentinel.mt3)})
# This state will be skipped because it is hidden
set_state(
mp3,
"Apple TV",
attributes={"media_title": str(sentinel.mt2), "hidden": True},
)
# This state will be skipped because domain blacklisted
set_state(zone, "zoning")
set_state(script_nc, "off")
@ -666,6 +723,9 @@ class TestComponentHistory(unittest.TestCase):
states[mp].append(
set_state(mp, "Netflix", attributes={"media_title": str(sentinel.mt4)})
)
states[mp3].append(
set_state(mp3, "Netflix", attributes={"media_title": str(sentinel.mt3)})
)
# Attributes changed even though state is the same
states[therm].append(
set_state(therm, 21, attributes={"current_temperature": 20})
@ -686,6 +746,30 @@ async def test_fetch_period_api(hass, hass_client):
assert response.status == 200
async def test_fetch_period_api_with_use_include_order(hass, hass_client):
    """Test the fetch period view for history when use_include_order is on."""
    await hass.async_add_executor_job(init_recorder_component, hass)
    config = {history.DOMAIN: {history.CONF_ORDER: True}}
    await async_setup_component(hass, "history", config)
    await hass.async_add_job(hass.data[recorder.DATA_INSTANCE].block_till_done)

    client = await hass_client()
    url = f"/api/history/period/{dt_util.utcnow().isoformat()}"
    response = await client.get(url)
    assert response.status == 200
async def test_fetch_period_api_with_minimal_response(hass, hass_client):
    """Test the fetch period view for history when minimal_response is requested."""
    await hass.async_add_executor_job(init_recorder_component, hass)
    await async_setup_component(hass, "history", {})
    await hass.async_add_job(hass.data[recorder.DATA_INSTANCE].block_till_done)

    client = await hass_client()
    start = dt_util.utcnow().isoformat()
    response = await client.get(f"/api/history/period/{start}?minimal_response")
    assert response.status == 200
async def test_fetch_period_api_with_include_order(hass, hass_client):
"""Test the fetch period view for history."""
await hass.async_add_executor_job(init_recorder_component, hass)