From 438edc5ca115a3a02bbc3887bdaafaf8ab24eb8e Mon Sep 17 00:00:00 2001 From: Greg Laabs Date: Wed, 26 Jul 2017 08:22:01 -0700 Subject: [PATCH] History performance improvements for single-entity requests (#8632) * Bugfix: remove superfluous domain filter This filter is already applied later in the function by the `filters` object, where it is conditionally applied when appropriate. This fixes the problem where we get a domain filter even when searching for a single entity_id, which needlessly harms the query's performance. * Performance: build different query when only getting single entity When querying the history of a single entity, we can use an entirely different method for the "synthetic zero data point" by simply sorting by date and doing a LIMIT 1. This performs thousands of times better than the multi-entity query when the current recorder_run has been going for a while. * Add entity_id filter to single-entity request The entity_id filter was handled inside the `filters.apply` logic which is used in most cases, but it did not work when no `filters` object was passed into the method. Now it'll work even if no `filters` object is passed in. * Fix linting errors in history.py * Undo removal of domain filter Putting back the domain filter that was removed in 76a6371705dcd57483e55dcc03435ae867c184d2 - there are use-cases where get_states is called without a filter object, so we need the domain filter to work in those cases as well. 
* Fix truncated comment --- homeassistant/components/history.py | 45 ++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/homeassistant/components/history.py b/homeassistant/components/history.py index 9800a15c16b..893ff23df35 100644 --- a/homeassistant/components/history.py +++ b/homeassistant/components/history.py @@ -119,19 +119,42 @@ def get_states(hass, utc_point_in_time, entity_ids=None, run=None, from sqlalchemy import and_, func with session_scope(hass=hass) as session: - most_recent_state_ids = session.query( - func.max(States.state_id).label('max_state_id') - ).filter( - (States.created >= run.start) & - (States.created < utc_point_in_time) & - (~States.domain.in_(IGNORE_DOMAINS))) + if entity_ids and len(entity_ids) == 1: + # Use an entirely different (and extremely fast) query if we only + # have a single entity id + most_recent_state_ids = session.query( + States.state_id.label('max_state_id') + ).filter( + (States.created < utc_point_in_time) & + (States.entity_id.in_(entity_ids)) + ).order_by( + States.created.desc()) - if filters: - most_recent_state_ids = filters.apply(most_recent_state_ids, - entity_ids) + if filters: + most_recent_state_ids = filters.apply(most_recent_state_ids, + entity_ids) - most_recent_state_ids = most_recent_state_ids.group_by( - States.entity_id).subquery() + most_recent_state_ids = most_recent_state_ids.limit(1) + + else: + # We have more than one entity to look at (most commonly we want + # all entities,) so we need to do a search on all states since the + # last recorder run started. 
+ most_recent_state_ids = session.query( + func.max(States.state_id).label('max_state_id') + ).filter( + (States.created >= run.start) & + (States.created < utc_point_in_time) & + (~States.domain.in_(IGNORE_DOMAINS))) + + if filters: + most_recent_state_ids = filters.apply(most_recent_state_ids, + entity_ids) + + most_recent_state_ids = most_recent_state_ids.group_by( + States.entity_id) + + most_recent_state_ids = most_recent_state_ids.subquery() query = session.query(States).join(most_recent_state_ids, and_( States.state_id == most_recent_state_ids.c.max_state_id))