From 438edc5ca115a3a02bbc3887bdaafaf8ab24eb8e Mon Sep 17 00:00:00 2001
From: Greg Laabs <OverloadUT@gmail.com>
Date: Wed, 26 Jul 2017 08:22:01 -0700
Subject: [PATCH] History performance improvements for single-entity requests
 (#8632)

* Bugfix: remove superfluous domain filter

This filter is already applied later in the function by the `filters` object, where it is conditionally applied when appropriate. This fixes the problem where we get a domain filter even when searching for a single entity_id, which needlessly harms the query's performance.

* Performance: build different query when only getting single entity

When querying the history of a single entity, we can use an entirely different method for the "synthetic zero data point" by simply sorting by date and doing a LIMIT 1. This performs thousands of times better than the multi-entity query when the current recorder_run has been going for a while.

* Add entity_id filter to single-entity request

The entity_id filter was handled inside the `filters.apply` logic, which is used in most cases, but it was not applied when no `filters` object was passed to the method. The filter now works even if no `filters` object is passed in.

* Fix linting errors in history.py

* Undo removal of domain filter

Putting back the domain filter that was removed in 76a6371705dcd57483e55dcc03435ae867c184d2 - there are use-cases where get_states is called without a filter object, so we need the domain filter to work in those cases as well.

* Fix truncated comment
---
 homeassistant/components/history.py | 45 ++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/homeassistant/components/history.py b/homeassistant/components/history.py
index 9800a15c16b..893ff23df35 100644
--- a/homeassistant/components/history.py
+++ b/homeassistant/components/history.py
@@ -119,19 +119,42 @@ def get_states(hass, utc_point_in_time, entity_ids=None, run=None,
     from sqlalchemy import and_, func
 
     with session_scope(hass=hass) as session:
-        most_recent_state_ids = session.query(
-            func.max(States.state_id).label('max_state_id')
-        ).filter(
-            (States.created >= run.start) &
-            (States.created < utc_point_in_time) &
-            (~States.domain.in_(IGNORE_DOMAINS)))
+        if entity_ids and len(entity_ids) == 1:
+            # Use an entirely different (and extremely fast) query if we only
+            # have a single entity id
+            most_recent_state_ids = session.query(
+                States.state_id.label('max_state_id')
+            ).filter(
+                (States.created < utc_point_in_time) &
+                (States.entity_id.in_(entity_ids))
+            ).order_by(
+                States.created.desc())
 
-        if filters:
-            most_recent_state_ids = filters.apply(most_recent_state_ids,
-                                                  entity_ids)
+            if filters:
+                most_recent_state_ids = filters.apply(most_recent_state_ids,
+                                                      entity_ids)
 
-        most_recent_state_ids = most_recent_state_ids.group_by(
-            States.entity_id).subquery()
+            most_recent_state_ids = most_recent_state_ids.limit(1)
+
+        else:
+            # We have more than one entity to look at (most commonly we want
+            # all entities), so we need to do a search on all states since the
+            # last recorder run started.
+            most_recent_state_ids = session.query(
+                func.max(States.state_id).label('max_state_id')
+            ).filter(
+                (States.created >= run.start) &
+                (States.created < utc_point_in_time) &
+                (~States.domain.in_(IGNORE_DOMAINS)))
+
+            if filters:
+                most_recent_state_ids = filters.apply(most_recent_state_ids,
+                                                      entity_ids)
+
+            most_recent_state_ids = most_recent_state_ids.group_by(
+                States.entity_id)
+
+        most_recent_state_ids = most_recent_state_ids.subquery()
 
         query = session.query(States).join(most_recent_state_ids, and_(
             States.state_id == most_recent_state_ids.c.max_state_id))