From 6bc07298d3131c9aa10c9513b04a9020eaa14eb1 Mon Sep 17 00:00:00 2001
From: Aliaksandr <minchik@gmail.com>
Date: Wed, 19 Jul 2017 10:14:48 +0300
Subject: [PATCH] [media_extractor] Add support for custom stream queries for
 media_extractor (#8538)

* Add support for different stream formats

* Encapsulate logic inside MediaExtractor class

* Add CONFIG_SCHEMA

* Fix for cases when youtube-dl returns content of playlist as list
---
 homeassistant/components/media_extractor.py | 190 ++++++++++++++------
 1 file changed, 139 insertions(+), 51 deletions(-)

diff --git a/homeassistant/components/media_extractor.py b/homeassistant/components/media_extractor.py
index c7d019973a3..559af46e9f6 100644
--- a/homeassistant/components/media_extractor.py
+++ b/homeassistant/components/media_extractor.py
@@ -6,11 +6,14 @@ https://home-assistant.io/components/media_extractor/
 """
 import logging
 import os
+import voluptuous as vol
 
 from homeassistant.components.media_player import (
-    ATTR_MEDIA_CONTENT_ID, DOMAIN as MEDIA_PLAYER_DOMAIN,
-    MEDIA_PLAYER_PLAY_MEDIA_SCHEMA, SERVICE_PLAY_MEDIA)
+    ATTR_ENTITY_ID, ATTR_MEDIA_CONTENT_ID, ATTR_MEDIA_CONTENT_TYPE,
+    DOMAIN as MEDIA_PLAYER_DOMAIN, MEDIA_PLAYER_PLAY_MEDIA_SCHEMA,
+    SERVICE_PLAY_MEDIA)
 from homeassistant.config import load_yaml_config_file
+from homeassistant.helpers import config_validation as cv
 
 REQUIREMENTS = ['youtube_dl==2017.7.9']
 
@@ -19,6 +22,18 @@ _LOGGER = logging.getLogger(__name__)
 DOMAIN = 'media_extractor'
 DEPENDENCIES = ['media_player']
 
+CONF_CUSTOMIZE_ENTITIES = 'customize'
+CONF_DEFAULT_STREAM_QUERY = 'default_query'
+DEFAULT_STREAM_QUERY = 'best'
+
+CONFIG_SCHEMA = vol.Schema({
+    DOMAIN: vol.Schema({
+        vol.Optional(CONF_DEFAULT_STREAM_QUERY): cv.string,
+        vol.Optional(CONF_CUSTOMIZE_ENTITIES):
+            vol.Schema({cv.entity_id: vol.Schema({cv.string: cv.string})}),
+    }),
+}, extra=vol.ALLOW_EXTRA)
+
 
 def setup(hass, config):
     """Set up the media extractor service."""
@@ -28,23 +43,7 @@ def setup(hass, config):
 
     def play_media(call):
         """Get stream URL and send it to the media_player.play_media."""
-        media_url = call.data.get(ATTR_MEDIA_CONTENT_ID)
-
-        try:
-            stream_url = get_media_stream_url(media_url)
-        except YDException:
-            _LOGGER.error("Could not retrieve data for the URL: %s",
-                          media_url)
-            return
-        else:
-            data = {k: v for k, v in call.data.items()
-                    if k != ATTR_MEDIA_CONTENT_ID}
-            data[ATTR_MEDIA_CONTENT_ID] = stream_url
-
-            hass.async_add_job(
-                hass.services.async_call(
-                    MEDIA_PLAYER_DOMAIN, SERVICE_PLAY_MEDIA, data)
-            )
+        MediaExtractor(hass, config[DOMAIN], call.data).extract_and_send()
 
     hass.services.register(DOMAIN,
                            SERVICE_PLAY_MEDIA,
@@ -55,47 +54,136 @@ def setup(hass, config):
     return True
 
 
-class YDException(Exception):
-    """General service exception."""
+class MEDownloadException(Exception):
+    """Media extractor download exception."""
 
     pass
 
 
-def get_media_stream_url(media_url):
-    """Extract stream URL from the media URL."""
-    from youtube_dl import YoutubeDL
-    from youtube_dl.utils import DownloadError, ExtractorError
+class MEQueryException(Exception):
+    """Media extractor query exception."""
 
-    ydl = YoutubeDL({'quiet': True, 'logger': _LOGGER})
+    pass
 
-    try:
-        all_media_streams = ydl.extract_info(media_url, process=False)
-    except DownloadError:
-        # This exception will be logged by youtube-dl itself
-        raise YDException()
 
-    if 'entries' in all_media_streams:
-        _LOGGER.warning("Playlists are not supported, "
-                        "looking for the first video")
+class MediaExtractor:
+    """Class which encapsulates all extraction logic."""
+
+    def __init__(self, hass, component_config, call_data):
+        """Initialize media extractor."""
+        self.hass = hass
+        self.config = component_config
+        self.call_data = call_data
+
+    def get_media_url(self):
+        """Return media content url."""
+        return self.call_data.get(ATTR_MEDIA_CONTENT_ID)
+
+    def get_entities(self):
+        """Return list of entities."""
+        return self.call_data.get(ATTR_ENTITY_ID, [])
+
+    def extract_and_send(self):
+        """Extract exact stream format for each entity_id and play it."""
         try:
-            selected_stream = next(all_media_streams['entries'])
-        except StopIteration:
-            _LOGGER.error("Playlist is empty")
-            raise YDException()
-    else:
-        selected_stream = all_media_streams
+            stream_selector = self.get_stream_selector()
+        except MEDownloadException:
+            _LOGGER.error("Could not retrieve data for the URL: %s",
+                          self.get_media_url())
+        else:
+            entities = self.get_entities()
 
-    try:
-        media_info = ydl.process_ie_result(selected_stream, download=False)
-    except (ExtractorError, DownloadError):
-        # This exception will be logged by youtube-dl itself
-        raise YDException()
+            if not entities:
+                self.call_media_player_service(stream_selector, None)
 
-    format_selector = ydl.build_format_selector('best')
+            for entity_id in entities:
+                self.call_media_player_service(stream_selector, entity_id)
 
-    try:
-        best_quality_stream = next(format_selector(media_info))
-    except (KeyError, StopIteration):
-        best_quality_stream = media_info
+    def get_stream_selector(self):
+        """Return format selector for the media URL."""
+        from youtube_dl import YoutubeDL
+        from youtube_dl.utils import DownloadError, ExtractorError
 
-    return best_quality_stream['url']
+        ydl = YoutubeDL({'quiet': True, 'logger': _LOGGER})
+
+        try:
+            all_media = ydl.extract_info(self.get_media_url(),
+                                         process=False)
+        except DownloadError:
+            # This exception will be logged by youtube-dl itself
+            raise MEDownloadException()
+
+        if 'entries' in all_media:
+            _LOGGER.warning("Playlists are not supported, "
+                            "looking for the first video")
+            # 'entries' may be a list or a lazy iterator; iter() handles both
+            # and next() reads only the first item, not the whole playlist.
+            selected_media = next(iter(all_media['entries']), None)
+            if selected_media is None:
+                _LOGGER.error("Playlist is empty")
+                raise MEDownloadException()
+        else:
+            selected_media = all_media
+
+        try:
+            media_info = ydl.process_ie_result(selected_media,
+                                               download=False)
+        except (ExtractorError, DownloadError):
+            # This exception will be logged by youtube-dl itself
+            raise MEDownloadException()
+
+        def stream_selector(query):
+            """Find stream url that matches query."""
+            try:
+                format_selector = ydl.build_format_selector(query)
+            except (SyntaxError, ValueError, AttributeError) as ex:
+                _LOGGER.error(ex)
+                raise MEQueryException()
+
+            try:
+                requested_stream = next(format_selector(media_info))
+            except (KeyError, StopIteration):
+                _LOGGER.error("Could not extract stream for the query: %s",
+                              query)
+                raise MEQueryException()
+
+            return requested_stream['url']
+
+        return stream_selector
+
+    def call_media_player_service(self, stream_selector, entity_id):
+        """Call media_player.play_media service."""
+        stream_query = self.get_stream_query_for_entity(entity_id)
+
+        try:
+            stream_url = stream_selector(stream_query)
+        except MEQueryException:
+            _LOGGER.error("Wrong query format: %s", stream_query)
+            return
+        else:
+            data = {k: v for k, v in self.call_data.items()
+                    if k != ATTR_ENTITY_ID}
+            data[ATTR_MEDIA_CONTENT_ID] = stream_url
+
+            if entity_id:
+                data[ATTR_ENTITY_ID] = entity_id
+
+            # NOTE(review): reached from the sync play_media handler, so use
+            # the thread-safe add_job rather than async_add_job - confirm.
+            self.hass.add_job(self.hass.services.async_call(
+                MEDIA_PLAYER_DOMAIN, SERVICE_PLAY_MEDIA, data))
+
+    def get_stream_query_for_entity(self, entity_id):
+        """Get stream format query for entity."""
+        default_stream_query = self.config.get(CONF_DEFAULT_STREAM_QUERY,
+                                               DEFAULT_STREAM_QUERY)
+
+        if entity_id:
+            media_content_type = self.call_data.get(ATTR_MEDIA_CONTENT_TYPE)
+
+            return self.config \
+                .get(CONF_CUSTOMIZE_ENTITIES, {}) \
+                .get(entity_id, {}) \
+                .get(media_content_type, default_stream_query)
+
+        return default_stream_query