mirror of
https://github.com/home-assistant/core.git
synced 2025-12-21 15:28:19 +00:00
Compare commits
3 Commits
knx-text-u
...
stream-med
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
df1454b1fe | ||
|
|
1420f03b93 | ||
|
|
fdb28c69f9 |
@@ -20,9 +20,6 @@ import hass_nabucasa
|
|||||||
import voluptuous as vol
|
import voluptuous as vol
|
||||||
|
|
||||||
from homeassistant.components import conversation, stt, tts, wake_word, websocket_api
|
from homeassistant.components import conversation, stt, tts, wake_word, websocket_api
|
||||||
from homeassistant.components.tts import (
|
|
||||||
generate_media_source_id as tts_generate_media_source_id,
|
|
||||||
)
|
|
||||||
from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL
|
from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL
|
||||||
from homeassistant.core import Context, HomeAssistant, callback
|
from homeassistant.core import Context, HomeAssistant, callback
|
||||||
from homeassistant.exceptions import HomeAssistantError
|
from homeassistant.exceptions import HomeAssistantError
|
||||||
@@ -1275,26 +1272,10 @@ class PipelineRun:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
# Synthesize audio and get URL
|
|
||||||
tts_media_id = tts_generate_media_source_id(
|
|
||||||
self.hass,
|
|
||||||
tts_input,
|
|
||||||
engine=self.tts_stream.engine,
|
|
||||||
language=self.tts_stream.language,
|
|
||||||
options=self.tts_stream.options,
|
|
||||||
)
|
|
||||||
except Exception as src_error:
|
|
||||||
_LOGGER.exception("Unexpected error during text-to-speech")
|
|
||||||
raise TextToSpeechError(
|
|
||||||
code="tts-failed",
|
|
||||||
message="Unexpected error during text-to-speech",
|
|
||||||
) from src_error
|
|
||||||
|
|
||||||
self.tts_stream.async_set_message(tts_input)
|
self.tts_stream.async_set_message(tts_input)
|
||||||
|
|
||||||
tts_output = {
|
tts_output = {
|
||||||
"media_id": tts_media_id,
|
"media_id": self.tts_stream.media_source_id,
|
||||||
"url": self.tts_stream.url,
|
"url": self.tts_stream.url,
|
||||||
"mime_type": self.tts_stream.content_type,
|
"mime_type": self.tts_stream.content_type,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,9 +23,6 @@ from homeassistant.components.assist_pipeline import (
|
|||||||
vad,
|
vad,
|
||||||
)
|
)
|
||||||
from homeassistant.components.media_player import async_process_play_media_url
|
from homeassistant.components.media_player import async_process_play_media_url
|
||||||
from homeassistant.components.tts import (
|
|
||||||
generate_media_source_id as tts_generate_media_source_id,
|
|
||||||
)
|
|
||||||
from homeassistant.core import Context, callback
|
from homeassistant.core import Context, callback
|
||||||
from homeassistant.exceptions import HomeAssistantError
|
from homeassistant.exceptions import HomeAssistantError
|
||||||
from homeassistant.helpers import chat_session, entity
|
from homeassistant.helpers import chat_session, entity
|
||||||
@@ -484,6 +481,9 @@ class AssistSatelliteEntity(entity.Entity):
|
|||||||
pipeline_id = self._resolve_pipeline()
|
pipeline_id = self._resolve_pipeline()
|
||||||
pipeline = async_get_pipeline(self.hass, pipeline_id)
|
pipeline = async_get_pipeline(self.hass, pipeline_id)
|
||||||
|
|
||||||
|
if pipeline.tts_engine is None:
|
||||||
|
raise HomeAssistantError("Pipeline has no TTS engine configured")
|
||||||
|
|
||||||
tts_options: dict[str, Any] = {}
|
tts_options: dict[str, Any] = {}
|
||||||
if pipeline.tts_voice is not None:
|
if pipeline.tts_voice is not None:
|
||||||
tts_options[tts.ATTR_VOICE] = pipeline.tts_voice
|
tts_options[tts.ATTR_VOICE] = pipeline.tts_voice
|
||||||
@@ -491,14 +491,15 @@ class AssistSatelliteEntity(entity.Entity):
|
|||||||
if self.tts_options is not None:
|
if self.tts_options is not None:
|
||||||
tts_options.update(self.tts_options)
|
tts_options.update(self.tts_options)
|
||||||
|
|
||||||
media_id = tts_generate_media_source_id(
|
stream = tts.async_create_stream(
|
||||||
self.hass,
|
self.hass,
|
||||||
message,
|
|
||||||
engine=pipeline.tts_engine,
|
engine=pipeline.tts_engine,
|
||||||
language=pipeline.tts_language,
|
language=pipeline.tts_language,
|
||||||
options=tts_options,
|
options=tts_options,
|
||||||
)
|
)
|
||||||
original_media_id = media_id
|
stream.async_set_message(message)
|
||||||
|
media_id = stream.url
|
||||||
|
original_media_id = stream.media_source_id
|
||||||
|
|
||||||
if media_source.is_media_source_id(media_id):
|
if media_source.is_media_source_id(media_id):
|
||||||
if not media_id_source:
|
if not media_id_source:
|
||||||
|
|||||||
@@ -27,6 +27,10 @@ import voluptuous as vol
|
|||||||
|
|
||||||
from homeassistant.components import ffmpeg, websocket_api
|
from homeassistant.components import ffmpeg, websocket_api
|
||||||
from homeassistant.components.http import HomeAssistantView
|
from homeassistant.components.http import HomeAssistantView
|
||||||
|
from homeassistant.components.media_source import (
|
||||||
|
Unresolvable,
|
||||||
|
generate_media_source_id as ms_generate_media_source_id,
|
||||||
|
)
|
||||||
from homeassistant.config_entries import ConfigEntry
|
from homeassistant.config_entries import ConfigEntry
|
||||||
from homeassistant.const import EVENT_HOMEASSISTANT_STOP, PLATFORM_FORMAT
|
from homeassistant.const import EVENT_HOMEASSISTANT_STOP, PLATFORM_FORMAT
|
||||||
from homeassistant.core import (
|
from homeassistant.core import (
|
||||||
@@ -188,10 +192,19 @@ async def async_get_media_source_audio(
|
|||||||
) -> tuple[str, bytes]:
|
) -> tuple[str, bytes]:
|
||||||
"""Get TTS audio as extension, data."""
|
"""Get TTS audio as extension, data."""
|
||||||
manager = hass.data[DATA_TTS_MANAGER]
|
manager = hass.data[DATA_TTS_MANAGER]
|
||||||
cache_key = manager.async_cache_message_in_memory(
|
|
||||||
**media_source_id_to_kwargs(media_source_id)
|
if not media_source_id.startswith("media-source://tts/temporary/"):
|
||||||
)
|
cache_key = manager.async_cache_message_in_memory(
|
||||||
return await manager.async_get_tts_audio(cache_key)
|
**media_source_id_to_kwargs(media_source_id)
|
||||||
|
)
|
||||||
|
return await manager.async_get_tts_audio(cache_key)
|
||||||
|
|
||||||
|
token = media_source_id.split("/")[-1]
|
||||||
|
if (stream := manager.token_to_stream.get(token)) is None:
|
||||||
|
raise Unresolvable("Token from media source not found")
|
||||||
|
|
||||||
|
data = b"".join([chunk async for chunk in stream.async_stream_result()])
|
||||||
|
return stream.extension, data
|
||||||
|
|
||||||
|
|
||||||
@callback
|
@callback
|
||||||
@@ -394,6 +407,11 @@ class ResultStream:
|
|||||||
"""Get the URL to stream the result."""
|
"""Get the URL to stream the result."""
|
||||||
return f"/api/tts_proxy/{self.token}"
|
return f"/api/tts_proxy/{self.token}"
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
def media_source_id(self) -> str:
|
||||||
|
"""Get the media source ID for the result."""
|
||||||
|
return ms_generate_media_source_id(DOMAIN, f"temporary/{self.token}")
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def _result_cache_key(self) -> asyncio.Future[str]:
|
def _result_cache_key(self) -> asyncio.Future[str]:
|
||||||
"""Get the future that returns the cache key."""
|
"""Get the future that returns the cache key."""
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from typing import TypedDict
|
from typing import TypedDict, TYPE_CHECKING
|
||||||
|
|
||||||
from yarl import URL
|
from yarl import URL
|
||||||
|
|
||||||
@@ -22,12 +22,15 @@ from homeassistant.exceptions import HomeAssistantError
|
|||||||
from .const import DATA_COMPONENT, DATA_TTS_MANAGER, DOMAIN
|
from .const import DATA_COMPONENT, DATA_TTS_MANAGER, DOMAIN
|
||||||
from .helper import get_engine_instance
|
from .helper import get_engine_instance
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from . import SpeechManager
|
||||||
|
|
||||||
URL_QUERY_TTS_OPTIONS = "tts_options"
|
URL_QUERY_TTS_OPTIONS = "tts_options"
|
||||||
|
|
||||||
|
|
||||||
async def async_get_media_source(hass: HomeAssistant) -> TTSMediaSource:
|
async def async_get_media_source(hass: HomeAssistant) -> TTSMediaSource:
|
||||||
"""Set up tts media source."""
|
"""Set up tts media source."""
|
||||||
return TTSMediaSource(hass)
|
return TTSMediaSource(hass, hass.data[DATA_TTS_MANAGER])
|
||||||
|
|
||||||
|
|
||||||
@callback
|
@callback
|
||||||
@@ -109,22 +112,31 @@ class TTSMediaSource(MediaSource):
|
|||||||
"""Provide text-to-speech providers as media sources."""
|
"""Provide text-to-speech providers as media sources."""
|
||||||
|
|
||||||
name: str = "Text-to-speech"
|
name: str = "Text-to-speech"
|
||||||
|
manager: SpeechManager
|
||||||
|
|
||||||
def __init__(self, hass: HomeAssistant) -> None:
|
def __init__(self, hass: HomeAssistant, manager: SpeechManager) -> None:
|
||||||
"""Initialize TTSMediaSource."""
|
"""Initialize TTSMediaSource."""
|
||||||
super().__init__(DOMAIN)
|
super().__init__(DOMAIN)
|
||||||
self.hass = hass
|
self.hass = hass
|
||||||
|
self.manager = manager
|
||||||
|
|
||||||
async def async_resolve_media(self, item: MediaSourceItem) -> PlayMedia:
|
async def async_resolve_media(self, item: MediaSourceItem) -> PlayMedia:
|
||||||
"""Resolve media to a url."""
|
"""Resolve media to a url."""
|
||||||
try:
|
if item.identifier.startswith("temporary/"):
|
||||||
stream = self.hass.data[DATA_TTS_MANAGER].async_create_result_stream(
|
token = item.identifier.partition("/")[2]
|
||||||
**media_source_id_to_kwargs(item.identifier)
|
stream = self.manager.token_to_stream.get(token)
|
||||||
)
|
if stream is None:
|
||||||
except Unresolvable:
|
raise Unresolvable("Temporary media not found")
|
||||||
raise
|
|
||||||
except HomeAssistantError as err:
|
else:
|
||||||
raise Unresolvable(str(err)) from err
|
try:
|
||||||
|
stream = self.manager.async_create_result_stream(
|
||||||
|
**media_source_id_to_kwargs(item.identifier)
|
||||||
|
)
|
||||||
|
except Unresolvable:
|
||||||
|
raise
|
||||||
|
except HomeAssistantError as err:
|
||||||
|
raise Unresolvable(str(err)) from err
|
||||||
|
|
||||||
return PlayMedia(stream.url, stream.content_type)
|
return PlayMedia(stream.url, stream.content_type)
|
||||||
|
|
||||||
@@ -134,6 +146,9 @@ class TTSMediaSource(MediaSource):
|
|||||||
) -> BrowseMediaSource:
|
) -> BrowseMediaSource:
|
||||||
"""Return media."""
|
"""Return media."""
|
||||||
if item.identifier:
|
if item.identifier:
|
||||||
|
if item.identifier.startswith("temporary/"):
|
||||||
|
raise BrowseError("Temporary media cannot be browsed")
|
||||||
|
|
||||||
engine, _, params = item.identifier.partition("?")
|
engine, _, params = item.identifier.partition("?")
|
||||||
return self._engine_item(engine, params)
|
return self._engine_item(engine, params)
|
||||||
|
|
||||||
|
|||||||
@@ -83,7 +83,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
@@ -180,7 +180,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22Arnold+Schwarzenegger%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
@@ -277,7 +277,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22Arnold+Schwarzenegger%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
@@ -398,7 +398,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
|
|||||||
@@ -79,7 +79,7 @@
|
|||||||
# name: test_audio_pipeline.6
|
# name: test_audio_pipeline.6
|
||||||
dict({
|
dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
@@ -168,7 +168,7 @@
|
|||||||
# name: test_audio_pipeline_debug.6
|
# name: test_audio_pipeline_debug.6
|
||||||
dict({
|
dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
@@ -269,7 +269,7 @@
|
|||||||
# name: test_audio_pipeline_with_enhancements.6
|
# name: test_audio_pipeline_with_enhancements.6
|
||||||
dict({
|
dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
@@ -380,7 +380,7 @@
|
|||||||
# name: test_audio_pipeline_with_wake_word_no_timeout.8
|
# name: test_audio_pipeline_with_wake_word_no_timeout.8
|
||||||
dict({
|
dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
|
'media_id': 'media-source://tts/temporary/test_token.mp3',
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/test_token.mp3',
|
'url': '/api/tts_proxy/test_token.mp3',
|
||||||
}),
|
}),
|
||||||
|
|||||||
@@ -2,11 +2,11 @@
|
|||||||
|
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
import re
|
import re
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from homeassistant.components import media_source
|
from homeassistant.components import media_source, tts
|
||||||
from homeassistant.components.media_player import BrowseError
|
from homeassistant.components.media_player import BrowseError
|
||||||
from homeassistant.components.tts.media_source import (
|
from homeassistant.components.tts.media_source import (
|
||||||
MediaSourceOptions,
|
MediaSourceOptions,
|
||||||
@@ -302,3 +302,33 @@ async def test_generate_media_source_id_and_media_source_id_to_kwargs(
|
|||||||
"options": {"age": {"k1": [5, 6], "k2": "v2"}},
|
"options": {"age": {"k1": [5, 6], "k2": "v2"}},
|
||||||
"use_file_cache": True,
|
"use_file_cache": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def test_stream_media_sources(hass: HomeAssistant, setup_media_source) -> None:
|
||||||
|
"""Test ResultStream as media sources."""
|
||||||
|
assert await async_setup_component(hass, "tts", {})
|
||||||
|
stream = tts.ResultStream(
|
||||||
|
token="mock-token.flac",
|
||||||
|
extension="flac",
|
||||||
|
content_type="audio/flac",
|
||||||
|
engine="test",
|
||||||
|
use_file_cache=True,
|
||||||
|
language="en",
|
||||||
|
options={},
|
||||||
|
_manager=None,
|
||||||
|
)
|
||||||
|
hass.data[tts.DATA_TTS_MANAGER].token_to_stream[stream.token] = stream
|
||||||
|
assert stream.media_source_id == "media-source://tts/temporary/mock-token.flac"
|
||||||
|
|
||||||
|
assert await media_source.async_resolve_media(
|
||||||
|
hass, stream.media_source_id, None
|
||||||
|
) == media_source.PlayMedia(url=stream.url, mime_type=stream.content_type)
|
||||||
|
|
||||||
|
async def async_stream_result():
|
||||||
|
yield b"test"
|
||||||
|
|
||||||
|
with patch.object(stream, "async_stream_result", async_stream_result):
|
||||||
|
assert await tts.async_get_media_source_audio(hass, stream.media_source_id) == (
|
||||||
|
stream.extension,
|
||||||
|
b"test",
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user