Compare commits

...

3 Commits

Author SHA1 Message Date
Paulus Schoutsen
df1454b1fe Fix some tests 2025-03-02 14:06:54 -05:00
Paulus Schoutsen
1420f03b93 Migrate Assist Satellite to use stream media source IDs 2025-03-02 13:32:31 -05:00
Paulus Schoutsen
fdb28c69f9 Add temporary stream ID 2025-03-02 13:08:11 -05:00
7 changed files with 96 additions and 51 deletions

View File

@@ -20,9 +20,6 @@ import hass_nabucasa
import voluptuous as vol
from homeassistant.components import conversation, stt, tts, wake_word, websocket_api
from homeassistant.components.tts import (
generate_media_source_id as tts_generate_media_source_id,
)
from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL
from homeassistant.core import Context, HomeAssistant, callback
from homeassistant.exceptions import HomeAssistantError
@@ -1275,26 +1272,10 @@ class PipelineRun:
)
)
try:
# Synthesize audio and get URL
tts_media_id = tts_generate_media_source_id(
self.hass,
tts_input,
engine=self.tts_stream.engine,
language=self.tts_stream.language,
options=self.tts_stream.options,
)
except Exception as src_error:
_LOGGER.exception("Unexpected error during text-to-speech")
raise TextToSpeechError(
code="tts-failed",
message="Unexpected error during text-to-speech",
) from src_error
self.tts_stream.async_set_message(tts_input)
tts_output = {
"media_id": tts_media_id,
"media_id": self.tts_stream.media_source_id,
"url": self.tts_stream.url,
"mime_type": self.tts_stream.content_type,
}

View File

@@ -23,9 +23,6 @@ from homeassistant.components.assist_pipeline import (
vad,
)
from homeassistant.components.media_player import async_process_play_media_url
from homeassistant.components.tts import (
generate_media_source_id as tts_generate_media_source_id,
)
from homeassistant.core import Context, callback
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import chat_session, entity
@@ -484,6 +481,9 @@ class AssistSatelliteEntity(entity.Entity):
pipeline_id = self._resolve_pipeline()
pipeline = async_get_pipeline(self.hass, pipeline_id)
if pipeline.tts_engine is None:
raise HomeAssistantError("Pipeline has no TTS engine configured")
tts_options: dict[str, Any] = {}
if pipeline.tts_voice is not None:
tts_options[tts.ATTR_VOICE] = pipeline.tts_voice
@@ -491,14 +491,15 @@ class AssistSatelliteEntity(entity.Entity):
if self.tts_options is not None:
tts_options.update(self.tts_options)
media_id = tts_generate_media_source_id(
stream = tts.async_create_stream(
self.hass,
message,
engine=pipeline.tts_engine,
language=pipeline.tts_language,
options=tts_options,
)
original_media_id = media_id
stream.async_set_message(message)
media_id = stream.url
original_media_id = stream.media_source_id
if media_source.is_media_source_id(media_id):
if not media_id_source:

View File

@@ -27,6 +27,10 @@ import voluptuous as vol
from homeassistant.components import ffmpeg, websocket_api
from homeassistant.components.http import HomeAssistantView
from homeassistant.components.media_source import (
Unresolvable,
generate_media_source_id as ms_generate_media_source_id,
)
from homeassistant.config_entries import ConfigEntry
from homeassistant.const import EVENT_HOMEASSISTANT_STOP, PLATFORM_FORMAT
from homeassistant.core import (
@@ -188,10 +192,19 @@ async def async_get_media_source_audio(
) -> tuple[str, bytes]:
"""Get TTS audio as extension, data."""
manager = hass.data[DATA_TTS_MANAGER]
cache_key = manager.async_cache_message_in_memory(
**media_source_id_to_kwargs(media_source_id)
)
return await manager.async_get_tts_audio(cache_key)
if not media_source_id.startswith("media-source://tts/temporary/"):
cache_key = manager.async_cache_message_in_memory(
**media_source_id_to_kwargs(media_source_id)
)
return await manager.async_get_tts_audio(cache_key)
token = media_source_id.split("/")[-1]
if (stream := manager.token_to_stream.get(token)) is None:
raise Unresolvable("Token from media source not found")
data = b"".join([chunk async for chunk in stream.async_stream_result()])
return stream.extension, data
@callback
@@ -394,6 +407,11 @@ class ResultStream:
"""Get the URL to stream the result."""
return f"/api/tts_proxy/{self.token}"
@cached_property
def media_source_id(self) -> str:
    """Return the media source ID that addresses this stream by its token."""
    # Identifiers under "temporary/" are keyed by the stream token.
    identifier = f"temporary/{self.token}"
    return ms_generate_media_source_id(DOMAIN, identifier)
@cached_property
def _result_cache_key(self) -> asyncio.Future[str]:
"""Get the future that returns the cache key."""

View File

@@ -3,7 +3,7 @@
from __future__ import annotations
import json
from typing import TypedDict
from typing import TypedDict, TYPE_CHECKING
from yarl import URL
@@ -22,12 +22,15 @@ from homeassistant.exceptions import HomeAssistantError
from .const import DATA_COMPONENT, DATA_TTS_MANAGER, DOMAIN
from .helper import get_engine_instance
if TYPE_CHECKING:
from . import SpeechManager
URL_QUERY_TTS_OPTIONS = "tts_options"
async def async_get_media_source(hass: HomeAssistant) -> TTSMediaSource:
"""Set up tts media source."""
return TTSMediaSource(hass)
return TTSMediaSource(hass, hass.data[DATA_TTS_MANAGER])
@callback
@@ -109,22 +112,31 @@ class TTSMediaSource(MediaSource):
"""Provide text-to-speech providers as media sources."""
name: str = "Text-to-speech"
manager: SpeechManager
def __init__(self, hass: HomeAssistant) -> None:
def __init__(self, hass: HomeAssistant, manager: SpeechManager) -> None:
"""Initialize TTSMediaSource."""
super().__init__(DOMAIN)
self.hass = hass
self.manager = manager
async def async_resolve_media(self, item: MediaSourceItem) -> PlayMedia:
"""Resolve media to a url."""
try:
stream = self.hass.data[DATA_TTS_MANAGER].async_create_result_stream(
**media_source_id_to_kwargs(item.identifier)
)
except Unresolvable:
raise
except HomeAssistantError as err:
raise Unresolvable(str(err)) from err
if item.identifier.startswith("temporary/"):
token = item.identifier.partition("/")[2]
stream = self.manager.token_to_stream.get(token)
if stream is None:
raise Unresolvable("Temporary media not found")
else:
try:
stream = self.manager.async_create_result_stream(
**media_source_id_to_kwargs(item.identifier)
)
except Unresolvable:
raise
except HomeAssistantError as err:
raise Unresolvable(str(err)) from err
return PlayMedia(stream.url, stream.content_type)
@@ -134,6 +146,9 @@ class TTSMediaSource(MediaSource):
) -> BrowseMediaSource:
"""Return media."""
if item.identifier:
if item.identifier.startswith("temporary/"):
raise BrowseError("Temporary media cannot be browsed")
engine, _, params = item.identifier.partition("?")
return self._engine_item(engine, params)

View File

@@ -83,7 +83,7 @@
dict({
'data': dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),
@@ -180,7 +180,7 @@
dict({
'data': dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22Arnold+Schwarzenegger%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),
@@ -277,7 +277,7 @@
dict({
'data': dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22Arnold+Schwarzenegger%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),
@@ -398,7 +398,7 @@
dict({
'data': dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),

View File

@@ -79,7 +79,7 @@
# name: test_audio_pipeline.6
dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),
@@ -168,7 +168,7 @@
# name: test_audio_pipeline_debug.6
dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),
@@ -269,7 +269,7 @@
# name: test_audio_pipeline_with_enhancements.6
dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),
@@ -380,7 +380,7 @@
# name: test_audio_pipeline_with_wake_word_no_timeout.8
dict({
'tts_output': dict({
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&tts_options=%7B%22voice%22:%22james_earl_jones%22%7D",
'media_id': 'media-source://tts/temporary/test_token.mp3',
'mime_type': 'audio/mpeg',
'url': '/api/tts_proxy/test_token.mp3',
}),

View File

@@ -2,11 +2,11 @@
from http import HTTPStatus
import re
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch
import pytest
from homeassistant.components import media_source
from homeassistant.components import media_source, tts
from homeassistant.components.media_player import BrowseError
from homeassistant.components.tts.media_source import (
MediaSourceOptions,
@@ -302,3 +302,33 @@ async def test_generate_media_source_id_and_media_source_id_to_kwargs(
"options": {"age": {"k1": [5, 6], "k2": "v2"}},
"use_file_cache": True,
}
async def test_stream_media_sources(hass: HomeAssistant, setup_media_source) -> None:
    """Check that a ResultStream can be resolved and read via its media source ID."""
    assert await async_setup_component(hass, "tts", {})

    result_stream = tts.ResultStream(
        token="mock-token.flac",
        extension="flac",
        content_type="audio/flac",
        engine="test",
        use_file_cache=True,
        language="en",
        options={},
        _manager=None,
    )
    # Register the stream under its token so the "temporary/<token>" lookup
    # performed by the media source can find it.
    token_registry = hass.data[tts.DATA_TTS_MANAGER].token_to_stream
    token_registry[result_stream.token] = result_stream

    assert (
        result_stream.media_source_id
        == "media-source://tts/temporary/mock-token.flac"
    )

    resolved = await media_source.async_resolve_media(
        hass, result_stream.media_source_id, None
    )
    assert resolved == media_source.PlayMedia(
        url=result_stream.url, mime_type=result_stream.content_type
    )

    async def fake_stream_result():
        # Single-chunk stand-in for the stream's audio generator.
        yield b"test"

    with patch.object(result_stream, "async_stream_result", fake_stream_result):
        audio = await tts.async_get_media_source_audio(
            hass, result_stream.media_source_id
        )
        assert audio == (result_stream.extension, b"test")