From 6f99b1d69b19fb8ddf4d62bb9ea33da1582b8a38 Mon Sep 17 00:00:00 2001 From: Paulus Schoutsen Date: Sat, 19 Apr 2025 06:41:52 -0400 Subject: [PATCH] TTS to use ffmpeg in streaming fashion (#140536) --- homeassistant/components/tts/__init__.py | 135 +++++++++++------------ tests/components/wyoming/test_tts.py | 1 - 2 files changed, 63 insertions(+), 73 deletions(-) diff --git a/homeassistant/components/tts/__init__.py b/homeassistant/components/tts/__init__.py index cb207643471..8182d375f96 100644 --- a/homeassistant/components/tts/__init__.py +++ b/homeassistant/components/tts/__init__.py @@ -14,8 +14,6 @@ import mimetypes import os import re import secrets -import subprocess -import tempfile from time import monotonic from typing import Any, Final @@ -309,80 +307,73 @@ async def _async_convert_audio( ) -> AsyncGenerator[bytes]: """Convert audio to a preferred format using ffmpeg.""" ffmpeg_manager = ffmpeg.get_ffmpeg_manager(hass) - audio_bytes = b"".join([chunk async for chunk in audio_bytes_gen]) - data = await hass.async_add_executor_job( - lambda: _convert_audio( - ffmpeg_manager.binary, - from_extension, - audio_bytes, - to_extension, - to_sample_rate=to_sample_rate, - to_sample_channels=to_sample_channels, - to_sample_bytes=to_sample_bytes, - ) + + command = [ + ffmpeg_manager.binary, + "-hide_banner", + "-loglevel", + "error", + "-f", + from_extension, + "-i", + "pipe:", + "-f", + to_extension, + ] + if to_sample_rate is not None: + command.extend(["-ar", str(to_sample_rate)]) + if to_sample_channels is not None: + command.extend(["-ac", str(to_sample_channels)]) + if to_extension == "mp3": + # Max quality for MP3. + command.extend(["-q:a", "0"]) + if to_sample_bytes == 2: + # 16-bit samples. + command.extend(["-sample_fmt", "s16"]) + command.append("pipe:1") # Send output to stdout. + + process = await asyncio.create_subprocess_exec( + *command, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, ) - yield data + async def write_input() -> None: + assert process.stdin + try: + async for chunk in audio_bytes_gen: + process.stdin.write(chunk) + await process.stdin.drain() + finally: + if process.stdin: + process.stdin.close() -def _convert_audio( - ffmpeg_binary: str, - from_extension: str, - audio_bytes: bytes, - to_extension: str, - to_sample_rate: int | None = None, - to_sample_channels: int | None = None, - to_sample_bytes: int | None = None, -) -> bytes: - """Convert audio to a preferred format using ffmpeg.""" + writer_task = hass.async_create_background_task( + write_input(), "tts_ffmpeg_conversion" + ) - # We have to use a temporary file here because some formats like WAV store - # the length of the file in the header, and therefore cannot be written in a - # streaming fashion. - with tempfile.NamedTemporaryFile( - mode="wb+", suffix=f".{to_extension}" - ) as output_file: - # input - command = [ - ffmpeg_binary, - "-y", # overwrite temp file - "-f", - from_extension, - "-i", - "pipe:", # input from stdin - ] - - # output - command.extend(["-f", to_extension]) - - if to_sample_rate is not None: - command.extend(["-ar", str(to_sample_rate)]) - - if to_sample_channels is not None: - command.extend(["-ac", str(to_sample_channels)]) - - if to_extension == "mp3": - # Max quality for MP3 - command.extend(["-q:a", "0"]) - - if to_sample_bytes == 2: - # 16-bit samples - command.extend(["-sample_fmt", "s16"]) - - command.append(output_file.name) - - with subprocess.Popen( - command, stdin=subprocess.PIPE, stderr=subprocess.PIPE - ) as proc: - _stdout, stderr = proc.communicate(input=audio_bytes) - if proc.returncode != 0: - _LOGGER.error(stderr.decode()) - raise RuntimeError( - f"Unexpected error while running ffmpeg with arguments: {command}." - "See log for details." - ) - - output_file.seek(0) - return output_file.read() + assert process.stdout + chunk_size = 4096 + try: + while True: + chunk = await process.stdout.read(chunk_size) + if not chunk: + break + yield chunk + finally: + # Ensure we wait for the input writer to complete. + await writer_task + # Wait for process termination and check for errors. + retcode = await process.wait() + if retcode != 0: + assert process.stderr + stderr_data = await process.stderr.read() + _LOGGER.error(stderr_data.decode()) + raise RuntimeError( + f"Unexpected error while running ffmpeg with arguments: {command}. " + "See log for details." + ) async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool: diff --git a/tests/components/wyoming/test_tts.py b/tests/components/wyoming/test_tts.py index 6e0edc022c0..c52b1391038 100644 --- a/tests/components/wyoming/test_tts.py +++ b/tests/components/wyoming/test_tts.py @@ -117,7 +117,6 @@ async def test_get_tts_audio_different_formats( assert wav_file.getframerate() == 48000 assert wav_file.getsampwidth() == 2 assert wav_file.getnchannels() == 2 - assert wav_file.getnframes() == wav_file.getframerate() # one second assert mock_client.written == snapshot