TTS to use ffmpeg in streaming fashion (#140536)

2025-07-09 06:17:07 +00:00 · 2025-04-19 06:41:52 -04:00 · 2025-04-19 06:41:52 -04:00 · 6f99b1d69b
commit 6f99b1d69b
parent 42c4ed85a1
2 changed files with 63 additions and 73 deletions
--- a/homeassistant/components/tts/__init__.py
+++ b/homeassistant/components/tts/__init__.py
@@ -14,8 +14,6 @@ import mimetypes
 import os
 import re
 import secrets
 import subprocess
 import tempfile
 from time import monotonic
 from typing import Any, Final
@@ -309,80 +307,73 @@ async def _async_convert_audio(
 ) -> AsyncGenerator[bytes]:
    """Convert audio to a preferred format using ffmpeg."""
    ffmpeg_manager = ffmpeg.get_ffmpeg_manager(hass)
-    audio_bytes = b"".join([chunk async for chunk in audio_bytes_gen])
+
-    data = await hass.async_add_executor_job(
+    command = [
-        lambda: _convert_audio(
+        ffmpeg_manager.binary,
-            ffmpeg_manager.binary,
+        "-hide_banner",
-            from_extension,
+        "-loglevel",
-            audio_bytes,
+        "error",
-            to_extension,
+        "-f",
-            to_sample_rate=to_sample_rate,
+        from_extension,
-            to_sample_channels=to_sample_channels,
+        "-i",
-            to_sample_bytes=to_sample_bytes,
+        "pipe:",
-        )
+        "-f",
        to_extension,
    ]
    if to_sample_rate is not None:
        command.extend(["-ar", str(to_sample_rate)])
    if to_sample_channels is not None:
        command.extend(["-ac", str(to_sample_channels)])
    if to_extension == "mp3":
        # Max quality for MP3.
        command.extend(["-q:a", "0"])
    if to_sample_bytes == 2:
        # 16-bit samples.
        command.extend(["-sample_fmt", "s16"])
    command.append("pipe:1")  # Send output to stdout.
    process = await asyncio.create_subprocess_exec(
        *command,
        stdin=asyncio.subprocess.PIPE,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    yield data
    async def write_input() -> None:
        assert process.stdin
        try:
            async for chunk in audio_bytes_gen:
                process.stdin.write(chunk)
                await process.stdin.drain()
        finally:
            if process.stdin:
                process.stdin.close()
-def _convert_audio(
+    writer_task = hass.async_create_background_task(
-    ffmpeg_binary: str,
+        write_input(), "tts_ffmpeg_conversion"
-    from_extension: str,
+    )
    audio_bytes: bytes,
    to_extension: str,
    to_sample_rate: int | None = None,
    to_sample_channels: int | None = None,
    to_sample_bytes: int | None = None,
 ) -> bytes:
    """Convert audio to a preferred format using ffmpeg."""
-    # We have to use a temporary file here because some formats like WAV store
+    assert process.stdout
-    # the length of the file in the header, and therefore cannot be written in a
+    chunk_size = 4096
-    # streaming fashion.
+    try:
-    with tempfile.NamedTemporaryFile(
+        while True:
-        mode="wb+", suffix=f".{to_extension}"
+            chunk = await process.stdout.read(chunk_size)
-    ) as output_file:
+            if not chunk:
-        # input
+                break
-        command = [
+            yield chunk
-            ffmpeg_binary,
+    finally:
-            "-y",  # overwrite temp file
+        # Ensure we wait for the input writer to complete.
-            "-f",
+        await writer_task
-            from_extension,
+        # Wait for process termination and check for errors.
-            "-i",
+        retcode = await process.wait()
-            "pipe:",  # input from stdin
+        if retcode != 0:
-        ]
+            assert process.stderr
-
+            stderr_data = await process.stderr.read()
-        # output
+            _LOGGER.error(stderr_data.decode())
-        command.extend(["-f", to_extension])
+            raise RuntimeError(
-
+                f"Unexpected error while running ffmpeg with arguments: {command}. "
-        if to_sample_rate is not None:
+                "See log for details."
-            command.extend(["-ar", str(to_sample_rate)])
+            )
        if to_sample_channels is not None:
            command.extend(["-ac", str(to_sample_channels)])
        if to_extension == "mp3":
            # Max quality for MP3
            command.extend(["-q:a", "0"])
        if to_sample_bytes == 2:
            # 16-bit samples
            command.extend(["-sample_fmt", "s16"])
        command.append(output_file.name)
        with subprocess.Popen(
            command, stdin=subprocess.PIPE, stderr=subprocess.PIPE
        ) as proc:
            _stdout, stderr = proc.communicate(input=audio_bytes)
            if proc.returncode != 0:
                _LOGGER.error(stderr.decode())
                raise RuntimeError(
                    f"Unexpected error while running ffmpeg with arguments: {command}."
                    "See log for details."
                )
        output_file.seek(0)
        return output_file.read()
 async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
--- a/tests/components/wyoming/test_tts.py
+++ b/tests/components/wyoming/test_tts.py
@@ -117,7 +117,6 @@ async def test_get_tts_audio_different_formats(
        assert wav_file.getframerate() == 48000
        assert wav_file.getsampwidth() == 2
        assert wav_file.getnchannels() == 2
        assert wav_file.getnframes() == wav_file.getframerate()  # one second
    assert mock_client.written == snapshot