Add wake word integration (#96380)

* Add wake component
* Add wake support to Wyoming
* Add helper function to assist_pipeline (not complete)
* Rename wake to wake_word
* Fix platform
* Use send_event and clean up
* Merge wake word into pipeline
* Add wake option to async_pipeline_from_audio_stream
* Add start/end stages to async_pipeline_from_audio_stream
* Add wake timeout
* Remove layer in wake_output
* Use VAD for wake word timeout
* Include audio metadata in wake-start
* Remove unnecessary websocket command
* wake -> wake_word
* Incorporate feedback
* Clean up wake_word tests
* Add wyoming wake word tests
* Add pipeline wake word test
* Add last processed state
* Fix tests
* Add tests for wake word
* More tests for the codebot
2025-11-09 19:09:32 +00:00 · 2023-08-07 21:22:16 -05:00
parent 798fb3e31a
commit 7ea2998b55
28 changed files with 1802 additions and 27 deletions
--- a/homeassistant/components/assist_pipeline/websocket_api.py
+++ b/homeassistant/components/assist_pipeline/websocket_api.py
@@ -26,11 +26,12 @@ from .pipeline import (
    PipelineInput,
    PipelineRun,
    PipelineStage,
+    WakeWordSettings,
    async_get_pipeline,
 )
-from .vad import VoiceCommandSegmenter

 DEFAULT_TIMEOUT = 30
+DEFAULT_WAKE_WORD_TIMEOUT = 3

 _LOGGER = logging.getLogger(__name__)

@@ -63,6 +64,18 @@ def async_register_websocket_api(hass: HomeAssistant) -> None:
        cv.key_value_schemas(
            "start_stage",
            {
+                PipelineStage.WAKE_WORD: vol.Schema(
+                    {
+                        vol.Required("input"): {
+                            vol.Required("sample_rate"): int,
+                            vol.Optional("timeout"): vol.Any(float, int),
+                            vol.Optional("audio_seconds_to_buffer"): vol.Any(
+                                float, int
+                            ),
+                        }
+                    },
+                    extra=vol.ALLOW_EXTRA,
+                ),
                PipelineStage.STT: vol.Schema(
                    {vol.Required("input"): {vol.Required("sample_rate"): int}},
                    extra=vol.ALLOW_EXTRA,
@@ -102,6 +115,7 @@ async def websocket_run(
    end_stage = PipelineStage(msg["end_stage"])
    handler_id: int | None = None
    unregister_handler: Callable[[], None] | None = None
+    wake_word_settings: WakeWordSettings | None = None

    # Arguments to PipelineInput
    input_args: dict[str, Any] = {
@@ -109,24 +123,26 @@ async def websocket_run(
        "device_id": msg.get("device_id"),
    }

-    if start_stage == PipelineStage.STT:
+    if start_stage in (PipelineStage.WAKE_WORD, PipelineStage.STT):
        # Audio pipeline that will receive audio as binary websocket messages
        audio_queue: asyncio.Queue[bytes] = asyncio.Queue()
        incoming_sample_rate = msg["input"]["sample_rate"]

+        if start_stage == PipelineStage.WAKE_WORD:
+            wake_word_settings = WakeWordSettings(
+                timeout=msg["input"].get("timeout", DEFAULT_WAKE_WORD_TIMEOUT),
+                audio_seconds_to_buffer=msg["input"].get("audio_seconds_to_buffer", 0),
+            )
+
        async def stt_stream() -> AsyncGenerator[bytes, None]:
            state = None
-            segmenter = VoiceCommandSegmenter()

            # Yield until we receive an empty chunk
            while chunk := await audio_queue.get():
-                chunk, state = audioop.ratecv(
-                    chunk, 2, 1, incoming_sample_rate, 16000, state
-                )
-                if not segmenter.process(chunk):
-                    # Voice command is finished
-                    break
-
+                if incoming_sample_rate != 16000:
+                    chunk, state = audioop.ratecv(
+                        chunk, 2, 1, incoming_sample_rate, 16000, state
+                    )
                yield chunk

        def handle_binary(
@@ -169,6 +185,7 @@ async def websocket_run(
            "stt_binary_handler_id": handler_id,
            "timeout": timeout,
        },
+        wake_word_settings=wake_word_settings,
    )

    pipeline_input = PipelineInput(**input_args)