Add wake word integration (#96380)

* Add wake component

* Add wake support to Wyoming

* Add helper function to assist_pipeline (not complete)

* Rename wake to wake_word

* Fix platform

* Use send_event and clean up

* Merge wake word into pipeline

* Add wake option to async_pipeline_from_audio_stream

* Add start/end stages to async_pipeline_from_audio_stream

* Add wake timeout

* Remove layer in wake_output

* Use VAD for wake word timeout

* Include audio metadata in wake-start

* Remove unnecessary websocket command

* wake -> wake_word

* Incorporate feedback

* Clean up wake_word tests

* Add wyoming wake word tests

* Add pipeline wake word test

* Add last processed state

* Fix tests

* Add tests for wake word

* More tests for the codebot
This commit is contained in:
Michael Hansen
2023-08-07 21:22:16 -05:00
committed by GitHub
parent 798fb3e31a
commit 7ea2998b55
28 changed files with 1802 additions and 27 deletions

View File

@@ -26,11 +26,12 @@ from .pipeline import (
PipelineInput,
PipelineRun,
PipelineStage,
WakeWordSettings,
async_get_pipeline,
)
from .vad import VoiceCommandSegmenter
# Default pipeline timeout (presumably in seconds; confirm against its use site)
DEFAULT_TIMEOUT = 30
# Fallback for the wake word "timeout" input option when the client omits it
# (presumably in seconds; confirm against WakeWordSettings)
DEFAULT_WAKE_WORD_TIMEOUT = 3
# Module-level logger named after this module
_LOGGER = logging.getLogger(__name__)
@@ -63,6 +64,18 @@ def async_register_websocket_api(hass: HomeAssistant) -> None:
cv.key_value_schemas(
"start_stage",
{
PipelineStage.WAKE_WORD: vol.Schema(
{
vol.Required("input"): {
vol.Required("sample_rate"): int,
vol.Optional("timeout"): vol.Any(float, int),
vol.Optional("audio_seconds_to_buffer"): vol.Any(
float, int
),
}
},
extra=vol.ALLOW_EXTRA,
),
PipelineStage.STT: vol.Schema(
{vol.Required("input"): {vol.Required("sample_rate"): int}},
extra=vol.ALLOW_EXTRA,
@@ -102,6 +115,7 @@ async def websocket_run(
end_stage = PipelineStage(msg["end_stage"])
handler_id: int | None = None
unregister_handler: Callable[[], None] | None = None
wake_word_settings: WakeWordSettings | None = None
# Arguments to PipelineInput
input_args: dict[str, Any] = {
@@ -109,24 +123,26 @@ async def websocket_run(
"device_id": msg.get("device_id"),
}
if start_stage == PipelineStage.STT:
if start_stage in (PipelineStage.WAKE_WORD, PipelineStage.STT):
# Audio pipeline that will receive audio as binary websocket messages
audio_queue: asyncio.Queue[bytes] = asyncio.Queue()
incoming_sample_rate = msg["input"]["sample_rate"]
if start_stage == PipelineStage.WAKE_WORD:
wake_word_settings = WakeWordSettings(
timeout=msg["input"].get("timeout", DEFAULT_WAKE_WORD_TIMEOUT),
audio_seconds_to_buffer=msg["input"].get("audio_seconds_to_buffer", 0),
)
# Async generator that yields audio chunks pulled from audio_queue until an
# empty chunk is received.
# NOTE(review): this listing looks like a diff hunk rendered without +/-
# markers, so removed and added lines may both appear below (the unconditional
# ratecv call and the segmenter check vs. the guarded ratecv call) — confirm
# against the actual file before relying on the exact flow.
async def stt_stream() -> AsyncGenerator[bytes, None]:
# Resampler state threaded through successive audioop.ratecv calls
state = None
segmenter = VoiceCommandSegmenter()
# Yield until we receive an empty chunk
while chunk := await audio_queue.get():
# Resample 2-byte-wide, single-channel audio from the incoming rate to 16000 Hz
chunk, state = audioop.ratecv(
chunk, 2, 1, incoming_sample_rate, 16000, state
)
if not segmenter.process(chunk):
# Voice command is finished
break
# Resampling is skipped when the incoming rate is already 16000 Hz
if incoming_sample_rate != 16000:
chunk, state = audioop.ratecv(
chunk, 2, 1, incoming_sample_rate, 16000, state
)
yield chunk
def handle_binary(
@@ -169,6 +185,7 @@ async def websocket_run(
"stt_binary_handler_id": handler_id,
"timeout": timeout,
},
wake_word_settings=wake_word_settings,
)
pipeline_input = PipelineInput(**input_args)