diff --git a/src/data/assist_pipeline.ts b/src/data/assist_pipeline.ts index 3d1745d323..18cfab3cc5 100644 --- a/src/data/assist_pipeline.ts +++ b/src/data/assist_pipeline.ts @@ -61,6 +61,19 @@ interface PipelineErrorEvent extends PipelineEventBase { }; } +interface PipelineWakeWordStartEvent extends PipelineEventBase { + type: "wake_word-start"; + data: { + engine: string; + metadata: SpeechMetadata; + }; +} + +interface PipelineWakeWordEndEvent extends PipelineEventBase { + type: "wake_word-end"; + data: { wake_word_output: { ww_id: string; timestamp: number } }; +} + interface PipelineSTTStartEvent extends PipelineEventBase { type: "stt-start"; data: { @@ -110,6 +123,8 @@ export type PipelineRunEvent = | PipelineRunStartEvent | PipelineRunEndEvent | PipelineErrorEvent + | PipelineWakeWordStartEvent + | PipelineWakeWordEndEvent | PipelineSTTStartEvent | PipelineSTTEndEvent | PipelineIntentStartEvent @@ -126,6 +141,14 @@ export type PipelineRunOptions = ( start_stage: "stt"; input: { sample_rate: number }; } + | { + start_stage: "wake_word"; + input: { + sample_rate: number; + timeout?: number; + audio_seconds_to_buffer?: number; + }; + } ) & { end_stage: "stt" | "intent" | "tts"; pipeline?: string; @@ -135,9 +158,11 @@ export type PipelineRunOptions = ( export interface PipelineRun { init_options?: PipelineRunOptions; events: PipelineRunEvent[]; - stage: "ready" | "stt" | "intent" | "tts" | "done" | "error"; + stage: "ready" | "wake_word" | "stt" | "intent" | "tts" | "done" | "error"; run: PipelineRunStartEvent["data"]; error?: PipelineErrorEvent["data"]; + wake_word?: PipelineWakeWordStartEvent["data"] & + Partial & { done: boolean }; stt?: PipelineSTTStartEvent["data"] & Partial & { done: boolean }; intent?: PipelineIntentStartEvent["data"] & @@ -167,7 +192,18 @@ export const processEvent = ( return undefined; } - if (event.type === "stt-start") { + if (event.type === "wake_word-start") { + run = { + ...run, + stage: "wake_word", + wake_word: { ...event.data, 
done: false }, + }; + } else if (event.type === "wake_word-end") { + run = { + ...run, + wake_word: { ...run.wake_word!, ...event.data, done: true }, + }; + } else if (event.type === "stt-start") { run = { ...run, stage: "stt", diff --git a/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts b/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts index 85bba483ae..cad30ff48a 100644 --- a/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts +++ b/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts @@ -79,18 +79,29 @@ export class AssistPipelineRunDebug extends LitElement { .value=${this._pipelineId} @value-changed=${this._pipelinePicked} > - - Run Text Pipeline - - - Run Audio Pipeline - +
+ + Run Text Pipeline + + + Run Audio Pipeline + + + Run Audio Pipeline with Wake Word detection + +
` : this._pipelineRuns[0].init_options!.start_stage === "intent" ? html` @@ -179,6 +190,83 @@ export class AssistPipelineRunDebug extends LitElement { ); } + private async _runAudioWakeWordPipeline() { + const audioRecorder = new AudioRecorder((data) => { + if (this._audioBuffer) { + this._audioBuffer.push(data); + } else { + this._sendAudioChunk(data); + } + }); + + this._audioBuffer = []; + await audioRecorder.start(); + + let run: PipelineRun | undefined; + + let stopRecording: (() => void) | undefined = () => { + stopRecording = undefined; + audioRecorder.close(); + // We're currently STTing, so finish audio + if (run?.stage === "stt" && run.stt!.done === false) { + if (this._audioBuffer) { + for (const chunk of this._audioBuffer) { + this._sendAudioChunk(chunk); + } + } + // Send empty message to indicate we're done streaming. + this._sendAudioChunk(new Int16Array()); + } + this._audioBuffer = undefined; + }; + + await this._doRunPipeline( + (updatedRun) => { + run = updatedRun; + + // When we start wake word stage, the WS has a binary handler + if (updatedRun.stage === "wake_word" && this._audioBuffer) { + // Send the buffer over the WS to the Wake Word / STT engine. + for (const buffer of this._audioBuffer) { + this._sendAudioChunk(buffer); + } + this._audioBuffer = undefined; + } + + // Stop recording if the server is done with STT stage + if ( + !["ready", "wake_word", "stt"].includes(updatedRun.stage) && + stopRecording + ) { + stopRecording(); + } + + // Play audio when we're done. 
+ if (updatedRun.stage === "done") { + const url = updatedRun.tts!.tts_output!.url; + const audio = new Audio(url); + audio.addEventListener("ended", () => { + if (this._continueConversationCheckbox.checked) { + this._runAudioWakeWordPipeline(); + } else { + this._finished = true; + } + }); + audio.play(); + } else if (updatedRun.stage === "error") { + this._finished = true; + } + }, + { + start_stage: "wake_word", + end_stage: "tts", + input: { + sample_rate: audioRecorder.sampleRate!, + }, + } + ); + } + private async _runAudioPipeline() { const audioRecorder = new AudioRecorder((data) => { if (this._audioBuffer) { @@ -327,6 +415,13 @@ export class AssistPipelineRunDebug extends LitElement { margin: 0 auto; direction: ltr; } + .start-buttons { + display: flex; + flex-wrap: wrap; + gap: 8px; + align-items: center; + justify-content: center; + } .start-row { display: flex; justify-content: space-around; diff --git a/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts b/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts index 14da1d1e7d..132e4f2e2f 100644 --- a/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts +++ b/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts @@ -15,6 +15,9 @@ const RUN_DATA = { pipeline: "Pipeline", language: "Language", }; +const WAKE_WORD_DATA = { + engine: "Engine", +}; const STT_DATA = { engine: "Engine", @@ -35,11 +38,12 @@ const TTS_DATA = { const STAGES: Record = { ready: 0, - stt: 1, - intent: 2, - tts: 3, - done: 4, - error: 5, + wake_word: 1, + stt: 2, + intent: 3, + tts: 4, + done: 5, + error: 6, }; const hasStage = (run: PipelineRun, stage: PipelineRun["stage"]) => @@ -53,7 +57,7 @@ const maybeRenderError = ( stage: string, lastRunStage: string ) => { - if (run.stage !== "error" || lastRunStage !== stage) { + if (!("error" in run) || lastRunStage !== stage) { return ""; } @@ -80,11 +84,10 @@ const renderProgress = ( return ""; } - if 
(pipelineRun.stage === "error") { - return html`❌`; - } - if (!finishEvent) { + if ("error" in pipelineRun) { + return html`❌`; + } return html` `; @@ -138,8 +141,9 @@ export class AssistPipelineDebug extends LitElement { protected render(): TemplateResult { const lastRunStage: string = this.pipelineRun - ? ["tts", "intent", "stt"].find((stage) => stage in this.pipelineRun) || - "ready" + ? ["tts", "intent", "stt", "wake_word"].find( + (stage) => stage in this.pipelineRun + ) || "ready" : "ready"; const messages: Array<{ from: string; text: string }> = []; @@ -194,6 +198,46 @@ export class AssistPipelineDebug extends LitElement { ${maybeRenderError(this.pipelineRun, "ready", lastRunStage)} + ${hasStage(this.pipelineRun, "wake_word") + ? html` + +
+
+ Wake word + ${renderProgress(this.hass, this.pipelineRun, "wake_word")} +
+ ${this.pipelineRun.wake_word + ? html` +
+ ${renderData(this.pipelineRun.wake_word, WAKE_WORD_DATA)} + ${this.pipelineRun.wake_word.wake_word_output + ? html`
+
Model
+
+ ${this.pipelineRun.wake_word.wake_word_output + .ww_id} +
+
+
+
Timestamp
+
+ ${this.pipelineRun.wake_word.wake_word_output + .timestamp} +
+
` + : ""} + ${dataMinusKeysRender( + this.pipelineRun.wake_word, + WAKE_WORD_DATA + )} +
+ ` + : ""} +
+
+ ` + : ""} + ${maybeRenderError(this.pipelineRun, "wake_word", lastRunStage)} ${hasStage(this.pipelineRun, "stt") ? html` @@ -335,6 +379,9 @@ export class AssistPipelineDebug extends LitElement { display: flex; justify-content: space-between; } + .row > div:last-child { + text-align: right; + } ha-expansion-panel { padding-left: 8px; }