Add wake word to assist pipeline debug (#17897)

2025-07-24 09:46:36 +00:00 · 2023-09-25 17:51:12 +02:00 · 2023-09-25 17:51:12 +02:00 · 22f9dbd65d
commit 22f9dbd65d
parent 579050bfc7
3 changed files with 204 additions and 26 deletions
--- a/src/data/assist_pipeline.ts
+++ b/src/data/assist_pipeline.ts
@ -61,6 +61,19 @@ interface PipelineErrorEvent extends PipelineEventBase {
  };
 }

+interface PipelineWakeWordStartEvent extends PipelineEventBase {
+  type: "wake_word-start";
+  data: {
+    engine: string;
+    metadata: SpeechMetadata;
+  };
+}
+
+interface PipelineWakeWordEndEvent extends PipelineEventBase {
+  type: "wake_word-end";
+  data: { wake_word_output: { ww_id: string; timestamp: number } };
+}
+
 interface PipelineSTTStartEvent extends PipelineEventBase {
  type: "stt-start";
  data: {
@ -110,6 +123,8 @@ export type PipelineRunEvent =
  | PipelineRunStartEvent
  | PipelineRunEndEvent
  | PipelineErrorEvent
+  | PipelineWakeWordStartEvent
+  | PipelineWakeWordEndEvent
  | PipelineSTTStartEvent
  | PipelineSTTEndEvent
  | PipelineIntentStartEvent
@ -126,6 +141,14 @@ export type PipelineRunOptions = (
      start_stage: "stt";
      input: { sample_rate: number };
    }
+  | {
+      start_stage: "wake_word";
+      input: {
+        sample_rate: number;
+        timeout?: number;
+        audio_seconds_to_buffer?: number;
+      };
+    }
 ) & {
  end_stage: "stt" | "intent" | "tts";
  pipeline?: string;
@ -135,9 +158,11 @@ export type PipelineRunOptions = (
 export interface PipelineRun {
  init_options?: PipelineRunOptions;
  events: PipelineRunEvent[];
-  stage: "ready" | "stt" | "intent" | "tts" | "done" | "error";
+  stage: "ready" | "wake_word" | "stt" | "intent" | "tts" | "done" | "error";
  run: PipelineRunStartEvent["data"];
  error?: PipelineErrorEvent["data"];
+  wake_word?: PipelineWakeWordStartEvent["data"] &
+    Partial<PipelineWakeWordEndEvent["data"]> & { done: boolean };
  stt?: PipelineSTTStartEvent["data"] &
    Partial<PipelineSTTEndEvent["data"]> & { done: boolean };
  intent?: PipelineIntentStartEvent["data"] &
@ -167,7 +192,18 @@ export const processEvent = (
    return undefined;
  }

-  if (event.type === "stt-start") {
+  if (event.type === "wake_word-start") {
+    run = {
+      ...run,
+      stage: "wake_word",
+      wake_word: { ...event.data, done: false },
+    };
+  } else if (event.type === "wake_word-end") {
+    run = {
+      ...run,
+      wake_word: { ...run.wake_word!, ...event.data, done: true },
+    };
+  } else if (event.type === "stt-start") {
    run = {
      ...run,
      stage: "stt",
--- a/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts
+++ b/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts
@ -79,18 +79,29 @@ export class AssistPipelineRunDebug extends LitElement {
                    .value=${this._pipelineId}
                    @value-changed=${this._pipelinePicked}
                  ></ha-assist-pipeline-picker>
-                  <ha-button raised @click=${this._runTextPipeline}>
-                    Run Text Pipeline
-                  </ha-button>
-                  <ha-button
-                    raised
-                    @click=${this._runAudioPipeline}
-                    .disabled=${!window.isSecureContext ||
-                    // @ts-ignore-next-line
-                    !(window.AudioContext || window.webkitAudioContext)}
-                  >
-                    Run Audio Pipeline
-                  </ha-button>
+                  <div class="start-buttons">
+                    <ha-button raised @click=${this._runTextPipeline}>
+                      Run Text Pipeline
+                    </ha-button>
+                    <ha-button
+                      raised
+                      @click=${this._runAudioPipeline}
+                      .disabled=${!window.isSecureContext ||
+                      // @ts-ignore-next-line
+                      !(window.AudioContext || window.webkitAudioContext)}
+                    >
+                      Run Audio Pipeline
+                    </ha-button>
+                    <ha-button
+                      raised
+                      @click=${this._runAudioWakeWordPipeline}
+                      .disabled=${!window.isSecureContext ||
+                      // @ts-ignore-next-line
+                      !(window.AudioContext || window.webkitAudioContext)}
+                    >
+                      Run Audio Pipeline with Wake Word detection
+                    </ha-button>
+                  </div>
                `
              : this._pipelineRuns[0].init_options!.start_stage === "intent"
              ? html`
@ -179,6 +190,83 @@ export class AssistPipelineRunDebug extends LitElement {
    );
  }

+  private async _runAudioWakeWordPipeline() {
+    const audioRecorder = new AudioRecorder((data) => {
+      if (this._audioBuffer) {
+        this._audioBuffer.push(data);
+      } else {
+        this._sendAudioChunk(data);
+      }
+    });
+
+    this._audioBuffer = [];
+    await audioRecorder.start();
+
+    let run: PipelineRun | undefined;
+
+    let stopRecording: (() => void) | undefined = () => {
+      stopRecording = undefined;
+      audioRecorder.close();
+      // We're currently STTing, so finish audio
+      if (run?.stage === "stt" && run.stt!.done === false) {
+        if (this._audioBuffer) {
+          for (const chunk of this._audioBuffer) {
+            this._sendAudioChunk(chunk);
+          }
+        }
+        // Send empty message to indicate we're done streaming.
+        this._sendAudioChunk(new Int16Array());
+      }
+      this._audioBuffer = undefined;
+    };
+
+    await this._doRunPipeline(
+      (updatedRun) => {
+        run = updatedRun;
+
+        // When we start the wake word stage, the WS has a binary handler
+        if (updatedRun.stage === "wake_word" && this._audioBuffer) {
+          // Send the buffer over the WS to the Wake Word / STT engine.
+          for (const buffer of this._audioBuffer) {
+            this._sendAudioChunk(buffer);
+          }
+          this._audioBuffer = undefined;
+        }
+
+        // Stop recording if the server is done with STT stage
+        if (
+          !["ready", "wake_word", "stt"].includes(updatedRun.stage) &&
+          stopRecording
+        ) {
+          stopRecording();
+        }
+
+        // Play audio when we're done.
+        if (updatedRun.stage === "done") {
+          const url = updatedRun.tts!.tts_output!.url;
+          const audio = new Audio(url);
+          audio.addEventListener("ended", () => {
+            if (this._continueConversationCheckbox.checked) {
+              this._runAudioWakeWordPipeline();
+            } else {
+              this._finished = true;
+            }
+          });
+          audio.play();
+        } else if (updatedRun.stage === "error") {
+          this._finished = true;
+        }
+      },
+      {
+        start_stage: "wake_word",
+        end_stage: "tts",
+        input: {
+          sample_rate: audioRecorder.sampleRate!,
+        },
+      }
+    );
+  }
+
  private async _runAudioPipeline() {
    const audioRecorder = new AudioRecorder((data) => {
      if (this._audioBuffer) {
@ -327,6 +415,13 @@ export class AssistPipelineRunDebug extends LitElement {
        margin: 0 auto;
        direction: ltr;
      }
+      .start-buttons {
+        display: flex;
+        flex-wrap: wrap;
+        gap: 8px;
+        align-items: center;
+        justify-content: center;
+      }
      .start-row {
        display: flex;
        justify-content: space-around;
--- a/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts
+++ b/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts
@ -15,6 +15,9 @@ const RUN_DATA = {
  pipeline: "Pipeline",
  language: "Language",
 };
+const WAKE_WORD_DATA = {
+  engine: "Engine",
+};

 const STT_DATA = {
  engine: "Engine",
@ -35,11 +38,12 @@ const TTS_DATA = {

 const STAGES: Record<PipelineRun["stage"], number> = {
  ready: 0,
-  stt: 1,
-  intent: 2,
-  tts: 3,
-  done: 4,
-  error: 5,
+  wake_word: 1,
+  stt: 2,
+  intent: 3,
+  tts: 4,
+  done: 5,
+  error: 6,
 };

 const hasStage = (run: PipelineRun, stage: PipelineRun["stage"]) =>
@ -53,7 +57,7 @@ const maybeRenderError = (
  stage: string,
  lastRunStage: string
 ) => {
-  if (run.stage !== "error" || lastRunStage !== stage) {
+  if (!("error" in run) || lastRunStage !== stage) {
    return "";
  }

@ -80,11 +84,10 @@ const renderProgress = (
    return "";
  }

-  if (pipelineRun.stage === "error") {
-    return html`❌`;
-  }
-
  if (!finishEvent) {
+    if ("error" in pipelineRun) {
+      return html`❌`;
+    }
    return html`
      <ha-circular-progress size="tiny" active></ha-circular-progress>
    `;
@ -138,8 +141,9 @@ export class AssistPipelineDebug extends LitElement {

  protected render(): TemplateResult {
    const lastRunStage: string = this.pipelineRun
-      ? ["tts", "intent", "stt"].find((stage) => stage in this.pipelineRun) ||
-        "ready"
+      ? ["tts", "intent", "stt", "wake_word"].find(
+          (stage) => stage in this.pipelineRun
+        ) || "ready"
      : "ready";

    const messages: Array<{ from: string; text: string }> = [];
@ -194,6 +198,46 @@ export class AssistPipelineDebug extends LitElement {
      </ha-card>

      ${maybeRenderError(this.pipelineRun, "ready", lastRunStage)}
+      ${hasStage(this.pipelineRun, "wake_word")
+        ? html`
+            <ha-card>
+              <div class="card-content">
+                <div class="row heading">
+                  <span>Wake word</span>
+                  ${renderProgress(this.hass, this.pipelineRun, "wake_word")}
+                </div>
+                ${this.pipelineRun.wake_word
+                  ? html`
+                      <div class="card-content">
+                        ${renderData(this.pipelineRun.wake_word, WAKE_WORD_DATA)}
+                        ${this.pipelineRun.wake_word.wake_word_output
+                          ? html`<div class="row">
+                                <div>Model</div>
+                                <div>
+                                  ${this.pipelineRun.wake_word.wake_word_output
+                                    .ww_id}
+                                </div>
+                              </div>
+                              <div class="row">
+                                <div>Timestamp</div>
+                                <div>
+                                  ${this.pipelineRun.wake_word.wake_word_output
+                                    .timestamp}
+                                </div>
+                              </div>`
+                          : ""}
+                        ${dataMinusKeysRender(
+                          this.pipelineRun.wake_word,
+                          WAKE_WORD_DATA
+                        )}
+                      </div>
+                    `
+                  : ""}
+              </div>
+            </ha-card>
+          `
+        : ""}
+      ${maybeRenderError(this.pipelineRun, "wake_word", lastRunStage)}
      ${hasStage(this.pipelineRun, "stt")
        ? html`
            <ha-card>
@ -335,6 +379,9 @@ export class AssistPipelineDebug extends LitElement {
      display: flex;
      justify-content: space-between;
    }
+    .row > div:last-child {
+      text-align: right;
+    }
    ha-expansion-panel {
      padding-left: 8px;
    }