Add wake word to assist pipeline debug (#17897)

2025-07-25 18:26:35 +00:00 · 2023-09-25 17:51:12 +02:00 · 2023-09-25 17:51:12 +02:00 · 22f9dbd65d
commit 22f9dbd65d
parent 579050bfc7
3 changed files with 204 additions and 26 deletions
--- a/src/data/assist_pipeline.ts
+++ b/src/data/assist_pipeline.ts
@ -61,6 +61,19 @@ interface PipelineErrorEvent extends PipelineEventBase {
  };
 }
 /**
  * Pipeline event tagged `"wake_word-start"`.
  *
  * Its payload names the wake word engine in use and carries the speech
  * metadata supplied for the audio stream.
  */
 interface PipelineWakeWordStartEvent extends PipelineEventBase {
  type: "wake_word-start";
  data: { engine: string; metadata: SpeechMetadata };
 }
 /**
  * Pipeline event tagged `"wake_word-end"`.
  *
  * Its payload holds the detection result under `wake_word_output`.
  */
 interface PipelineWakeWordEndEvent extends PipelineEventBase {
  type: "wake_word-end";
  data: {
    wake_word_output: {
      // Identifier of the detected wake word (shown as "Model" in the debug UI).
      ww_id: string;
      // NOTE(review): unit/origin of this timestamp is not visible here — confirm.
      timestamp: number;
    };
  };
 }
 interface PipelineSTTStartEvent extends PipelineEventBase {
  type: "stt-start";
  data: {
@ -110,6 +123,8 @@ export type PipelineRunEvent =
  | PipelineRunStartEvent
  | PipelineRunEndEvent
  | PipelineErrorEvent
  | PipelineWakeWordStartEvent
  | PipelineWakeWordEndEvent
  | PipelineSTTStartEvent
  | PipelineSTTEndEvent
  | PipelineIntentStartEvent
@ -126,6 +141,14 @@ export type PipelineRunOptions = (
      start_stage: "stt";
      input: { sample_rate: number };
    }
  | {
      start_stage: "wake_word";
      input: {
        sample_rate: number;
        timeout?: number;
        audio_seconds_to_buffer?: number;
      };
    }
 ) & {
  end_stage: "stt" | "intent" | "tts";
  pipeline?: string;
@ -135,9 +158,11 @@ export type PipelineRunOptions = (
 export interface PipelineRun {
  init_options?: PipelineRunOptions;
  events: PipelineRunEvent[];
-  stage: "ready" | "stt" | "intent" | "tts" | "done" | "error";
+  stage: "ready" | "wake_word" | "stt" | "intent" | "tts" | "done" | "error";
  run: PipelineRunStartEvent["data"];
  error?: PipelineErrorEvent["data"];
  wake_word?: PipelineWakeWordStartEvent["data"] &
    Partial<PipelineWakeWordEndEvent["data"]> & { done: boolean };
  stt?: PipelineSTTStartEvent["data"] &
    Partial<PipelineSTTEndEvent["data"]> & { done: boolean };
  intent?: PipelineIntentStartEvent["data"] &
@ -167,7 +192,18 @@ export const processEvent = (
    return undefined;
  }
-  if (event.type === "stt-start") {
+  if (event.type === "wake_word-start") {
    run = {
      ...run,
      stage: "wake_word",
      wake_word: { ...event.data, done: false },
    };
  } else if (event.type === "wake_word-end") {
    run = {
      ...run,
      wake_word: { ...run.wake_word!, ...event.data, done: true },
    };
  } else if (event.type === "stt-start") {
    run = {
      ...run,
      stage: "stt",
--- a/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts
+++ b/src/panels/config/voice-assistants/debug/assist-pipeline-run-debug.ts
@ -79,18 +79,29 @@ export class AssistPipelineRunDebug extends LitElement {
                    .value=${this._pipelineId}
                    @value-changed=${this._pipelinePicked}
                  ></ha-assist-pipeline-picker>
-                  <ha-button raised @click=${this._runTextPipeline}>
+                  <div class="start-buttons">
-                    Run Text Pipeline
+                    <ha-button raised @click=${this._runTextPipeline}>
-                  </ha-button>
+                      Run Text Pipeline
-                  <ha-button
+                    </ha-button>
-                    raised
+                    <ha-button
-                    @click=${this._runAudioPipeline}
+                      raised
-                    .disabled=${!window.isSecureContext ||
+                      @click=${this._runAudioPipeline}
-                    // @ts-ignore-next-line
+                      .disabled=${!window.isSecureContext ||
-                    !(window.AudioContext || window.webkitAudioContext)}
+                      // @ts-ignore-next-line
-                  >
+                      !(window.AudioContext || window.webkitAudioContext)}
-                    Run Audio Pipeline
+                    >
-                  </ha-button>
+                      Run Audio Pipeline
                    </ha-button>
                    <ha-button
                      raised
                      @click=${this._runAudioWakeWordPipeline}
                      .disabled=${!window.isSecureContext ||
                      // @ts-ignore-next-line
                      !(window.AudioContext || window.webkitAudioContext)}
                    >
                      Run Audio Pipeline with Wake Word detection
                    </ha-button>
                  </div>
                `
              : this._pipelineRuns[0].init_options!.start_stage === "intent"
              ? html`
@ -179,6 +190,83 @@ export class AssistPipelineRunDebug extends LitElement {
    );
  }
  /**
   * Records microphone audio and runs a pipeline that starts at the wake word
   * stage and ends at TTS, streaming the recorded chunks to the server.
   *
   * Chunks captured before the run reaches the "wake_word" stage are kept in
   * `this._audioBuffer` and flushed once that stage starts; later chunks are
   * sent as they arrive. Recording stops as soon as the run leaves the
   * "ready" / "wake_word" / "stt" stages. When the run is done the TTS output
   * is played; once playback ends, either another wake word run is started
   * (continue-conversation checkbox checked) or `_finished` is set. `_finished`
   * is also set when the run errors, when no TTS URL is available, or when
   * playback cannot be started.
   */
  private async _runAudioWakeWordPipeline() {
    const audioRecorder = new AudioRecorder((data) => {
      // While a buffer exists the run is not accepting audio yet, so hold on
      // to the chunk; otherwise forward it right away.
      if (this._audioBuffer) {
        this._audioBuffer.push(data);
      } else {
        this._sendAudioChunk(data);
      }
    });

    this._audioBuffer = [];
    await audioRecorder.start();

    // Cleared on first use so the recorder is closed only once.
    let stopRecording: (() => void) | undefined = () => {
      stopRecording = undefined;
      audioRecorder.close();
      // This is only invoked after the run has left the "stt" stage, so there
      // is no in-flight audio stream to flush or terminate here.
      this._audioBuffer = undefined;
    };

    await this._doRunPipeline(
      (updatedRun) => {
        // When we start the wake word stage, the WS has a binary handler
        if (updatedRun.stage === "wake_word" && this._audioBuffer) {
          // Send the buffer over the WS to the Wake Word / STT engine.
          for (const buffer of this._audioBuffer) {
            this._sendAudioChunk(buffer);
          }
          this._audioBuffer = undefined;
        }

        // Stop recording if the server is done with STT stage
        if (
          !["ready", "wake_word", "stt"].includes(updatedRun.stage) &&
          stopRecording
        ) {
          stopRecording();
        }

        if (updatedRun.stage === "error") {
          this._finished = true;
          return;
        }
        if (updatedRun.stage !== "done") {
          return;
        }

        // Play audio when we're done.
        const url = updatedRun.tts?.tts_output?.url;
        if (!url) {
          // Nothing to play; end the session instead of throwing in the
          // event callback.
          this._finished = true;
          return;
        }
        const audio = new Audio(url);
        audio.addEventListener("ended", () => {
          if (this._continueConversationCheckbox.checked) {
            this._runAudioWakeWordPipeline();
          } else {
            this._finished = true;
          }
        });
        // play() rejects when playback cannot start (e.g. blocked autoplay or
        // an unloadable source); "ended" would never fire in that case, so
        // finish the session here.
        audio.play().catch(() => {
          this._finished = true;
        });
      },
      {
        start_stage: "wake_word",
        end_stage: "tts",
        input: {
          sample_rate: audioRecorder.sampleRate!,
        },
      }
    );
  }
  private async _runAudioPipeline() {
    const audioRecorder = new AudioRecorder((data) => {
      if (this._audioBuffer) {
@ -327,6 +415,13 @@ export class AssistPipelineRunDebug extends LitElement {
        margin: 0 auto;
        direction: ltr;
      }
      .start-buttons {
        display: flex;
        flex-wrap: wrap;
        gap: 8px;
        align-items: center;
        justify-content: center;
      }
      .start-row {
        display: flex;
        justify-content: space-around;
--- a/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts
+++ b/src/panels/config/voice-assistants/debug/assist-render-pipeline-run.ts
@ -15,6 +15,9 @@ const RUN_DATA = {
  pipeline: "Pipeline",
  language: "Language",
 };
 // Wake word fields that get a labelled row in the debug view, keyed by
 // field name with the display label as value.
 const WAKE_WORD_DATA = { engine: "Engine" };
 const STT_DATA = {
  engine: "Engine",
@ -35,11 +38,12 @@ const TTS_DATA = {
 const STAGES: Record<PipelineRun["stage"], number> = {
  ready: 0,
-  stt: 1,
+  wake_word: 1,
-  intent: 2,
+  stt: 2,
-  tts: 3,
+  intent: 3,
-  done: 4,
+  tts: 4,
-  error: 5,
+  done: 5,
  error: 6,
 };
 const hasStage = (run: PipelineRun, stage: PipelineRun["stage"]) =>
@ -53,7 +57,7 @@ const maybeRenderError = (
  stage: string,
  lastRunStage: string
 ) => {
-  if (run.stage !== "error" || lastRunStage !== stage) {
+  if (!("error" in run) || lastRunStage !== stage) {
    return "";
  }
@ -80,11 +84,10 @@ const renderProgress = (
    return "";
  }
  if (pipelineRun.stage === "error") {
    return html`❌`;
  }
  if (!finishEvent) {
    if ("error" in pipelineRun) {
      return html`❌`;
    }
    return html`
      <ha-circular-progress size="tiny" active></ha-circular-progress>
    `;
@ -138,8 +141,9 @@ export class AssistPipelineDebug extends LitElement {
  protected render(): TemplateResult {
    const lastRunStage: string = this.pipelineRun
-      ? ["tts", "intent", "stt"].find((stage) => stage in this.pipelineRun) ||
+      ? ["tts", "intent", "stt", "wake_word"].find(
-        "ready"
+          (stage) => stage in this.pipelineRun
        ) || "ready"
      : "ready";
    const messages: Array<{ from: string; text: string }> = [];
@ -194,6 +198,46 @@ export class AssistPipelineDebug extends LitElement {
      </ha-card>
      ${maybeRenderError(this.pipelineRun, "ready", lastRunStage)}
      ${hasStage(this.pipelineRun, "wake_word")
        ? html`
            <ha-card>
              <div class="card-content">
                <div class="row heading">
                  <span>Wake word</span>
                  ${renderProgress(this.hass, this.pipelineRun, "wake_word")}
                </div>
                ${this.pipelineRun.wake_word
                  ? html`
                      <div class="card-content">
                        ${renderData(this.pipelineRun.wake_word, STT_DATA)}
                        ${this.pipelineRun.wake_word.wake_word_output
                          ? html`<div class="row">
                                <div>Model</div>
                                <div>
                                  ${this.pipelineRun.wake_word.wake_word_output
                                    .ww_id}
                                </div>
                              </div>
                              <div class="row">
                                <div>Timestamp</div>
                                <div>
                                  ${this.pipelineRun.wake_word.wake_word_output
                                    .timestamp}
                                </div>
                              </div>`
                          : ""}
                        ${dataMinusKeysRender(
                          this.pipelineRun.wake_word,
                          WAKE_WORD_DATA
                        )}
                      </div>
                    `
                  : ""}
              </div>
            </ha-card>
          `
        : ""}
      ${maybeRenderError(this.pipelineRun, "wake_word", lastRunStage)}
      ${hasStage(this.pipelineRun, "stt")
        ? html`
            <ha-card>
@ -335,6 +379,9 @@ export class AssistPipelineDebug extends LitElement {
      display: flex;
      justify-content: space-between;
    }
    .row > div:last-child {
      text-align: right;
    }
    ha-expansion-panel {
      padding-left: 8px;
    }