Add wake word to assist pipeline debug (#17897)

This commit is contained in:
Bram Kragten 2023-09-25 17:51:12 +02:00 committed by GitHub
parent 579050bfc7
commit 22f9dbd65d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 26 deletions

View File

@ -61,6 +61,19 @@ interface PipelineErrorEvent extends PipelineEventBase {
}; };
} }
/**
 * Pipeline event whose `type` is `"wake_word-start"`.
 *
 * `processEvent` reacts to it by moving the run to the `"wake_word"` stage and
 * copying `data` into `run.wake_word` with `done: false`.
 */
interface PipelineWakeWordStartEvent extends PipelineEventBase {
type: "wake_word-start";
data: {
// NOTE(review): presumably the identifier of the wake word engine in use — confirm.
engine: string;
// NOTE(review): SpeechMetadata is declared outside this view; its fields are not shown here.
metadata: SpeechMetadata;
};
}
/**
 * Pipeline event whose `type` is `"wake_word-end"`.
 *
 * `processEvent` merges its `data` into the existing `run.wake_word` entry and
 * sets `done: true` on it.
 */
interface PipelineWakeWordEndEvent extends PipelineEventBase {
type: "wake_word-end";
// The debug view shows `ww_id` under "Model" and `timestamp` under "Timestamp".
// NOTE(review): the unit/epoch of `timestamp` is not visible here — confirm.
data: { wake_word_output: { ww_id: string; timestamp: number } };
}
interface PipelineSTTStartEvent extends PipelineEventBase { interface PipelineSTTStartEvent extends PipelineEventBase {
type: "stt-start"; type: "stt-start";
data: { data: {
@ -110,6 +123,8 @@ export type PipelineRunEvent =
| PipelineRunStartEvent | PipelineRunStartEvent
| PipelineRunEndEvent | PipelineRunEndEvent
| PipelineErrorEvent | PipelineErrorEvent
| PipelineWakeWordStartEvent
| PipelineWakeWordEndEvent
| PipelineSTTStartEvent | PipelineSTTStartEvent
| PipelineSTTEndEvent | PipelineSTTEndEvent
| PipelineIntentStartEvent | PipelineIntentStartEvent
@ -126,6 +141,14 @@ export type PipelineRunOptions = (
start_stage: "stt"; start_stage: "stt";
input: { sample_rate: number }; input: { sample_rate: number };
} }
| {
start_stage: "wake_word";
input: {
sample_rate: number;
timeout?: number;
audio_seconds_to_buffer?: number;
};
}
) & { ) & {
end_stage: "stt" | "intent" | "tts"; end_stage: "stt" | "intent" | "tts";
pipeline?: string; pipeline?: string;
@ -135,9 +158,11 @@ export type PipelineRunOptions = (
export interface PipelineRun { export interface PipelineRun {
init_options?: PipelineRunOptions; init_options?: PipelineRunOptions;
events: PipelineRunEvent[]; events: PipelineRunEvent[];
stage: "ready" | "stt" | "intent" | "tts" | "done" | "error"; stage: "ready" | "wake_word" | "stt" | "intent" | "tts" | "done" | "error";
run: PipelineRunStartEvent["data"]; run: PipelineRunStartEvent["data"];
error?: PipelineErrorEvent["data"]; error?: PipelineErrorEvent["data"];
wake_word?: PipelineWakeWordStartEvent["data"] &
Partial<PipelineWakeWordEndEvent["data"]> & { done: boolean };
stt?: PipelineSTTStartEvent["data"] & stt?: PipelineSTTStartEvent["data"] &
Partial<PipelineSTTEndEvent["data"]> & { done: boolean }; Partial<PipelineSTTEndEvent["data"]> & { done: boolean };
intent?: PipelineIntentStartEvent["data"] & intent?: PipelineIntentStartEvent["data"] &
@ -167,7 +192,18 @@ export const processEvent = (
return undefined; return undefined;
} }
if (event.type === "stt-start") { if (event.type === "wake_word-start") {
run = {
...run,
stage: "wake_word",
wake_word: { ...event.data, done: false },
};
} else if (event.type === "wake_word-end") {
run = {
...run,
wake_word: { ...run.wake_word!, ...event.data, done: true },
};
} else if (event.type === "stt-start") {
run = { run = {
...run, ...run,
stage: "stt", stage: "stt",

View File

@ -79,18 +79,29 @@ export class AssistPipelineRunDebug extends LitElement {
.value=${this._pipelineId} .value=${this._pipelineId}
@value-changed=${this._pipelinePicked} @value-changed=${this._pipelinePicked}
></ha-assist-pipeline-picker> ></ha-assist-pipeline-picker>
<ha-button raised @click=${this._runTextPipeline}> <div class="start-buttons">
Run Text Pipeline <ha-button raised @click=${this._runTextPipeline}>
</ha-button> Run Text Pipeline
<ha-button </ha-button>
raised <ha-button
@click=${this._runAudioPipeline} raised
.disabled=${!window.isSecureContext || @click=${this._runAudioPipeline}
// @ts-ignore-next-line .disabled=${!window.isSecureContext ||
!(window.AudioContext || window.webkitAudioContext)} // @ts-ignore-next-line
> !(window.AudioContext || window.webkitAudioContext)}
Run Audio Pipeline >
</ha-button> Run Audio Pipeline
</ha-button>
<ha-button
raised
@click=${this._runAudioWakeWordPipeline}
.disabled=${!window.isSecureContext ||
// @ts-ignore-next-line
!(window.AudioContext || window.webkitAudioContext)}
>
Run Audio Pipeline with Wake Word detection
</ha-button>
</div>
` `
: this._pipelineRuns[0].init_options!.start_stage === "intent" : this._pipelineRuns[0].init_options!.start_stage === "intent"
? html` ? html`
@ -179,6 +190,83 @@ export class AssistPipelineRunDebug extends LitElement {
); );
} }
/**
 * Runs a pipeline with `start_stage: "wake_word"` and `end_stage: "tts"`,
 * feeding it audio chunks produced by an `AudioRecorder`.
 *
 * Chunks are collected in `this._audioBuffer` until the run reports the
 * `"wake_word"` stage; the buffered chunks are then sent and later chunks go
 * straight to `_sendAudioChunk`. Recording is stopped once the run leaves the
 * `"ready"` / `"wake_word"` / `"stt"` stages. When the run is `"done"`, the TTS
 * output URL is played; after playback ends either another wake word run is
 * started (continue-conversation checkbox checked) or `_finished` is set.
 * A run that ends in `"error"` sets `_finished` immediately.
 */
private async _runAudioWakeWordPipeline() {
// While `_audioBuffer` is defined chunks are queued; once it is cleared they
// are sent as they arrive.
const audioRecorder = new AudioRecorder((data) => {
if (this._audioBuffer) {
this._audioBuffer.push(data);
} else {
this._sendAudioChunk(data);
}
});
// Start buffering before the recorder starts so no chunk is sent early.
this._audioBuffer = [];
await audioRecorder.start();
// Latest run state, updated by the callback passed to `_doRunPipeline` below.
let run: PipelineRun | undefined;
// Clears itself first, so the callback below invokes it at most once.
let stopRecording: (() => void) | undefined = () => {
stopRecording = undefined;
audioRecorder.close();
// We're currently STTing, so finish audio
if (run?.stage === "stt" && run.stt!.done === false) {
if (this._audioBuffer) {
for (const chunk of this._audioBuffer) {
this._sendAudioChunk(chunk);
}
}
// Send empty message to indicate we're done streaming.
this._sendAudioChunk(new Int16Array());
}
this._audioBuffer = undefined;
};
await this._doRunPipeline(
(updatedRun) => {
run = updatedRun;
// When we start wake word stage, the WS has a binary handler
if (updatedRun.stage === "wake_word" && this._audioBuffer) {
// Send the buffer over the WS to the Wake Word / STT engine.
for (const buffer of this._audioBuffer) {
this._sendAudioChunk(buffer);
}
// From here on the recorder callback sends chunks directly.
this._audioBuffer = undefined;
}
// Stop recording if the server is done with STT stage
if (
!["ready", "wake_word", "stt"].includes(updatedRun.stage) &&
stopRecording
) {
stopRecording();
}
// Play audio when we're done.
if (updatedRun.stage === "done") {
// NOTE(review): the non-null assertions assume a "done" run always carries
// `tts.tts_output` — confirm against the server's event order.
const url = updatedRun.tts!.tts_output!.url;
const audio = new Audio(url);
audio.addEventListener("ended", () => {
if (this._continueConversationCheckbox.checked) {
// Start the next wake word run; the returned promise is not awaited.
this._runAudioWakeWordPipeline();
} else {
this._finished = true;
}
});
audio.play();
} else if (updatedRun.stage === "error") {
this._finished = true;
}
},
{
start_stage: "wake_word",
end_stage: "tts",
input: {
// Read after `audioRecorder.start()` has resolved.
// NOTE(review): presumably `sampleRate` is only set once started — confirm.
sample_rate: audioRecorder.sampleRate!,
},
}
);
}
private async _runAudioPipeline() { private async _runAudioPipeline() {
const audioRecorder = new AudioRecorder((data) => { const audioRecorder = new AudioRecorder((data) => {
if (this._audioBuffer) { if (this._audioBuffer) {
@ -327,6 +415,13 @@ export class AssistPipelineRunDebug extends LitElement {
margin: 0 auto; margin: 0 auto;
direction: ltr; direction: ltr;
} }
.start-buttons {
display: flex;
flex-wrap: wrap;
gap: 8px;
align-items: center;
justify-content: center;
}
.start-row { .start-row {
display: flex; display: flex;
justify-content: space-around; justify-content: space-around;

View File

@ -15,6 +15,9 @@ const RUN_DATA = {
pipeline: "Pipeline", pipeline: "Pipeline",
language: "Language", language: "Language",
}; };
// Display labels for the wake word stage, keyed by the data field they describe.
const WAKE_WORD_DATA = { engine: "Engine" };
const STT_DATA = { const STT_DATA = {
engine: "Engine", engine: "Engine",
@ -35,11 +38,12 @@ const TTS_DATA = {
const STAGES: Record<PipelineRun["stage"], number> = { const STAGES: Record<PipelineRun["stage"], number> = {
ready: 0, ready: 0,
stt: 1, wake_word: 1,
intent: 2, stt: 2,
tts: 3, intent: 3,
done: 4, tts: 4,
error: 5, done: 5,
error: 6,
}; };
const hasStage = (run: PipelineRun, stage: PipelineRun["stage"]) => const hasStage = (run: PipelineRun, stage: PipelineRun["stage"]) =>
@ -53,7 +57,7 @@ const maybeRenderError = (
stage: string, stage: string,
lastRunStage: string lastRunStage: string
) => { ) => {
if (run.stage !== "error" || lastRunStage !== stage) { if (!("error" in run) || lastRunStage !== stage) {
return ""; return "";
} }
@ -80,11 +84,10 @@ const renderProgress = (
return ""; return "";
} }
if (pipelineRun.stage === "error") {
return html``;
}
if (!finishEvent) { if (!finishEvent) {
if ("error" in pipelineRun) {
return html``;
}
return html` return html`
<ha-circular-progress size="tiny" active></ha-circular-progress> <ha-circular-progress size="tiny" active></ha-circular-progress>
`; `;
@ -138,8 +141,9 @@ export class AssistPipelineDebug extends LitElement {
protected render(): TemplateResult { protected render(): TemplateResult {
const lastRunStage: string = this.pipelineRun const lastRunStage: string = this.pipelineRun
? ["tts", "intent", "stt"].find((stage) => stage in this.pipelineRun) || ? ["tts", "intent", "stt", "wake_word"].find(
"ready" (stage) => stage in this.pipelineRun
) || "ready"
: "ready"; : "ready";
const messages: Array<{ from: string; text: string }> = []; const messages: Array<{ from: string; text: string }> = [];
@ -194,6 +198,46 @@ export class AssistPipelineDebug extends LitElement {
</ha-card> </ha-card>
${maybeRenderError(this.pipelineRun, "ready", lastRunStage)} ${maybeRenderError(this.pipelineRun, "ready", lastRunStage)}
${hasStage(this.pipelineRun, "wake_word")
? html`
<ha-card>
<div class="card-content">
<div class="row heading">
<span>Wake word</span>
${renderProgress(this.hass, this.pipelineRun, "wake_word")}
</div>
${
// Wake word details. The labelled fields are rendered with WAKE_WORD_DATA,
// the same key map handed to dataMinusKeysRender below, so the two calls
// agree on which keys count as labelled (previously STT_DATA was used for
// the first call).
this.pipelineRun.wake_word
? html`
<div class="card-content">
${renderData(this.pipelineRun.wake_word, WAKE_WORD_DATA)}
${this.pipelineRun.wake_word.wake_word_output
? html`<div class="row">
<div>Model</div>
<div>
${this.pipelineRun.wake_word.wake_word_output
.ww_id}
</div>
</div>
<div class="row">
<div>Timestamp</div>
<div>
${this.pipelineRun.wake_word.wake_word_output
.timestamp}
</div>
</div>`
: ""}
${dataMinusKeysRender(
this.pipelineRun.wake_word,
WAKE_WORD_DATA
)}
</div>
`
: ""}
</div>
</ha-card>
`
: ""}
${maybeRenderError(this.pipelineRun, "wake_word", lastRunStage)}
${hasStage(this.pipelineRun, "stt") ${hasStage(this.pipelineRun, "stt")
? html` ? html`
<ha-card> <ha-card>
@ -335,6 +379,9 @@ export class AssistPipelineDebug extends LitElement {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
} }
.row > div:last-child {
text-align: right;
}
ha-expansion-panel { ha-expansion-panel {
padding-left: 8px; padding-left: 8px;
} }