Add wake word to assist pipeline debug (#17897)

This commit is contained in:
Bram Kragten 2023-09-25 17:51:12 +02:00 committed by GitHub
parent 579050bfc7
commit 22f9dbd65d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 26 deletions

View File

@ -61,6 +61,19 @@ interface PipelineErrorEvent extends PipelineEventBase {
};
}
/**
 * Pipeline event whose `type` is "wake_word-start".
 *
 * `processEvent` handles it by moving the run to the "wake_word" stage and
 * storing this `data` on the run as `wake_word` with `done: false`.
 */
interface PipelineWakeWordStartEvent extends PipelineEventBase {
type: "wake_word-start";
data: {
// NOTE(review): presumably identifies the wake word engine in use — confirm against the backend.
engine: string;
metadata: SpeechMetadata;
};
}
/**
 * Pipeline event whose `type` is "wake_word-end".
 *
 * `processEvent` merges this `data` into the run's `wake_word` entry and
 * marks that entry as `done`.
 */
interface PipelineWakeWordEndEvent extends PipelineEventBase {
  type: "wake_word-end";
  data: {
    wake_word_output: {
      // Shown under the "Model" label in the pipeline debug view.
      ww_id: string;
      // Shown as-is under "Timestamp"; unit is not visible here — TODO confirm.
      timestamp: number;
    };
  };
}
interface PipelineSTTStartEvent extends PipelineEventBase {
type: "stt-start";
data: {
@ -110,6 +123,8 @@ export type PipelineRunEvent =
| PipelineRunStartEvent
| PipelineRunEndEvent
| PipelineErrorEvent
| PipelineWakeWordStartEvent
| PipelineWakeWordEndEvent
| PipelineSTTStartEvent
| PipelineSTTEndEvent
| PipelineIntentStartEvent
@ -126,6 +141,14 @@ export type PipelineRunOptions = (
start_stage: "stt";
input: { sample_rate: number };
}
| {
start_stage: "wake_word";
input: {
sample_rate: number;
timeout?: number;
audio_seconds_to_buffer?: number;
};
}
) & {
end_stage: "stt" | "intent" | "tts";
pipeline?: string;
@ -135,9 +158,11 @@ export type PipelineRunOptions = (
export interface PipelineRun {
init_options?: PipelineRunOptions;
events: PipelineRunEvent[];
stage: "ready" | "stt" | "intent" | "tts" | "done" | "error";
stage: "ready" | "wake_word" | "stt" | "intent" | "tts" | "done" | "error";
run: PipelineRunStartEvent["data"];
error?: PipelineErrorEvent["data"];
wake_word?: PipelineWakeWordStartEvent["data"] &
Partial<PipelineWakeWordEndEvent["data"]> & { done: boolean };
stt?: PipelineSTTStartEvent["data"] &
Partial<PipelineSTTEndEvent["data"]> & { done: boolean };
intent?: PipelineIntentStartEvent["data"] &
@ -167,7 +192,18 @@ export const processEvent = (
return undefined;
}
if (event.type === "stt-start") {
if (event.type === "wake_word-start") {
run = {
...run,
stage: "wake_word",
wake_word: { ...event.data, done: false },
};
} else if (event.type === "wake_word-end") {
run = {
...run,
wake_word: { ...run.wake_word!, ...event.data, done: true },
};
} else if (event.type === "stt-start") {
run = {
...run,
stage: "stt",

View File

@ -79,18 +79,29 @@ export class AssistPipelineRunDebug extends LitElement {
.value=${this._pipelineId}
@value-changed=${this._pipelinePicked}
></ha-assist-pipeline-picker>
<ha-button raised @click=${this._runTextPipeline}>
Run Text Pipeline
</ha-button>
<ha-button
raised
@click=${this._runAudioPipeline}
.disabled=${!window.isSecureContext ||
// @ts-ignore-next-line
!(window.AudioContext || window.webkitAudioContext)}
>
Run Audio Pipeline
</ha-button>
<div class="start-buttons">
<ha-button raised @click=${this._runTextPipeline}>
Run Text Pipeline
</ha-button>
<ha-button
raised
@click=${this._runAudioPipeline}
.disabled=${!window.isSecureContext ||
// @ts-ignore-next-line
!(window.AudioContext || window.webkitAudioContext)}
>
Run Audio Pipeline
</ha-button>
<ha-button
raised
@click=${this._runAudioWakeWordPipeline}
.disabled=${!window.isSecureContext ||
// @ts-ignore-next-line
!(window.AudioContext || window.webkitAudioContext)}
>
Run Audio Pipeline with Wake Word detection
</ha-button>
</div>
`
: this._pipelineRuns[0].init_options!.start_stage === "intent"
? html`
@ -179,6 +190,83 @@ export class AssistPipelineRunDebug extends LitElement {
);
}
/**
 * Runs the pipeline from the "wake_word" stage through "tts" using audio
 * captured by an `AudioRecorder`.
 *
 * Recorded chunks are collected in `this._audioBuffer` until the run reports
 * the "wake_word" stage; the buffer is then flushed through
 * `_sendAudioChunk` and later chunks are sent directly. Recording stops as
 * soon as the run leaves the "ready" / "wake_word" / "stt" stages. When the
 * run is done, the TTS output is played; once playback ends, either a new
 * wake word run is started (continue-conversation checkbox checked) or the
 * run is marked finished. Errors and failed playback also mark it finished.
 */
private async _runAudioWakeWordPipeline() {
  const audioRecorder = new AudioRecorder((data) => {
    // While a buffer exists the chunks are held back; otherwise stream them.
    if (this._audioBuffer) {
      this._audioBuffer.push(data);
    } else {
      this._sendAudioChunk(data);
    }
  });

  this._audioBuffer = [];
  await audioRecorder.start();

  let run: PipelineRun | undefined;

  // Cleared on first use so the recorder is only closed once.
  let stopRecording: (() => void) | undefined = () => {
    stopRecording = undefined;
    audioRecorder.close();
    // We're currently STTing, so finish audio
    if (run?.stage === "stt" && run.stt!.done === false) {
      if (this._audioBuffer) {
        for (const chunk of this._audioBuffer) {
          this._sendAudioChunk(chunk);
        }
      }
      // Send empty message to indicate we're done streaming.
      this._sendAudioChunk(new Int16Array());
    }
    this._audioBuffer = undefined;
  };

  await this._doRunPipeline(
    (updatedRun) => {
      run = updatedRun;

      // When we start wake word stage, the WS has a binary handler
      if (updatedRun.stage === "wake_word" && this._audioBuffer) {
        // Send the buffer over the WS to the Wake Word / STT engine.
        for (const buffer of this._audioBuffer) {
          this._sendAudioChunk(buffer);
        }
        this._audioBuffer = undefined;
      }

      // Stop recording if the server is done with STT stage
      if (
        !["ready", "wake_word", "stt"].includes(updatedRun.stage) &&
        stopRecording
      ) {
        stopRecording();
      }

      // Play audio when we're done.
      if (updatedRun.stage === "done") {
        const url = updatedRun.tts?.tts_output?.url;
        if (!url) {
          // No TTS output to play back; do not leave the run hanging.
          this._finished = true;
          return;
        }
        const audio = new Audio(url);
        audio.addEventListener("ended", () => {
          if (this._continueConversationCheckbox.checked) {
            this._runAudioWakeWordPipeline();
          } else {
            this._finished = true;
          }
        });
        // play() returns a promise that rejects when playback cannot start;
        // "ended" would never fire in that case, so finish the run here.
        audio.play().catch(() => {
          this._finished = true;
        });
      } else if (updatedRun.stage === "error") {
        this._finished = true;
      }
    },
    {
      start_stage: "wake_word",
      end_stage: "tts",
      input: {
        sample_rate: audioRecorder.sampleRate!,
      },
    }
  );
}
private async _runAudioPipeline() {
const audioRecorder = new AudioRecorder((data) => {
if (this._audioBuffer) {
@ -327,6 +415,13 @@ export class AssistPipelineRunDebug extends LitElement {
margin: 0 auto;
direction: ltr;
}
.start-buttons {
display: flex;
flex-wrap: wrap;
gap: 8px;
align-items: center;
justify-content: center;
}
.start-row {
display: flex;
justify-content: space-around;

View File

@ -15,6 +15,9 @@ const RUN_DATA = {
pipeline: "Pipeline",
language: "Language",
};
// Field-name → label map for the wake word card; it is the key map handed to
// dataMinusKeysRender for the wake word data.
const WAKE_WORD_DATA = { engine: "Engine" };
const STT_DATA = {
engine: "Engine",
@ -35,11 +38,12 @@ const TTS_DATA = {
const STAGES: Record<PipelineRun["stage"], number> = {
ready: 0,
stt: 1,
intent: 2,
tts: 3,
done: 4,
error: 5,
wake_word: 1,
stt: 2,
intent: 3,
tts: 4,
done: 5,
error: 6,
};
const hasStage = (run: PipelineRun, stage: PipelineRun["stage"]) =>
@ -53,7 +57,7 @@ const maybeRenderError = (
stage: string,
lastRunStage: string
) => {
if (run.stage !== "error" || lastRunStage !== stage) {
if (!("error" in run) || lastRunStage !== stage) {
return "";
}
@ -80,11 +84,10 @@ const renderProgress = (
return "";
}
if (pipelineRun.stage === "error") {
return html``;
}
if (!finishEvent) {
if ("error" in pipelineRun) {
return html``;
}
return html`
<ha-circular-progress size="tiny" active></ha-circular-progress>
`;
@ -138,8 +141,9 @@ export class AssistPipelineDebug extends LitElement {
protected render(): TemplateResult {
const lastRunStage: string = this.pipelineRun
? ["tts", "intent", "stt"].find((stage) => stage in this.pipelineRun) ||
"ready"
? ["tts", "intent", "stt", "wake_word"].find(
(stage) => stage in this.pipelineRun
) || "ready"
: "ready";
const messages: Array<{ from: string; text: string }> = [];
@ -194,6 +198,46 @@ export class AssistPipelineDebug extends LitElement {
</ha-card>
${maybeRenderError(this.pipelineRun, "ready", lastRunStage)}
${hasStage(this.pipelineRun, "wake_word")
  ? html`
      <ha-card>
        <div class="card-content">
          <div class="row heading">
            <span>Wake word</span>
            ${renderProgress(this.hass, this.pipelineRun, "wake_word")}
          </div>
          ${this.pipelineRun.wake_word
            ? html`
                <div class="card-content">
                  ${
                    // Same key map as the dataMinusKeysRender call below, so
                    // the two calls stay in agreement about which keys go where.
                    renderData(this.pipelineRun.wake_word, WAKE_WORD_DATA)
                  }
                  ${this.pipelineRun.wake_word.wake_word_output
                    ? html`<div class="row">
                          <div>Model</div>
                          <div>
                            ${this.pipelineRun.wake_word.wake_word_output
                              .ww_id}
                          </div>
                        </div>
                        <div class="row">
                          <div>Timestamp</div>
                          <div>
                            ${this.pipelineRun.wake_word.wake_word_output
                              .timestamp}
                          </div>
                        </div>`
                    : ""}
                  ${dataMinusKeysRender(
                    this.pipelineRun.wake_word,
                    WAKE_WORD_DATA
                  )}
                </div>
              `
            : ""}
        </div>
      </ha-card>
    `
  : ""}
${maybeRenderError(this.pipelineRun, "wake_word", lastRunStage)}
${hasStage(this.pipelineRun, "stt")
? html`
<ha-card>
@ -335,6 +379,9 @@ export class AssistPipelineDebug extends LitElement {
display: flex;
justify-content: space-between;
}
.row > div:last-child {
text-align: right;
}
ha-expansion-panel {
padding-left: 8px;
}