diff --git a/src/data/stt.ts b/src/data/stt.ts new file mode 100644 index 0000000000..c855e77757 --- /dev/null +++ b/src/data/stt.ts @@ -0,0 +1,17 @@ +export interface SpeechMetadata { + language: string; + format: "wav" | "ogg"; + codec: "pcm" | "opus"; + bit_rate: 8 | 16 | 24 | 32; + sample_rate: + | 8000 + | 11000 + | 16000 + | 18900 + | 22000 + | 32000 + | 37800 + | 44100 + | 48000; + channel: 1 | 2; +} diff --git a/src/data/voice_assistant.ts b/src/data/voice_assistant.ts index a1aa203d54..fc5681a271 100644 --- a/src/data/voice_assistant.ts +++ b/src/data/voice_assistant.ts @@ -1,5 +1,7 @@ -import { HomeAssistant } from "../types"; -import { ConversationResult } from "./conversation"; +import type { HomeAssistant } from "../types"; +import type { ConversationResult } from "./conversation"; +import type { ResolvedMediaSource } from "./media_source"; +import type { SpeechMetadata } from "./stt"; interface PipelineEventBase { timestamp: string; @@ -12,8 +14,8 @@ interface PipelineRunStartEvent extends PipelineEventBase { language: string; }; } -interface PipelineRunFinishEvent extends PipelineEventBase { - type: "run-finish"; +interface PipelineRunEndEvent extends PipelineEventBase { + type: "run-end"; data: Record; } @@ -27,11 +29,16 @@ interface PipelineErrorEvent extends PipelineEventBase { interface PipelineSTTStartEvent extends PipelineEventBase { type: "stt-start"; - data: Record; + data: { + engine: string; + metadata: SpeechMetadata; + }; } -interface PipelineSTTFinishEvent extends PipelineEventBase { - type: "stt-finish"; - data: Record; +interface PipelineSTTEndEvent extends PipelineEventBase { + type: "stt-end"; + data: { + text: string; + }; } interface PipelineIntentStartEvent extends PipelineEventBase { @@ -41,8 +48,8 @@ interface PipelineIntentStartEvent extends PipelineEventBase { intent_input: string; }; } -interface PipelineIntentFinishEvent extends PipelineEventBase { - type: "intent-finish"; +interface PipelineIntentEndEvent extends PipelineEventBase { + type: "intent-end"; data: { intent_output: ConversationResult; }; @@ -50,27 +57,35 @@ interface PipelineIntentFinishEvent extends PipelineEventBase { interface PipelineTTSStartEvent extends PipelineEventBase { type: "tts-start"; - data: Record; + data: { + engine: string; + tts_input: string; + }; } -interface PipelineTTSFinishEvent extends PipelineEventBase { - type: "tts-finish"; - data: Record; +interface PipelineTTSEndEvent extends PipelineEventBase { + type: "tts-end"; + data: { + tts_output: ResolvedMediaSource; + }; } type PipelineRunEvent = | PipelineRunStartEvent - | PipelineRunFinishEvent + | PipelineRunEndEvent | PipelineErrorEvent | PipelineSTTStartEvent - | PipelineSTTFinishEvent + | PipelineSTTEndEvent | PipelineIntentStartEvent - | PipelineIntentFinishEvent + | PipelineIntentEndEvent | PipelineTTSStartEvent - | PipelineTTSFinishEvent; + | PipelineTTSEndEvent; interface PipelineRunOptions { + start_stage: "stt" | "intent" | "tts"; + end_stage: "stt" | "intent" | "tts"; + language?: string; pipeline?: string; - intent_input?: string; + input?: { text: string }; conversation_id?: string | null; } @@ -80,16 +95,16 @@ export interface PipelineRun { stage: "ready" | "stt" | "intent" | "tts" | "done" | "error"; run: PipelineRunStartEvent["data"]; error?: PipelineErrorEvent["data"]; - stt?: PipelineSTTStartEvent["data"] & Partial; + stt?: PipelineSTTStartEvent["data"] & Partial; intent?: PipelineIntentStartEvent["data"] & - Partial; - tts?: PipelineTTSStartEvent["data"] & Partial; + Partial; + tts?: PipelineTTSStartEvent["data"] & Partial; } export const runPipelineFromText = ( hass: HomeAssistant, callback: (event: PipelineRun) => void, - options: PipelineRunOptions = {} + options: PipelineRunOptions ) => { let run: PipelineRun | undefined; @@ -121,17 +136,17 @@ export const runPipelineFromText = ( if (updateEvent.type === "stt-start") { run = { ...run, stage: "stt", stt: updateEvent.data }; - } else if (updateEvent.type === "stt-finish") { + } else if (updateEvent.type === "stt-end") { run = { ...run, stt: { ...run.stt!, ...updateEvent.data } }; } else if (updateEvent.type === "intent-start") { run = { ...run, stage: "intent", intent: updateEvent.data }; - } else if (updateEvent.type === "intent-finish") { + } else if (updateEvent.type === "intent-end") { run = { ...run, intent: { ...run.intent!, ...updateEvent.data } }; } else if (updateEvent.type === "tts-start") { run = { ...run, stage: "tts", tts: updateEvent.data }; - } else if (updateEvent.type === "tts-finish") { + } else if (updateEvent.type === "tts-end") { run = { ...run, tts: { ...run.tts!, ...updateEvent.data } }; - } else if (updateEvent.type === "run-finish") { + } else if (updateEvent.type === "run-end") { run = { ...run, stage: "done" }; unsubProm.then((unsub) => unsub()); } else if (updateEvent.type === "error") { diff --git a/src/panels/config/integrations/integration-panels/voice_assistant/assist/assist-pipeline-debug.ts b/src/panels/config/integrations/integration-panels/voice_assistant/assist/assist-pipeline-debug.ts index f333818166..2220673c6d 100644 --- a/src/panels/config/integrations/integration-panels/voice_assistant/assist/assist-pipeline-debug.ts +++ b/src/panels/config/integrations/integration-panels/voice_assistant/assist/assist-pipeline-debug.ts @@ -12,7 +12,7 @@ import { import "../../../../../../layouts/hass-subpage"; import { SubscribeMixin } from "../../../../../../mixins/subscribe-mixin"; import { haStyle } from "../../../../../../resources/styles"; -import { HomeAssistant } from "../../../../../../types"; +import type { HomeAssistant } from "../../../../../../types"; import { formatNumber } from "../../../../../../common/number/format_number"; const RUN_DATA = { @@ -25,11 +25,33 @@ const ERROR_DATA = { message: "Message", }; +const STT_DATA = { + engine: "Engine", +}; + const INTENT_DATA = { engine: "Engine", intent_input: "Input", }; +const TTS_DATA = { + engine: "Engine", + tts_input: "Input", +}; + +const STAGES: Record = { + ready: 0, + stt: 1, + intent: 2, + tts: 3, + done: 4, + error: 5, +}; + +const hasStage = (run: PipelineRun, stage: PipelineRun["stage"]) => + STAGES[run.init_options.start_stage] <= STAGES[stage] && + STAGES[stage] <= STAGES[run.init_options.end_stage]; + const renderProgress = ( hass: HomeAssistant, pipelineRun: PipelineRun, @@ -39,7 +61,7 @@ const renderProgress = ( (ev) => ev.type === `${stage}-start` ); const finishEvent = pipelineRun.events.find( - (ev) => ev.type === `${stage}-finish` + (ev) => ev.type === `${stage}-end` ); if (!startEvent) { @@ -142,25 +164,91 @@ export class AssistPipelineDebug extends SubscribeMixin(LitElement) { : ""} - -
-
- Natural Language Processing - ${renderProgress(this.hass, this._pipelineRun, "intent")} -
- ${this._pipelineRun.intent - ? html` -
- ${renderData(this._pipelineRun.intent, INTENT_DATA)} - ${dataMinusKeysRender( - this._pipelineRun.intent, - INTENT_DATA + + ${hasStage(this._pipelineRun, "stt") + ? html` + +
+
+ Speech-to-Text + ${renderProgress( + this.hass, + this._pipelineRun, + "stt" )}
- ` - : ""} -
-
+ ${this._pipelineRun.stt + ? html` +
+ ${renderData(this._pipelineRun.stt, STT_DATA)} + ${dataMinusKeysRender( + this._pipelineRun.stt, + STT_DATA + )} +
+ ` + : ""} +
+ + ` + : ""} + ${hasStage(this._pipelineRun, "intent") + ? html` + +
+
+ Natural Language Processing + ${renderProgress( + this.hass, + this._pipelineRun, + "intent" + )} +
+ ${this._pipelineRun.intent + ? html` +
+ ${renderData( + this._pipelineRun.intent, + INTENT_DATA + )} + ${dataMinusKeysRender( + this._pipelineRun.intent, + INTENT_DATA + )} +
+ ` + : ""} +
+
+ ` + : ""} + ${hasStage(this._pipelineRun, "tts") + ? html` + +
+
+ Text-to-Speech + ${renderProgress( + this.hass, + this._pipelineRun, + "tts" + )} +
+ ${this._pipelineRun.tts + ? html` +
+ ${renderData(this._pipelineRun.tts, TTS_DATA)} + ${dataMinusKeysRender( + this._pipelineRun.tts, + TTS_DATA + )} +
+ ` + : ""} +
+
+ ` + : ""} Raw @@ -182,7 +270,9 @@ export class AssistPipelineDebug extends SubscribeMixin(LitElement) { this._pipelineRun = run; }, { - intent_input: this._newRunInput.value, + start_stage: "intent", + end_stage: "intent", + input: { text: this._newRunInput.value }, } ); }