mirror of
https://github.com/home-assistant/core.git
synced 2025-07-25 06:07:17 +00:00
Assist pipeline to use configured values (#91901)
* Assist pipeline to use configured values
* Include voice in TTS-START event
* Use correct tts language var
* More vars
* Apply suggestions from code review
* Update

---------

Co-authored-by: Bram Kragten <mail@bramkragten.nl>
This commit is contained in:
parent
ec1952b926
commit
f4df0ca50a
@ -57,9 +57,6 @@ async def async_pipeline_from_audio_stream(
|
|||||||
"pipeline_not_found", f"Pipeline {pipeline_id} not found"
|
"pipeline_not_found", f"Pipeline {pipeline_id} not found"
|
||||||
)
|
)
|
||||||
|
|
||||||
if stt_metadata.language == "":
|
|
||||||
stt_metadata.language = pipeline.language
|
|
||||||
|
|
||||||
pipeline_input = PipelineInput(
|
pipeline_input = PipelineInput(
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
stt_metadata=stt_metadata,
|
stt_metadata=stt_metadata,
|
||||||
|
@ -282,12 +282,14 @@ class PipelineRun:
|
|||||||
message=f"No speech to text provider for: {engine}",
|
message=f"No speech to text provider for: {engine}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
metadata.language = self.pipeline.stt_language or self.language
|
||||||
|
|
||||||
if not stt_provider.check_metadata(metadata):
|
if not stt_provider.check_metadata(metadata):
|
||||||
raise SpeechToTextError(
|
raise SpeechToTextError(
|
||||||
code="stt-provider-unsupported-metadata",
|
code="stt-provider-unsupported-metadata",
|
||||||
message=(
|
message=(
|
||||||
f"Provider {stt_provider.name} does not support input speech "
|
f"Provider {stt_provider.name} does not support input speech "
|
||||||
"to text metadata"
|
"to text metadata {metadata}"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -382,6 +384,7 @@ class PipelineRun:
|
|||||||
PipelineEventType.INTENT_START,
|
PipelineEventType.INTENT_START,
|
||||||
{
|
{
|
||||||
"engine": self.intent_agent,
|
"engine": self.intent_agent,
|
||||||
|
"language": self.pipeline.conversation_language,
|
||||||
"intent_input": intent_input,
|
"intent_input": intent_input,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -393,7 +396,7 @@ class PipelineRun:
|
|||||||
text=intent_input,
|
text=intent_input,
|
||||||
conversation_id=conversation_id,
|
conversation_id=conversation_id,
|
||||||
context=self.context,
|
context=self.context,
|
||||||
language=self.language,
|
language=self.pipeline.conversation_language,
|
||||||
agent_id=self.intent_agent,
|
agent_id=self.intent_agent,
|
||||||
)
|
)
|
||||||
except Exception as src_error:
|
except Exception as src_error:
|
||||||
@ -439,14 +442,14 @@ class PipelineRun:
|
|||||||
if not await tts.async_support_options(
|
if not await tts.async_support_options(
|
||||||
self.hass,
|
self.hass,
|
||||||
engine,
|
engine,
|
||||||
self.language,
|
self.pipeline.tts_language,
|
||||||
tts_options,
|
tts_options,
|
||||||
):
|
):
|
||||||
raise TextToSpeechError(
|
raise TextToSpeechError(
|
||||||
code="tts-not-supported",
|
code="tts-not-supported",
|
||||||
message=(
|
message=(
|
||||||
f"Text to speech engine {engine} "
|
f"Text to speech engine {engine} "
|
||||||
f"does not support language {self.language} or options {tts_options}"
|
f"does not support language {self.pipeline.tts_language} or options {tts_options}"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -463,6 +466,8 @@ class PipelineRun:
|
|||||||
PipelineEventType.TTS_START,
|
PipelineEventType.TTS_START,
|
||||||
{
|
{
|
||||||
"engine": self.tts_engine,
|
"engine": self.tts_engine,
|
||||||
|
"language": self.pipeline.tts_language,
|
||||||
|
"voice": self.pipeline.tts_voice,
|
||||||
"tts_input": tts_input,
|
"tts_input": tts_input,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@ -474,7 +479,7 @@ class PipelineRun:
|
|||||||
self.hass,
|
self.hass,
|
||||||
tts_input,
|
tts_input,
|
||||||
engine=self.tts_engine,
|
engine=self.tts_engine,
|
||||||
language=self.language,
|
language=self.pipeline.tts_language,
|
||||||
options=self.tts_options,
|
options=self.tts_options,
|
||||||
)
|
)
|
||||||
tts_media = await media_source.async_resolve_media(
|
tts_media = await media_source.async_resolve_media(
|
||||||
|
@ -137,7 +137,7 @@ async def websocket_run(
|
|||||||
|
|
||||||
# Audio input must be raw PCM at 16Khz with 16-bit mono samples
|
# Audio input must be raw PCM at 16Khz with 16-bit mono samples
|
||||||
input_args["stt_metadata"] = stt.SpeechMetadata(
|
input_args["stt_metadata"] = stt.SpeechMetadata(
|
||||||
language=pipeline.language,
|
language=pipeline.stt_language or pipeline.language,
|
||||||
format=stt.AudioFormats.WAV,
|
format=stt.AudioFormats.WAV,
|
||||||
codec=stt.AudioCodecs.PCM,
|
codec=stt.AudioCodecs.PCM,
|
||||||
bit_rate=stt.AudioBitRates.BITRATE_16,
|
bit_rate=stt.AudioBitRates.BITRATE_16,
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
'data': dict({
|
'data': dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'test transcript',
|
'intent_input': 'test transcript',
|
||||||
|
'language': None,
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.INTENT_START: 'intent-start'>,
|
'type': <PipelineEventType.INTENT_START: 'intent-start'>,
|
||||||
}),
|
}),
|
||||||
@ -63,14 +64,16 @@
|
|||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'engine': 'test',
|
'engine': 'test',
|
||||||
|
'language': None,
|
||||||
'tts_input': "Sorry, I couldn't understand that",
|
'tts_input': "Sorry, I couldn't understand that",
|
||||||
|
'voice': None,
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.TTS_START: 'tts-start'>,
|
'type': <PipelineEventType.TTS_START: 'tts-start'>,
|
||||||
}),
|
}),
|
||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en",
|
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that",
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_-_test.mp3',
|
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_-_test.mp3',
|
||||||
}),
|
}),
|
||||||
@ -87,7 +90,7 @@
|
|||||||
list([
|
list([
|
||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'language': 'en-US',
|
'language': 'en',
|
||||||
'pipeline': 'test_name',
|
'pipeline': 'test_name',
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.RUN_START: 'run-start'>,
|
'type': <PipelineEventType.RUN_START: 'run-start'>,
|
||||||
@ -118,6 +121,7 @@
|
|||||||
'data': dict({
|
'data': dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'test transcript',
|
'intent_input': 'test transcript',
|
||||||
|
'language': 'en-US',
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.INTENT_START: 'intent-start'>,
|
'type': <PipelineEventType.INTENT_START: 'intent-start'>,
|
||||||
}),
|
}),
|
||||||
@ -147,14 +151,16 @@
|
|||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'engine': 'test',
|
'engine': 'test',
|
||||||
|
'language': 'en-UA',
|
||||||
'tts_input': "Sorry, I couldn't understand that",
|
'tts_input': "Sorry, I couldn't understand that",
|
||||||
|
'voice': 'Arnold Schwarzenegger',
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.TTS_START: 'tts-start'>,
|
'type': <PipelineEventType.TTS_START: 'tts-start'>,
|
||||||
}),
|
}),
|
||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&voice=Arnold+Schwarzenegger",
|
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-UA&voice=Arnold+Schwarzenegger",
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_2657c1a8ee_test.mp3',
|
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_2657c1a8ee_test.mp3',
|
||||||
}),
|
}),
|
||||||
@ -171,7 +177,7 @@
|
|||||||
list([
|
list([
|
||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'language': 'en-US',
|
'language': 'en',
|
||||||
'pipeline': 'test_name',
|
'pipeline': 'test_name',
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.RUN_START: 'run-start'>,
|
'type': <PipelineEventType.RUN_START: 'run-start'>,
|
||||||
@ -202,6 +208,7 @@
|
|||||||
'data': dict({
|
'data': dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'test transcript',
|
'intent_input': 'test transcript',
|
||||||
|
'language': 'en-US',
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.INTENT_START: 'intent-start'>,
|
'type': <PipelineEventType.INTENT_START: 'intent-start'>,
|
||||||
}),
|
}),
|
||||||
@ -231,14 +238,16 @@
|
|||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'engine': 'test',
|
'engine': 'test',
|
||||||
|
'language': 'en-AU',
|
||||||
'tts_input': "Sorry, I couldn't understand that",
|
'tts_input': "Sorry, I couldn't understand that",
|
||||||
|
'voice': 'Arnold Schwarzenegger',
|
||||||
}),
|
}),
|
||||||
'type': <PipelineEventType.TTS_START: 'tts-start'>,
|
'type': <PipelineEventType.TTS_START: 'tts-start'>,
|
||||||
}),
|
}),
|
||||||
dict({
|
dict({
|
||||||
'data': dict({
|
'data': dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-US&voice=Arnold+Schwarzenegger",
|
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en-AU&voice=Arnold+Schwarzenegger",
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_2657c1a8ee_test.mp3',
|
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_2657c1a8ee_test.mp3',
|
||||||
}),
|
}),
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'test transcript',
|
'intent_input': 'test transcript',
|
||||||
|
'language': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_audio_pipeline.4
|
# name: test_audio_pipeline.4
|
||||||
@ -60,13 +61,15 @@
|
|||||||
# name: test_audio_pipeline.5
|
# name: test_audio_pipeline.5
|
||||||
dict({
|
dict({
|
||||||
'engine': 'test',
|
'engine': 'test',
|
||||||
|
'language': None,
|
||||||
'tts_input': "Sorry, I couldn't understand that",
|
'tts_input': "Sorry, I couldn't understand that",
|
||||||
|
'voice': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_audio_pipeline.6
|
# name: test_audio_pipeline.6
|
||||||
dict({
|
dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en",
|
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that",
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_-_test.mp3',
|
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_-_test.mp3',
|
||||||
}),
|
}),
|
||||||
@ -106,6 +109,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'test transcript',
|
'intent_input': 'test transcript',
|
||||||
|
'language': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_audio_pipeline_debug.4
|
# name: test_audio_pipeline_debug.4
|
||||||
@ -133,13 +137,15 @@
|
|||||||
# name: test_audio_pipeline_debug.5
|
# name: test_audio_pipeline_debug.5
|
||||||
dict({
|
dict({
|
||||||
'engine': 'test',
|
'engine': 'test',
|
||||||
|
'language': None,
|
||||||
'tts_input': "Sorry, I couldn't understand that",
|
'tts_input': "Sorry, I couldn't understand that",
|
||||||
|
'voice': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_audio_pipeline_debug.6
|
# name: test_audio_pipeline_debug.6
|
||||||
dict({
|
dict({
|
||||||
'tts_output': dict({
|
'tts_output': dict({
|
||||||
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that&language=en",
|
'media_id': "media-source://tts/test?message=Sorry,+I+couldn't+understand+that",
|
||||||
'mime_type': 'audio/mpeg',
|
'mime_type': 'audio/mpeg',
|
||||||
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_-_test.mp3',
|
'url': '/api/tts_proxy/dae2cdcb27a1d1c3b07ba2c7db91480f9d4bfd8f_en-us_-_test.mp3',
|
||||||
}),
|
}),
|
||||||
@ -159,6 +165,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'Are the lights on?',
|
'intent_input': 'Are the lights on?',
|
||||||
|
'language': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_intent_timeout
|
# name: test_intent_timeout
|
||||||
@ -175,6 +182,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'Are the lights on?',
|
'intent_input': 'Are the lights on?',
|
||||||
|
'language': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_intent_timeout.2
|
# name: test_intent_timeout.2
|
||||||
@ -243,6 +251,7 @@
|
|||||||
dict({
|
dict({
|
||||||
'engine': 'homeassistant',
|
'engine': 'homeassistant',
|
||||||
'intent_input': 'Are the lights on?',
|
'intent_input': 'Are the lights on?',
|
||||||
|
'language': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
# name: test_text_only_pipeline.2
|
# name: test_text_only_pipeline.2
|
||||||
@ -286,6 +295,8 @@
|
|||||||
# name: test_tts_failed.1
|
# name: test_tts_failed.1
|
||||||
dict({
|
dict({
|
||||||
'engine': 'test',
|
'engine': 'test',
|
||||||
|
'language': None,
|
||||||
'tts_input': 'Lights are on.',
|
'tts_input': 'Lights are on.',
|
||||||
|
'voice': None,
|
||||||
})
|
})
|
||||||
# ---
|
# ---
|
||||||
|
@ -79,13 +79,13 @@ async def test_pipeline_from_audio_stream_legacy(
|
|||||||
{
|
{
|
||||||
"type": "assist_pipeline/pipeline/create",
|
"type": "assist_pipeline/pipeline/create",
|
||||||
"conversation_engine": "homeassistant",
|
"conversation_engine": "homeassistant",
|
||||||
"conversation_language": "test_language",
|
"conversation_language": "en-US",
|
||||||
"language": "en-US",
|
"language": "en",
|
||||||
"name": "test_name",
|
"name": "test_name",
|
||||||
"stt_engine": "test",
|
"stt_engine": "test",
|
||||||
"stt_language": "test_language",
|
"stt_language": "en-UK",
|
||||||
"tts_engine": "test",
|
"tts_engine": "test",
|
||||||
"tts_language": "test_language",
|
"tts_language": "en-AU",
|
||||||
"tts_voice": "Arnold Schwarzenegger",
|
"tts_voice": "Arnold Schwarzenegger",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@ -99,7 +99,7 @@ async def test_pipeline_from_audio_stream_legacy(
|
|||||||
Context(),
|
Context(),
|
||||||
events.append,
|
events.append,
|
||||||
stt.SpeechMetadata(
|
stt.SpeechMetadata(
|
||||||
language="",
|
language="en-UK",
|
||||||
format=stt.AudioFormats.WAV,
|
format=stt.AudioFormats.WAV,
|
||||||
codec=stt.AudioCodecs.PCM,
|
codec=stt.AudioCodecs.PCM,
|
||||||
bit_rate=stt.AudioBitRates.BITRATE_16,
|
bit_rate=stt.AudioBitRates.BITRATE_16,
|
||||||
@ -145,13 +145,13 @@ async def test_pipeline_from_audio_stream_entity(
|
|||||||
{
|
{
|
||||||
"type": "assist_pipeline/pipeline/create",
|
"type": "assist_pipeline/pipeline/create",
|
||||||
"conversation_engine": "homeassistant",
|
"conversation_engine": "homeassistant",
|
||||||
"conversation_language": "test_language",
|
"conversation_language": "en-US",
|
||||||
"language": "en-US",
|
"language": "en",
|
||||||
"name": "test_name",
|
"name": "test_name",
|
||||||
"stt_engine": mock_stt_provider_entity.entity_id,
|
"stt_engine": mock_stt_provider_entity.entity_id,
|
||||||
"stt_language": "test_language",
|
"stt_language": "en-UK",
|
||||||
"tts_engine": "test",
|
"tts_engine": "test",
|
||||||
"tts_language": "test_language",
|
"tts_language": "en-UA",
|
||||||
"tts_voice": "Arnold Schwarzenegger",
|
"tts_voice": "Arnold Schwarzenegger",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
@ -165,7 +165,7 @@ async def test_pipeline_from_audio_stream_entity(
|
|||||||
Context(),
|
Context(),
|
||||||
events.append,
|
events.append,
|
||||||
stt.SpeechMetadata(
|
stt.SpeechMetadata(
|
||||||
language="",
|
language="en-UK",
|
||||||
format=stt.AudioFormats.WAV,
|
format=stt.AudioFormats.WAV,
|
||||||
codec=stt.AudioCodecs.PCM,
|
codec=stt.AudioCodecs.PCM,
|
||||||
bit_rate=stt.AudioBitRates.BITRATE_16,
|
bit_rate=stt.AudioBitRates.BITRATE_16,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user