Skip TTS events entirely with empty text (#105617)

2025-07-22 04:37:06 +00:00 · 2023-12-12 23:21:16 -06:00 · 2023-12-12 23:21:16 -06:00 · a73e86a741
commit a73e86a741
parent 431a44ab67
3 changed files with 59 additions and 40 deletions
--- a/homeassistant/components/assist_pipeline/pipeline.py
+++ b/homeassistant/components/assist_pipeline/pipeline.py
@@ -369,6 +369,7 @@ class PipelineStage(StrEnum):
    STT = "stt"
    INTENT = "intent"
    TTS = "tts"
+    END = "end"


 PIPELINE_STAGE_ORDER = [
@@ -1024,7 +1025,6 @@ class PipelineRun:
            )
        )

-        if tts_input := tts_input.strip():
        try:
            # Synthesize audio and get URL
            tts_media_id = tts_generate_media_source_id(
@@ -1051,8 +1051,6 @@ class PipelineRun:
            "media_id": tts_media_id,
            **asdict(tts_media),
        }
-        else:
-            tts_output = {}

        self.process_event(
            PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output})
@@ -1345,7 +1343,11 @@ class PipelineInput:
                        self.conversation_id,
                        self.device_id,
                    )
+                    if tts_input.strip():
                        current_stage = PipelineStage.TTS
+                    else:
+                        # Skip TTS
+                        current_stage = PipelineStage.END

                if self.run.end_stage != PipelineStage.INTENT:
                    # text-to-speech
--- a/tests/components/assist_pipeline/snapshots/test_websocket.ambr
+++ b/tests/components/assist_pipeline/snapshots/test_websocket.ambr
@@ -662,15 +662,33 @@
 # ---
 # name: test_pipeline_empty_tts_output.1
  dict({
-    'engine': 'test',
-    'language': 'en-US',
-    'tts_input': '',
-    'voice': 'james_earl_jones',
+    'conversation_id': None,
+    'device_id': None,
+    'engine': 'homeassistant',
+    'intent_input': 'never mind',
+    'language': 'en',
  })
 # ---
 # name: test_pipeline_empty_tts_output.2
  dict({
-    'tts_output': dict({
+    'intent_output': dict({
+      'conversation_id': None,
+      'response': dict({
+        'card': dict({
+        }),
+        'data': dict({
+          'failed': list([
+          ]),
+          'success': list([
+          ]),
+          'targets': list([
+          ]),
+        }),
+        'language': 'en',
+        'response_type': 'action_done',
+        'speech': dict({
+        }),
+      }),
    }),
  })
 # ---
--- a/tests/components/assist_pipeline/test_websocket.py
+++ b/tests/components/assist_pipeline/test_websocket.py
@@ -2467,10 +2467,10 @@ async def test_pipeline_empty_tts_output(
    await client.send_json_auto_id(
        {
            "type": "assist_pipeline/run",
-            "start_stage": "tts",
+            "start_stage": "intent",
            "end_stage": "tts",
            "input": {
-                "text": "",
+                "text": "never mind",
            },
        }
    )
@@ -2486,16 +2486,15 @@ async def test_pipeline_empty_tts_output(
    assert msg["event"]["data"] == snapshot
    events.append(msg["event"])

-    # text-to-speech
+    # intent
    msg = await client.receive_json()
-    assert msg["event"]["type"] == "tts-start"
+    assert msg["event"]["type"] == "intent-start"
    assert msg["event"]["data"] == snapshot
    events.append(msg["event"])

    msg = await client.receive_json()
-    assert msg["event"]["type"] == "tts-end"
+    assert msg["event"]["type"] == "intent-end"
    assert msg["event"]["data"] == snapshot
-    assert not msg["event"]["data"]["tts_output"]
    events.append(msg["event"])

    # run end