From aa78962a9a3cf0f4d8d787c44a5aec7084975977 Mon Sep 17 00:00:00 2001
From: Michael Hansen <mike@rhasspy.org>
Date: Wed, 3 May 2023 12:43:14 -0500
Subject: [PATCH] Pass OPUS payload ID through VoIP (#92421)

---
 homeassistant/components/voip/voip.py | 63 +++++++++++++++------------
 tests/components/voip/test_voip.py    | 23 +++++++++-
 2 files changed, 56 insertions(+), 30 deletions(-)

diff --git a/homeassistant/components/voip/voip.py b/homeassistant/components/voip/voip.py
index ddf40f5918e..8b96941e00a 100644
--- a/homeassistant/components/voip/voip.py
+++ b/homeassistant/components/voip/voip.py
@@ -52,7 +52,11 @@ def make_protocol(
         or (pipeline.tts_engine is None)
     ):
         # Play pre-recorded message instead of failing
-        return PreRecordMessageProtocol(hass, "problem.pcm")
+        return PreRecordMessageProtocol(
+            hass,
+            "problem.pcm",
+            opus_payload_type=call_info.opus_payload_type,
+        )
 
     # Pipeline is properly configured
     return PipelineRtpDatagramProtocol(
@@ -60,6 +64,7 @@ def make_protocol(
         hass.config.language,
         voip_device,
         Context(user_id=devices.config_entry.data["user"]),
+        opus_payload_type=call_info.opus_payload_type,
     )
 
 
@@ -79,7 +84,9 @@ class HassVoipDatagramProtocol(VoipDatagramProtocol):
                 hass, devices, call_info
             ),
             invalid_protocol_factory=lambda call_info: PreRecordMessageProtocol(
-                hass, "not_configured.pcm"
+                hass,
+                "not_configured.pcm",
+                opus_payload_type=call_info.opus_payload_type,
             ),
         )
         self.hass = hass
@@ -109,6 +116,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
         language: str,
         voip_device: VoIPDevice,
         context: Context,
+        opus_payload_type: int,
         pipeline_timeout: float = 30.0,
         audio_timeout: float = 2.0,
         buffered_chunks_before_speech: int = 100,
@@ -119,7 +127,12 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
         tts_extra_timeout: float = 1.0,
     ) -> None:
         """Set up pipeline RTP server."""
-        super().__init__(rate=RATE, width=WIDTH, channels=CHANNELS)
+        super().__init__(
+            rate=RATE,
+            width=WIDTH,
+            channels=CHANNELS,
+            opus_payload_type=opus_payload_type,
+        )
 
         self.hass = hass
         self.language = language
@@ -350,9 +363,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
 
             async with async_timeout.timeout(tts_seconds + self.tts_extra_timeout):
                 # Assume TTS audio is 16Khz 16-bit mono
-                await self.hass.async_add_executor_job(
-                    partial(self.send_audio, audio_bytes, **RTP_AUDIO_SETTINGS)
-                )
+                await self._async_send_audio(audio_bytes)
         except asyncio.TimeoutError as err:
             _LOGGER.warning("TTS timeout")
             raise err
@@ -360,6 +371,12 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
             # Signal pipeline to restart
             self._tts_done.set()
 
+    async def _async_send_audio(self, audio_bytes: bytes, **kwargs):
+        """Send audio in executor."""
+        await self.hass.async_add_executor_job(
+            partial(self.send_audio, audio_bytes, **RTP_AUDIO_SETTINGS, **kwargs)
+        )
+
     async def _play_listening_tone(self) -> None:
         """Play a tone to indicate that Home Assistant is listening."""
         if self._tone_bytes is None:
@@ -369,13 +386,9 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
                 "tone.pcm",
             )
 
-        await self.hass.async_add_executor_job(
-            partial(
-                self.send_audio,
-                self._tone_bytes,
-                silence_before=self.tone_delay,
-                **RTP_AUDIO_SETTINGS,
-            )
+        await self._async_send_audio(
+            self._tone_bytes,
+            silence_before=self.tone_delay,
         )
 
     async def _play_processing_tone(self) -> None:
@@ -387,13 +400,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
                 "processing.pcm",
             )
 
-        await self.hass.async_add_executor_job(
-            partial(
-                self.send_audio,
-                self._processing_bytes,
-                **RTP_AUDIO_SETTINGS,
-            )
-        )
+        await self._async_send_audio(self._processing_bytes)
 
     async def _play_error_tone(self) -> None:
         """Play a tone to indicate a pipeline error occurred."""
@@ -404,13 +411,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
                 "error.pcm",
             )
 
-        await self.hass.async_add_executor_job(
-            partial(
-                self.send_audio,
-                self._error_bytes,
-                **RTP_AUDIO_SETTINGS,
-            )
-        )
+        await self._async_send_audio(self._error_bytes)
 
     def _load_pcm(self, file_name: str) -> bytes:
         """Load raw audio (16Khz, 16-bit mono)."""
@@ -424,11 +425,17 @@ class PreRecordMessageProtocol(RtpDatagramProtocol):
         self,
         hass: HomeAssistant,
         file_name: str,
+        opus_payload_type: int,
         message_delay: float = 1.0,
         loop_delay: float = 2.0,
     ) -> None:
         """Set up RTP server."""
-        super().__init__(rate=RATE, width=WIDTH, channels=CHANNELS)
+        super().__init__(
+            rate=RATE,
+            width=WIDTH,
+            channels=CHANNELS,
+            opus_payload_type=opus_payload_type,
+        )
         self.hass = hass
         self.file_name = file_name
         self.message_delay = message_delay
diff --git a/tests/components/voip/test_voip.py b/tests/components/voip/test_voip.py
index aec9122fae1..bd9a3587a9a 100644
--- a/tests/components/voip/test_voip.py
+++ b/tests/components/voip/test_voip.py
@@ -4,6 +4,7 @@ import time
 from unittest.mock import AsyncMock, Mock, patch
 
 import async_timeout
+import pytest
 
 from homeassistant.components import assist_pipeline, voip
 from homeassistant.components.voip.devices import VoIPDevice
@@ -88,6 +89,7 @@ async def test_pipeline(
             hass.config.language,
             voip_device,
             Context(),
+            opus_payload_type=123,
             listening_tone_enabled=False,
             processing_tone_enabled=False,
             error_tone_enabled=False,
@@ -138,6 +140,7 @@ async def test_pipeline_timeout(hass: HomeAssistant, voip_device: VoIPDevice) ->
             hass.config.language,
             voip_device,
             Context(),
+            opus_payload_type=123,
             pipeline_timeout=0.001,
             listening_tone_enabled=False,
             processing_tone_enabled=False,
@@ -178,6 +181,7 @@ async def test_stt_stream_timeout(hass: HomeAssistant, voip_device: VoIPDevice)
             hass.config.language,
             voip_device,
             Context(),
+            opus_payload_type=123,
             audio_timeout=0.001,
             listening_tone_enabled=False,
             processing_tone_enabled=False,
@@ -247,6 +251,14 @@ async def test_tts_timeout(
         # Block here to force a timeout in _send_tts
         time.sleep(2)
 
+    async def async_send_audio(audio_bytes, **kwargs):
+        if audio_bytes == tone_bytes:
+            # Not TTS
+            return
+
+        # Block here to force a timeout in _send_tts
+        await asyncio.sleep(2)
+
     async def async_get_media_source_audio(
         hass: HomeAssistant,
         media_source_id: str,
@@ -269,6 +281,8 @@ async def test_tts_timeout(
             hass.config.language,
             voip_device,
             Context(),
+            opus_payload_type=123,
+            tts_extra_timeout=0.001,
             listening_tone_enabled=True,
             processing_tone_enabled=True,
             error_tone_enabled=True,
@@ -277,13 +291,18 @@ async def test_tts_timeout(
         rtp_protocol._processing_bytes = tone_bytes
         rtp_protocol._error_bytes = tone_bytes
         rtp_protocol.transport = Mock()
-        rtp_protocol.send_audio = Mock(side_effect=send_audio)
+        rtp_protocol.send_audio = Mock()
+
+        original_send_tts = rtp_protocol._send_tts
 
         async def send_tts(*args, **kwargs):
             # Call original then end test successfully
-            rtp_protocol._send_tts(*args, **kwargs)
+            with pytest.raises(asyncio.TimeoutError):
+                await original_send_tts(*args, **kwargs)
+
             done.set()
 
+        rtp_protocol._async_send_audio = AsyncMock(side_effect=async_send_audio)
         rtp_protocol._send_tts = AsyncMock(side_effect=send_tts)
 
         # silence