diff --git a/homeassistant/components/voip/const.py b/homeassistant/components/voip/const.py index 0e5bd2f2275..8288297d8ef 100644 --- a/homeassistant/components/voip/const.py +++ b/homeassistant/components/voip/const.py @@ -1,3 +1,13 @@ """Constants for the Voice over IP integration.""" DOMAIN = "voip" + +RATE = 16000 +WIDTH = 2 +CHANNELS = 1 +RTP_AUDIO_SETTINGS = { + "rate": RATE, + "width": WIDTH, + "channels": CHANNELS, + "sleep_ratio": 0.99, +} diff --git a/homeassistant/components/voip/problem.pcm b/homeassistant/components/voip/problem.pcm new file mode 100644 index 00000000000..887376687f7 Binary files /dev/null and b/homeassistant/components/voip/problem.pcm differ diff --git a/homeassistant/components/voip/voip.py b/homeassistant/components/voip/voip.py index 0d66facec7b..eb4a008a168 100644 --- a/homeassistant/components/voip/voip.py +++ b/homeassistant/components/voip/voip.py @@ -18,6 +18,7 @@ from homeassistant.components.assist_pipeline import ( Pipeline, PipelineEvent, PipelineEventType, + async_get_pipeline, async_pipeline_from_audio_stream, select as pipeline_select, ) @@ -26,19 +27,42 @@ from homeassistant.const import __version__ from homeassistant.core import Context, HomeAssistant from homeassistant.util.ulid import ulid -from .const import DOMAIN +from .const import CHANNELS, DOMAIN, RATE, RTP_AUDIO_SETTINGS, WIDTH if TYPE_CHECKING: from .devices import VoIPDevice, VoIPDevices -_BUFFERED_CHUNKS_BEFORE_SPEECH = 100 # ~2 seconds -_TONE_DELAY = 0.2 # seconds before playing tone -_MESSAGE_DELAY = 1.0 # seconds before playing "not configured" message -_LOOP_DELAY = 2.0 # seconds before replaying not-configured message -_RTP_AUDIO_SETTINGS = {"rate": 16000, "width": 2, "channels": 1, "sleep_ratio": 1.01} _LOGGER = logging.getLogger(__name__) +def make_protocol( + hass: HomeAssistant, devices: VoIPDevices, call_info: CallInfo +) -> VoipDatagramProtocol: + """Plays a pre-recorded message if pipeline is misconfigured.""" + voip_device = devices.async_get_or_create(call_info) + pipeline_id = pipeline_select.get_chosen_pipeline( + hass, + DOMAIN, + voip_device.voip_id, + ) + pipeline = async_get_pipeline(hass, pipeline_id) + if ( + (pipeline is None) + or (pipeline.stt_engine is None) + or (pipeline.tts_engine is None) + ): + # Play pre-recorded message instead of failing + return PreRecordMessageProtocol(hass, "problem.pcm") + + # Pipeline is properly configured + return PipelineRtpDatagramProtocol( + hass, + hass.config.language, + voip_device, + Context(user_id=devices.config_entry.data["user"]), + ) + + class HassVoipDatagramProtocol(VoipDatagramProtocol): """HA UDP server for Voice over IP (VoIP).""" @@ -51,14 +75,11 @@ class HassVoipDatagramProtocol(VoipDatagramProtocol): session_name="voip_hass", version=__version__, ), - valid_protocol_factory=lambda call_info: PipelineRtpDatagramProtocol( - hass, - hass.config.language, - devices.async_get_or_create(call_info), - Context(user_id=devices.config_entry.data["user"]), + valid_protocol_factory=lambda call_info: make_protocol( + hass, devices, call_info ), - invalid_protocol_factory=lambda call_info: NotConfiguredRtpDatagramProtocol( - hass, + invalid_protocol_factory=lambda call_info: PreRecordMessageProtocol( + hass, "not_configured.pcm" ), ) self.hass = hass @@ -81,12 +102,13 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol): context: Context, pipeline_timeout: float = 30.0, audio_timeout: float = 2.0, + buffered_chunks_before_speech: int = 100, listening_tone_enabled: bool = True, processing_tone_enabled: bool = True, + tone_delay: float = 0.2, ) -> None: """Set up pipeline RTP server.""" - # STT expects 16Khz mono with 16-bit samples - super().__init__(rate=16000, width=2, channels=1) + super().__init__(rate=RATE, width=WIDTH, channels=CHANNELS) self.hass = hass self.language = language @@ -94,8 +116,10 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol): self.pipeline: Pipeline | None = None self.pipeline_timeout = pipeline_timeout self.audio_timeout = audio_timeout + self.buffered_chunks_before_speech = buffered_chunks_before_speech self.listening_tone_enabled = listening_tone_enabled self.processing_tone_enabled = processing_tone_enabled + self.tone_delay = tone_delay self._audio_queue: asyncio.Queue[bytes] = asyncio.Queue() self._context = context @@ -142,7 +166,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol): # Wait for speech before starting pipeline segmenter = VoiceCommandSegmenter() chunk_buffer: deque[bytes] = deque( - maxlen=_BUFFERED_CHUNKS_BEFORE_SPEECH, + maxlen=self.buffered_chunks_before_speech, ) speech_detected = await self._wait_for_speech( segmenter, @@ -294,7 +318,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol): # Assume TTS audio is 16Khz 16-bit mono await self.hass.async_add_executor_job( - partial(self.send_audio, audio_bytes, **_RTP_AUDIO_SETTINGS) + partial(self.send_audio, audio_bytes, **RTP_AUDIO_SETTINGS) ) finally: # Signal pipeline to restart @@ -313,8 +337,8 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol): partial( self.send_audio, self._tone_bytes, - silence_before=_TONE_DELAY, - **_RTP_AUDIO_SETTINGS, + silence_before=self.tone_delay, + **RTP_AUDIO_SETTINGS, ) ) @@ -331,7 +355,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol): partial( self.send_audio, self._processing_bytes, - **_RTP_AUDIO_SETTINGS, + **RTP_AUDIO_SETTINGS, ) ) @@ -340,13 +364,22 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol): return (Path(__file__).parent / file_name).read_bytes() -class NotConfiguredRtpDatagramProtocol(RtpDatagramProtocol): - """Plays audio on a loop to inform the user to configure the phone in Home Assistant.""" +class PreRecordMessageProtocol(RtpDatagramProtocol): + """Plays a pre-recorded message on a loop.""" - def __init__(self, hass: HomeAssistant) -> None: + def __init__( + self, + hass: HomeAssistant, + file_name: str, + message_delay: float = 1.0, + loop_delay: float = 2.0, + ) -> None: """Set up RTP server.""" - super().__init__(rate=16000, width=2, channels=1) + super().__init__(rate=RATE, width=WIDTH, channels=CHANNELS) self.hass = hass + self.file_name = file_name + self.message_delay = message_delay + self.loop_delay = loop_delay self._audio_task: asyncio.Task | None = None self._audio_bytes: bytes | None = None @@ -357,9 +390,8 @@ class NotConfiguredRtpDatagramProtocol(RtpDatagramProtocol): if self._audio_bytes is None: # 16Khz, 16-bit mono audio message - self._audio_bytes = ( - Path(__file__).parent / "not_configured.pcm" - ).read_bytes() + file_path = Path(__file__).parent / self.file_name + self._audio_bytes = file_path.read_bytes() if self._audio_task is None: self._audio_task = self.hass.async_create_background_task( @@ -372,12 +404,12 @@ class NotConfiguredRtpDatagramProtocol(RtpDatagramProtocol): partial( self.send_audio, self._audio_bytes, - silence_before=_MESSAGE_DELAY, - **_RTP_AUDIO_SETTINGS, + silence_before=self.message_delay, + **RTP_AUDIO_SETTINGS, ) ) - await asyncio.sleep(_LOOP_DELAY) + await asyncio.sleep(self.loop_delay) # Allow message to play again self._audio_task = None