Use sample bytes in ESPHome media format (#126016)

This commit is contained in:
Michael Hansen 2024-09-16 03:10:07 -05:00 committed by GitHub
parent 1caed79895
commit 3dd6418160
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 131 additions and 11 deletions

View File

@ -402,10 +402,23 @@ class EsphomeAssistSatellite(
if supported_format.purpose == MediaPlayerFormatPurpose.ANNOUNCEMENT:
self._attr_tts_options = {
tts.ATTR_PREFERRED_FORMAT: supported_format.format,
tts.ATTR_PREFERRED_SAMPLE_RATE: supported_format.sample_rate,
tts.ATTR_PREFERRED_SAMPLE_CHANNELS: supported_format.num_channels,
tts.ATTR_PREFERRED_SAMPLE_BYTES: 2,
}
# A value of 0 in the supported format means "keep the source value",
# so each TTS preference is only set when its OWN field is positive.
# (Previously all three guards tested sample_rate, so channels/width
# could be emitted as 0 or dropped depending on the rate alone.)
if supported_format.sample_rate > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_RATE] = (
        supported_format.sample_rate
    )
if supported_format.num_channels > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_CHANNELS] = (
        supported_format.num_channels
    )
if supported_format.sample_bytes > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_BYTES] = (
        supported_format.sample_bytes
    )
break
async def _stream_tts_audio(

View File

@ -26,11 +26,12 @@ def async_create_proxy_url(
media_format: str,
rate: int | None = None,
channels: int | None = None,
width: int | None = None,
) -> str:
"""Create a one-time use proxy URL that automatically converts the media."""
data: FFmpegProxyData = hass.data[DATA_FFMPEG_PROXY]
return data.async_create_proxy_url(
device_id, media_url, media_format, rate, channels
device_id, media_url, media_format, rate, channels, width
)
@ -50,6 +51,9 @@ class FFmpegConversionInfo:
channels: int | None
"""Target number of channels (None to keep source channels)."""
width: int | None
"""Target sample width in bytes (None to keep source width)."""
@dataclass
class FFmpegProxyData:
@ -70,11 +74,12 @@ class FFmpegProxyData:
media_format: str,
rate: int | None,
channels: int | None,
width: int | None,
) -> str:
"""Create a one-time use proxy URL that automatically converts the media."""
convert_id = secrets.token_urlsafe(16)
self.conversions[device_id][convert_id] = FFmpegConversionInfo(
media_url, media_format, rate, channels
media_url, media_format, rate, channels, width
)
_LOGGER.debug("Media URL allowed by proxy: %s", media_url)
@ -136,6 +141,10 @@ class FFmpegConvertResponse(web.StreamResponse):
# Number of channels
command_args.extend(["-ac", str(self.convert_info.channels)])
if self.convert_info.width == 2:
# 16-bit samples
command_args.extend(["-sample_fmt", "s16"])
# Output to stdout
command_args.append("pipe:")

View File

@ -170,13 +170,28 @@ class EsphomeMediaPlayer(
_LOGGER.debug("Proxying media url %s with format %s", url, format_to_use)
device_id = self.device_entry.id
media_format = format_to_use.format
# 0 = None
rate: int | None = None
channels: int | None = None
width: int | None = None
if format_to_use.sample_rate > 0:
rate = format_to_use.sample_rate
if format_to_use.num_channels > 0:
channels = format_to_use.num_channels
if format_to_use.sample_bytes > 0:
width = format_to_use.sample_bytes
proxy_url = async_create_proxy_url(
self.hass,
device_id,
url,
media_format=media_format,
rate=format_to_use.sample_rate,
channels=format_to_use.num_channels,
rate=rate,
channels=channels,
width=width,
)
# Resolve URL

View File

@ -1006,6 +1006,7 @@ async def test_tts_format_from_media_player(
sample_rate=48000,
num_channels=2,
purpose=MediaPlayerFormatPurpose.DEFAULT,
sample_bytes=2,
),
# This is the format that should be used for tts
MediaPlayerSupportedFormat(
@ -1013,6 +1014,7 @@ async def test_tts_format_from_media_player(
sample_rate=22050,
num_channels=1,
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
sample_bytes=2,
),
],
)
@ -1050,6 +1052,73 @@ async def test_tts_format_from_media_player(
}
async def test_tts_minimal_format_from_media_player(
    hass: HomeAssistant,
    mock_client: APIClient,
    mock_esphome_device: Callable[
        [APIClient, list[EntityInfo], list[UserService], list[EntityState]],
        Awaitable[MockESPHomeDevice],
    ],
) -> None:
    """Test text-to-speech format when media player only specifies the codec.

    The ANNOUNCEMENT format leaves rate, channels and width at 0 (meaning
    "use the source value"), so only the codec should end up in the TTS
    output options handed to the pipeline.
    """
    default_format = MediaPlayerSupportedFormat(
        format="flac",
        sample_rate=48000,
        num_channels=2,
        purpose=MediaPlayerFormatPurpose.DEFAULT,
        sample_bytes=2,
    )
    # This is the format that should be used for tts
    announcement_format = MediaPlayerSupportedFormat(
        format="mp3",
        sample_rate=0,  # source rate
        num_channels=0,  # source channels
        purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
        sample_bytes=0,  # source width
    )
    media_player_info = MediaPlayerInfo(
        object_id="mymedia_player",
        key=1,
        name="my media_player",
        unique_id="my_media_player",
        supports_pause=True,
        supported_formats=[default_format, announcement_format],
    )

    device: MockESPHomeDevice = await mock_esphome_device(
        mock_client=mock_client,
        entity_info=[media_player_info],
        user_service=[],
        states=[],
        device_info={
            "voice_assistant_feature_flags": VoiceAssistantFeature.VOICE_ASSISTANT
        },
    )
    await hass.async_block_till_done()

    satellite = get_satellite_entity(hass, device.device_info.mac_address)
    assert satellite is not None

    with patch(
        "homeassistant.components.assist_satellite.entity.async_pipeline_from_audio_stream",
    ) as pipeline_mock:
        await satellite.handle_pipeline_start(
            conversation_id="",
            flags=0,
            audio_settings=VoiceAssistantAudioSettings(),
            wake_word_phrase=None,
        )

    pipeline_mock.assert_called_once()
    call_kwargs = pipeline_mock.call_args_list[0].kwargs

    # Should be ANNOUNCEMENT format from media player
    expected_tts_output = {
        tts.ATTR_PREFERRED_FORMAT: "mp3",
    }
    assert call_kwargs.get("tts_audio_output") == expected_tts_output
async def test_announce_supported_features(
hass: HomeAssistant,
mock_client: APIClient,

View File

@ -310,15 +310,17 @@ async def test_media_player_proxy(
supported_formats=[
MediaPlayerSupportedFormat(
format="flac",
sample_rate=48000,
num_channels=2,
sample_rate=0, # source rate
num_channels=0, # source channels
purpose=MediaPlayerFormatPurpose.DEFAULT,
sample_bytes=0, # source width
),
MediaPlayerSupportedFormat(
format="wav",
sample_rate=16000,
num_channels=1,
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
sample_bytes=2,
),
MediaPlayerSupportedFormat(
format="mp3",
@ -369,7 +371,13 @@ async def test_media_player_proxy(
mock_async_create_proxy_url.assert_called_once()
device_id = mock_async_create_proxy_url.call_args[0][1]
mock_async_create_proxy_url.assert_called_once_with(
hass, device_id, media_url, media_format="flac", rate=48000, channels=2
hass,
device_id,
media_url,
media_format="flac",
rate=None,
channels=None,
width=None,
)
media_args = mock_client.media_player_command.call_args.kwargs
@ -395,7 +403,13 @@ async def test_media_player_proxy(
mock_async_create_proxy_url.assert_called_once()
device_id = mock_async_create_proxy_url.call_args[0][1]
mock_async_create_proxy_url.assert_called_once_with(
hass, device_id, media_url, media_format="wav", rate=16000, channels=1
hass,
device_id,
media_url,
media_format="wav",
rate=16000,
channels=1,
width=2,
)
media_args = mock_client.media_player_command.call_args.kwargs