mirror of
https://github.com/home-assistant/core.git
synced 2025-07-16 09:47:13 +00:00
Use sample bytes in ESPHome media format (#126016)
This commit is contained in:
parent
1caed79895
commit
3dd6418160
@ -402,10 +402,23 @@ class EsphomeAssistSatellite(
|
||||
if supported_format.purpose == MediaPlayerFormatPurpose.ANNOUNCEMENT:
|
||||
self._attr_tts_options = {
|
||||
tts.ATTR_PREFERRED_FORMAT: supported_format.format,
|
||||
tts.ATTR_PREFERRED_SAMPLE_RATE: supported_format.sample_rate,
|
||||
tts.ATTR_PREFERRED_SAMPLE_CHANNELS: supported_format.num_channels,
|
||||
tts.ATTR_PREFERRED_SAMPLE_BYTES: 2,
|
||||
}
|
||||
|
||||
# A value of 0 in the supported format means "not specified / keep the
# source value", so only forward the properties the device actually set.
# Each option is guarded by its own field (not by sample_rate), so that a
# format specifying e.g. a rate but no channel count never forwards 0.
if supported_format.sample_rate > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_RATE] = (
        supported_format.sample_rate
    )

if supported_format.num_channels > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_CHANNELS] = (
        supported_format.num_channels
    )

if supported_format.sample_bytes > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_BYTES] = (
        supported_format.sample_bytes
    )
|
||||
|
||||
break
|
||||
|
||||
async def _stream_tts_audio(
|
||||
|
@ -26,11 +26,12 @@ def async_create_proxy_url(
|
||||
media_format: str,
|
||||
rate: int | None = None,
|
||||
channels: int | None = None,
|
||||
width: int | None = None,
|
||||
) -> str:
|
||||
"""Create a one-time use proxy URL that automatically converts the media."""
|
||||
data: FFmpegProxyData = hass.data[DATA_FFMPEG_PROXY]
|
||||
return data.async_create_proxy_url(
|
||||
device_id, media_url, media_format, rate, channels
|
||||
device_id, media_url, media_format, rate, channels, width
|
||||
)
|
||||
|
||||
|
||||
@ -50,6 +51,9 @@ class FFmpegConversionInfo:
|
||||
channels: int | None
|
||||
"""Target number of channels (None to keep source channels)."""
|
||||
|
||||
width: int | None
|
||||
"""Target sample width in bytes (None to keep source width)."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class FFmpegProxyData:
|
||||
@ -70,11 +74,12 @@ class FFmpegProxyData:
|
||||
media_format: str,
|
||||
rate: int | None,
|
||||
channels: int | None,
|
||||
width: int | None,
|
||||
) -> str:
|
||||
"""Create a one-time use proxy URL that automatically converts the media."""
|
||||
convert_id = secrets.token_urlsafe(16)
|
||||
self.conversions[device_id][convert_id] = FFmpegConversionInfo(
|
||||
media_url, media_format, rate, channels
|
||||
media_url, media_format, rate, channels, width
|
||||
)
|
||||
_LOGGER.debug("Media URL allowed by proxy: %s", media_url)
|
||||
|
||||
@ -136,6 +141,10 @@ class FFmpegConvertResponse(web.StreamResponse):
|
||||
# Number of channels
|
||||
command_args.extend(["-ac", str(self.convert_info.channels)])
|
||||
|
||||
if self.convert_info.width == 2:
|
||||
# 16-bit samples
|
||||
command_args.extend(["-sample_fmt", "s16"])
|
||||
|
||||
# Output to stdout
|
||||
command_args.append("pipe:")
|
||||
|
||||
|
@ -170,13 +170,28 @@ class EsphomeMediaPlayer(
|
||||
_LOGGER.debug("Proxying media url %s with format %s", url, format_to_use)
|
||||
device_id = self.device_entry.id
|
||||
media_format = format_to_use.format
|
||||
|
||||
# A value of 0 means "keep the source value"; the proxy expects None for that
|
||||
rate: int | None = None
|
||||
channels: int | None = None
|
||||
width: int | None = None
|
||||
if format_to_use.sample_rate > 0:
|
||||
rate = format_to_use.sample_rate
|
||||
|
||||
if format_to_use.num_channels > 0:
|
||||
channels = format_to_use.num_channels
|
||||
|
||||
if format_to_use.sample_bytes > 0:
|
||||
width = format_to_use.sample_bytes
|
||||
|
||||
proxy_url = async_create_proxy_url(
|
||||
self.hass,
|
||||
device_id,
|
||||
url,
|
||||
media_format=media_format,
|
||||
rate=format_to_use.sample_rate,
|
||||
channels=format_to_use.num_channels,
|
||||
rate=rate,
|
||||
channels=channels,
|
||||
width=width,
|
||||
)
|
||||
|
||||
# Resolve URL
|
||||
|
@ -1006,6 +1006,7 @@ async def test_tts_format_from_media_player(
|
||||
sample_rate=48000,
|
||||
num_channels=2,
|
||||
purpose=MediaPlayerFormatPurpose.DEFAULT,
|
||||
sample_bytes=2,
|
||||
),
|
||||
# This is the format that should be used for tts
|
||||
MediaPlayerSupportedFormat(
|
||||
@ -1013,6 +1014,7 @@ async def test_tts_format_from_media_player(
|
||||
sample_rate=22050,
|
||||
num_channels=1,
|
||||
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
|
||||
sample_bytes=2,
|
||||
),
|
||||
],
|
||||
)
|
||||
@ -1050,6 +1052,73 @@ async def test_tts_format_from_media_player(
|
||||
}
|
||||
|
||||
|
||||
async def test_tts_minimal_format_from_media_player(
    hass: HomeAssistant,
    mock_client: APIClient,
    mock_esphome_device: Callable[
        [APIClient, list[EntityInfo], list[UserService], list[EntityState]],
        Awaitable[MockESPHomeDevice],
    ],
) -> None:
    """Verify the TTS options when the announcement format names only a codec.

    The media player advertises a fully specified DEFAULT format and an
    ANNOUNCEMENT format whose rate, channel count and sample width are all 0.
    The pipeline must then be started with only the preferred format set.
    """
    # Regular playback format; it must not be picked for TTS output.
    default_format = MediaPlayerSupportedFormat(
        format="flac",
        sample_rate=48000,
        num_channels=2,
        purpose=MediaPlayerFormatPurpose.DEFAULT,
        sample_bytes=2,
    )
    # Announcement format: this is the one that should be used for TTS.
    announcement_format = MediaPlayerSupportedFormat(
        format="mp3",
        sample_rate=0,  # source rate
        num_channels=0,  # source channels
        purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
        sample_bytes=0,  # source width
    )
    media_player_info = MediaPlayerInfo(
        object_id="mymedia_player",
        key=1,
        name="my media_player",
        unique_id="my_media_player",
        supports_pause=True,
        supported_formats=[default_format, announcement_format],
    )

    mock_device: MockESPHomeDevice = await mock_esphome_device(
        mock_client=mock_client,
        entity_info=[media_player_info],
        user_service=[],
        states=[],
        device_info={
            "voice_assistant_feature_flags": VoiceAssistantFeature.VOICE_ASSISTANT
        },
    )
    await hass.async_block_till_done()

    satellite = get_satellite_entity(hass, mock_device.device_info.mac_address)
    assert satellite is not None

    pipeline_target = (
        "homeassistant.components.assist_satellite.entity"
        ".async_pipeline_from_audio_stream"
    )
    with patch(pipeline_target) as pipeline_mock:
        await satellite.handle_pipeline_start(
            conversation_id="",
            flags=0,
            audio_settings=VoiceAssistantAudioSettings(),
            wake_word_phrase=None,
        )

    pipeline_mock.assert_called_once()
    call_kwargs = pipeline_mock.call_args_list[0].kwargs

    # Only the codec of the ANNOUNCEMENT format should be forwarded.
    expected_tts_output = {tts.ATTR_PREFERRED_FORMAT: "mp3"}
    assert call_kwargs.get("tts_audio_output") == expected_tts_output
|
||||
|
||||
|
||||
async def test_announce_supported_features(
|
||||
hass: HomeAssistant,
|
||||
mock_client: APIClient,
|
||||
|
@ -310,15 +310,17 @@ async def test_media_player_proxy(
|
||||
supported_formats=[
|
||||
MediaPlayerSupportedFormat(
|
||||
format="flac",
|
||||
sample_rate=48000,
|
||||
num_channels=2,
|
||||
sample_rate=0, # source rate
|
||||
num_channels=0, # source channels
|
||||
purpose=MediaPlayerFormatPurpose.DEFAULT,
|
||||
sample_bytes=0, # source width
|
||||
),
|
||||
MediaPlayerSupportedFormat(
|
||||
format="wav",
|
||||
sample_rate=16000,
|
||||
num_channels=1,
|
||||
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
|
||||
sample_bytes=2,
|
||||
),
|
||||
MediaPlayerSupportedFormat(
|
||||
format="mp3",
|
||||
@ -369,7 +371,13 @@ async def test_media_player_proxy(
|
||||
mock_async_create_proxy_url.assert_called_once()
|
||||
device_id = mock_async_create_proxy_url.call_args[0][1]
|
||||
mock_async_create_proxy_url.assert_called_once_with(
|
||||
hass, device_id, media_url, media_format="flac", rate=48000, channels=2
|
||||
hass,
|
||||
device_id,
|
||||
media_url,
|
||||
media_format="flac",
|
||||
rate=None,
|
||||
channels=None,
|
||||
width=None,
|
||||
)
|
||||
|
||||
media_args = mock_client.media_player_command.call_args.kwargs
|
||||
@ -395,7 +403,13 @@ async def test_media_player_proxy(
|
||||
mock_async_create_proxy_url.assert_called_once()
|
||||
device_id = mock_async_create_proxy_url.call_args[0][1]
|
||||
mock_async_create_proxy_url.assert_called_once_with(
|
||||
hass, device_id, media_url, media_format="wav", rate=16000, channels=1
|
||||
hass,
|
||||
device_id,
|
||||
media_url,
|
||||
media_format="wav",
|
||||
rate=16000,
|
||||
channels=1,
|
||||
width=2,
|
||||
)
|
||||
|
||||
media_args = mock_client.media_player_command.call_args.kwargs
|
||||
|
Loading…
x
Reference in New Issue
Block a user