mirror of
https://github.com/home-assistant/core.git
synced 2025-07-17 02:07:09 +00:00
Use sample bytes in ESPHome media format (#126016)
This commit is contained in:
parent
1caed79895
commit
3dd6418160
@ -402,10 +402,23 @@ class EsphomeAssistSatellite(
|
|||||||
if supported_format.purpose == MediaPlayerFormatPurpose.ANNOUNCEMENT:
|
if supported_format.purpose == MediaPlayerFormatPurpose.ANNOUNCEMENT:
|
||||||
self._attr_tts_options = {
|
self._attr_tts_options = {
|
||||||
tts.ATTR_PREFERRED_FORMAT: supported_format.format,
|
tts.ATTR_PREFERRED_FORMAT: supported_format.format,
|
||||||
tts.ATTR_PREFERRED_SAMPLE_RATE: supported_format.sample_rate,
|
|
||||||
tts.ATTR_PREFERRED_SAMPLE_CHANNELS: supported_format.num_channels,
|
|
||||||
tts.ATTR_PREFERRED_SAMPLE_BYTES: 2,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# A value of 0 means the device did not specify that parameter, so a
# preference is only added when its own field is set. Each option is
# gated on its own value; previously all three were gated on
# sample_rate, which tied the channel and width preferences to whether
# a sample rate happened to be set.
if supported_format.sample_rate > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_RATE] = (
        supported_format.sample_rate
    )

if supported_format.num_channels > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_CHANNELS] = (
        supported_format.num_channels
    )

if supported_format.sample_bytes > 0:
    self._attr_tts_options[tts.ATTR_PREFERRED_SAMPLE_BYTES] = (
        supported_format.sample_bytes
    )
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
async def _stream_tts_audio(
|
async def _stream_tts_audio(
|
||||||
|
@ -26,11 +26,12 @@ def async_create_proxy_url(
|
|||||||
media_format: str,
|
media_format: str,
|
||||||
rate: int | None = None,
|
rate: int | None = None,
|
||||||
channels: int | None = None,
|
channels: int | None = None,
|
||||||
|
width: int | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Create a one-time use proxy URL that automatically converts the media."""
|
"""Create a one-time use proxy URL that automatically converts the media."""
|
||||||
data: FFmpegProxyData = hass.data[DATA_FFMPEG_PROXY]
|
data: FFmpegProxyData = hass.data[DATA_FFMPEG_PROXY]
|
||||||
return data.async_create_proxy_url(
|
return data.async_create_proxy_url(
|
||||||
device_id, media_url, media_format, rate, channels
|
device_id, media_url, media_format, rate, channels, width
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -50,6 +51,9 @@ class FFmpegConversionInfo:
|
|||||||
channels: int | None
|
channels: int | None
|
||||||
"""Target number of channels (None to keep source channels)."""
|
"""Target number of channels (None to keep source channels)."""
|
||||||
|
|
||||||
|
width: int | None
|
||||||
|
"""Target sample width in bytes (None to keep source width)."""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class FFmpegProxyData:
|
class FFmpegProxyData:
|
||||||
@ -70,11 +74,12 @@ class FFmpegProxyData:
|
|||||||
media_format: str,
|
media_format: str,
|
||||||
rate: int | None,
|
rate: int | None,
|
||||||
channels: int | None,
|
channels: int | None,
|
||||||
|
width: int | None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Create a one-time use proxy URL that automatically converts the media."""
|
"""Create a one-time use proxy URL that automatically converts the media."""
|
||||||
convert_id = secrets.token_urlsafe(16)
|
convert_id = secrets.token_urlsafe(16)
|
||||||
self.conversions[device_id][convert_id] = FFmpegConversionInfo(
|
self.conversions[device_id][convert_id] = FFmpegConversionInfo(
|
||||||
media_url, media_format, rate, channels
|
media_url, media_format, rate, channels, width
|
||||||
)
|
)
|
||||||
_LOGGER.debug("Media URL allowed by proxy: %s", media_url)
|
_LOGGER.debug("Media URL allowed by proxy: %s", media_url)
|
||||||
|
|
||||||
@ -136,6 +141,10 @@ class FFmpegConvertResponse(web.StreamResponse):
|
|||||||
# Number of channels
|
# Number of channels
|
||||||
command_args.extend(["-ac", str(self.convert_info.channels)])
|
command_args.extend(["-ac", str(self.convert_info.channels)])
|
||||||
|
|
||||||
|
if self.convert_info.width == 2:
|
||||||
|
# 16-bit samples
|
||||||
|
command_args.extend(["-sample_fmt", "s16"])
|
||||||
|
|
||||||
# Output to stdout
|
# Output to stdout
|
||||||
command_args.append("pipe:")
|
command_args.append("pipe:")
|
||||||
|
|
||||||
|
@ -170,13 +170,28 @@ class EsphomeMediaPlayer(
|
|||||||
_LOGGER.debug("Proxying media url %s with format %s", url, format_to_use)
|
_LOGGER.debug("Proxying media url %s with format %s", url, format_to_use)
|
||||||
device_id = self.device_entry.id
|
device_id = self.device_entry.id
|
||||||
media_format = format_to_use.format
|
media_format = format_to_use.format
|
||||||
|
|
||||||
|
# A value of 0 means "not specified"; pass None so the proxy keeps the source value
|
||||||
|
rate: int | None = None
|
||||||
|
channels: int | None = None
|
||||||
|
width: int | None = None
|
||||||
|
if format_to_use.sample_rate > 0:
|
||||||
|
rate = format_to_use.sample_rate
|
||||||
|
|
||||||
|
if format_to_use.num_channels > 0:
|
||||||
|
channels = format_to_use.num_channels
|
||||||
|
|
||||||
|
if format_to_use.sample_bytes > 0:
|
||||||
|
width = format_to_use.sample_bytes
|
||||||
|
|
||||||
proxy_url = async_create_proxy_url(
|
proxy_url = async_create_proxy_url(
|
||||||
self.hass,
|
self.hass,
|
||||||
device_id,
|
device_id,
|
||||||
url,
|
url,
|
||||||
media_format=media_format,
|
media_format=media_format,
|
||||||
rate=format_to_use.sample_rate,
|
rate=rate,
|
||||||
channels=format_to_use.num_channels,
|
channels=channels,
|
||||||
|
width=width,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Resolve URL
|
# Resolve URL
|
||||||
|
@ -1006,6 +1006,7 @@ async def test_tts_format_from_media_player(
|
|||||||
sample_rate=48000,
|
sample_rate=48000,
|
||||||
num_channels=2,
|
num_channels=2,
|
||||||
purpose=MediaPlayerFormatPurpose.DEFAULT,
|
purpose=MediaPlayerFormatPurpose.DEFAULT,
|
||||||
|
sample_bytes=2,
|
||||||
),
|
),
|
||||||
# This is the format that should be used for tts
|
# This is the format that should be used for tts
|
||||||
MediaPlayerSupportedFormat(
|
MediaPlayerSupportedFormat(
|
||||||
@ -1013,6 +1014,7 @@ async def test_tts_format_from_media_player(
|
|||||||
sample_rate=22050,
|
sample_rate=22050,
|
||||||
num_channels=1,
|
num_channels=1,
|
||||||
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
|
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
|
||||||
|
sample_bytes=2,
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -1050,6 +1052,73 @@ async def test_tts_format_from_media_player(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def test_tts_minimal_format_from_media_player(
    hass: HomeAssistant,
    mock_client: APIClient,
    mock_esphome_device: Callable[
        [APIClient, list[EntityInfo], list[UserService], list[EntityState]],
        Awaitable[MockESPHomeDevice],
    ],
) -> None:
    """Test text-to-speech format when media player only specifies the codec."""
    # Fully specified format for regular playback; it must not be picked for TTS.
    default_format = MediaPlayerSupportedFormat(
        format="flac",
        sample_rate=48000,
        num_channels=2,
        purpose=MediaPlayerFormatPurpose.DEFAULT,
        sample_bytes=2,
    )
    # This is the format that should be used for tts. Only the codec is
    # given; the zeroed fields stand for the source rate/channels/width.
    announcement_format = MediaPlayerSupportedFormat(
        format="mp3",
        sample_rate=0,
        num_channels=0,
        purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
        sample_bytes=0,
    )
    media_player_info = MediaPlayerInfo(
        object_id="mymedia_player",
        key=1,
        name="my media_player",
        unique_id="my_media_player",
        supports_pause=True,
        supported_formats=[default_format, announcement_format],
    )

    mock_device: MockESPHomeDevice = await mock_esphome_device(
        mock_client=mock_client,
        entity_info=[media_player_info],
        user_service=[],
        states=[],
        device_info={
            "voice_assistant_feature_flags": VoiceAssistantFeature.VOICE_ASSISTANT
        },
    )
    await hass.async_block_till_done()

    satellite = get_satellite_entity(hass, mock_device.device_info.mac_address)
    assert satellite is not None

    pipeline_target = (
        "homeassistant.components.assist_satellite.entity.async_pipeline_from_audio_stream"
    )
    with patch(pipeline_target) as mock_pipeline_from_audio_stream:
        await satellite.handle_pipeline_start(
            conversation_id="",
            flags=0,
            audio_settings=VoiceAssistantAudioSettings(),
            wake_word_phrase=None,
        )

        mock_pipeline_from_audio_stream.assert_called_once()
        # Exactly one call was recorded, so call_args is that call.
        pipeline_kwargs = mock_pipeline_from_audio_stream.call_args.kwargs

        # Should be ANNOUNCEMENT format from media player, with no
        # rate/channel/width preferences because none were specified.
        expected_tts_options = {tts.ATTR_PREFERRED_FORMAT: "mp3"}
        assert pipeline_kwargs.get("tts_audio_output") == expected_tts_options
|
||||||
|
|
||||||
|
|
||||||
async def test_announce_supported_features(
|
async def test_announce_supported_features(
|
||||||
hass: HomeAssistant,
|
hass: HomeAssistant,
|
||||||
mock_client: APIClient,
|
mock_client: APIClient,
|
||||||
|
@ -310,15 +310,17 @@ async def test_media_player_proxy(
|
|||||||
supported_formats=[
|
supported_formats=[
|
||||||
MediaPlayerSupportedFormat(
|
MediaPlayerSupportedFormat(
|
||||||
format="flac",
|
format="flac",
|
||||||
sample_rate=48000,
|
sample_rate=0, # source rate
|
||||||
num_channels=2,
|
num_channels=0, # source channels
|
||||||
purpose=MediaPlayerFormatPurpose.DEFAULT,
|
purpose=MediaPlayerFormatPurpose.DEFAULT,
|
||||||
|
sample_bytes=0, # source width
|
||||||
),
|
),
|
||||||
MediaPlayerSupportedFormat(
|
MediaPlayerSupportedFormat(
|
||||||
format="wav",
|
format="wav",
|
||||||
sample_rate=16000,
|
sample_rate=16000,
|
||||||
num_channels=1,
|
num_channels=1,
|
||||||
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
|
purpose=MediaPlayerFormatPurpose.ANNOUNCEMENT,
|
||||||
|
sample_bytes=2,
|
||||||
),
|
),
|
||||||
MediaPlayerSupportedFormat(
|
MediaPlayerSupportedFormat(
|
||||||
format="mp3",
|
format="mp3",
|
||||||
@ -369,7 +371,13 @@ async def test_media_player_proxy(
|
|||||||
mock_async_create_proxy_url.assert_called_once()
|
mock_async_create_proxy_url.assert_called_once()
|
||||||
device_id = mock_async_create_proxy_url.call_args[0][1]
|
device_id = mock_async_create_proxy_url.call_args[0][1]
|
||||||
mock_async_create_proxy_url.assert_called_once_with(
|
mock_async_create_proxy_url.assert_called_once_with(
|
||||||
hass, device_id, media_url, media_format="flac", rate=48000, channels=2
|
hass,
|
||||||
|
device_id,
|
||||||
|
media_url,
|
||||||
|
media_format="flac",
|
||||||
|
rate=None,
|
||||||
|
channels=None,
|
||||||
|
width=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
media_args = mock_client.media_player_command.call_args.kwargs
|
media_args = mock_client.media_player_command.call_args.kwargs
|
||||||
@ -395,7 +403,13 @@ async def test_media_player_proxy(
|
|||||||
mock_async_create_proxy_url.assert_called_once()
|
mock_async_create_proxy_url.assert_called_once()
|
||||||
device_id = mock_async_create_proxy_url.call_args[0][1]
|
device_id = mock_async_create_proxy_url.call_args[0][1]
|
||||||
mock_async_create_proxy_url.assert_called_once_with(
|
mock_async_create_proxy_url.assert_called_once_with(
|
||||||
hass, device_id, media_url, media_format="wav", rate=16000, channels=1
|
hass,
|
||||||
|
device_id,
|
||||||
|
media_url,
|
||||||
|
media_format="wav",
|
||||||
|
rate=16000,
|
||||||
|
channels=1,
|
||||||
|
width=2,
|
||||||
)
|
)
|
||||||
|
|
||||||
media_args = mock_client.media_player_command.call_args.kwargs
|
media_args = mock_client.media_player_command.call_args.kwargs
|
||||||
|
Loading…
x
Reference in New Issue
Block a user