Add voice settings to ElevenLabs options flow (#123265)

Add voice settings to options flow
2025-07-25 06:07:17 +00:00 · 2024-09-08 13:11:26 +02:00 · 2024-09-08 13:11:26 +02:00 · 8acc027f38
commit 8acc027f38
parent 3139a7e431
6 changed files with 349 additions and 11 deletions
--- a/homeassistant/components/elevenlabs/config_flow.py
+++ b/homeassistant/components/elevenlabs/config_flow.py
@ -23,7 +23,23 @@ from homeassistant.helpers.selector import (
    SelectSelectorConfig,
 )
-from .const import CONF_MODEL, CONF_VOICE, DEFAULT_MODEL, DOMAIN
+from .const import (
    CONF_CONFIGURE_VOICE,
    CONF_MODEL,
    CONF_OPTIMIZE_LATENCY,
    CONF_SIMILARITY,
    CONF_STABILITY,
    CONF_STYLE,
    CONF_USE_SPEAKER_BOOST,
    CONF_VOICE,
    DEFAULT_MODEL,
    DEFAULT_OPTIMIZE_LATENCY,
    DEFAULT_SIMILARITY,
    DEFAULT_STABILITY,
    DEFAULT_STYLE,
    DEFAULT_USE_SPEAKER_BOOST,
    DOMAIN,
 )
 USER_STEP_SCHEMA = vol.Schema({vol.Required(CONF_API_KEY): str})
@ -92,6 +108,8 @@ class ElevenLabsOptionsFlow(OptionsFlowWithConfigEntry):
        # id -> name
        self.voices: dict[str, str] = {}
        self.models: dict[str, str] = {}
        self.model: str | None = None
        self.voice: str | None = None
    async def async_step_init(
        self, user_input: dict[str, Any] | None = None
@ -103,6 +121,11 @@ class ElevenLabsOptionsFlow(OptionsFlowWithConfigEntry):
        assert self.models and self.voices
        if user_input is not None:
            self.model = user_input[CONF_MODEL]
            self.voice = user_input[CONF_VOICE]
            configure_voice = user_input.pop(CONF_CONFIGURE_VOICE)
            if configure_voice:
                return await self.async_step_voice_settings()
            return self.async_create_entry(
                title="ElevenLabs",
                data=user_input,
@ -139,7 +162,69 @@ class ElevenLabsOptionsFlow(OptionsFlowWithConfigEntry):
                            ]
                        )
                    ),
                    vol.Required(CONF_CONFIGURE_VOICE, default=False): bool,
                }
            ),
            self.options,
        )
    async def async_step_voice_settings(
        self, user_input: dict[str, Any] | None = None
    ) -> ConfigFlowResult:
        """Show the advanced voice settings form or store the submitted values.

        Without input the ``voice_settings`` form is rendered. With input,
        ``self.model`` and ``self.voice`` are added to the submitted settings
        and the combined mapping is saved as the entry options.
        """
        assert self.voices and self.models

        if user_input is None:
            # Nothing submitted yet: render the form built from the voice schema.
            schema = self.elevenlabs_config_options_voice_schema()
            return self.async_show_form(step_id="voice_settings", data_schema=schema)

        # The voice settings schema has no model/voice fields of its own, so
        # add the values kept on the flow before persisting the options.
        user_input.update({CONF_MODEL: self.model, CONF_VOICE: self.voice})
        return self.async_create_entry(title="ElevenLabs", data=user_input)
    def elevenlabs_config_options_voice_schema(self) -> vol.Schema:
        """Build the schema for the advanced voice settings form.

        Every field is optional and defaults to the value currently stored in
        the config entry options, or to its ``DEFAULT_*`` constant when unset.
        """
        stored = self.config_entry.options

        def _optional(key: str, fallback: Any) -> vol.Optional:
            # Optional marker pre-filled with the stored value or the fallback.
            return vol.Optional(key, default=stored.get(key, fallback))

        def _unit_float() -> vol.All:
            # Coerce to float, then restrict to the closed range 0..1.
            return vol.All(vol.Coerce(float), vol.Range(min=0, max=1))

        fields = {
            _optional(CONF_STABILITY, DEFAULT_STABILITY): _unit_float(),
            _optional(CONF_SIMILARITY, DEFAULT_SIMILARITY): _unit_float(),
            # Latency is an int level between 0 and 4.
            _optional(CONF_OPTIMIZE_LATENCY, DEFAULT_OPTIMIZE_LATENCY): vol.All(
                int, vol.Range(min=0, max=4)
            ),
            _optional(CONF_STYLE, DEFAULT_STYLE): _unit_float(),
            _optional(CONF_USE_SPEAKER_BOOST, DEFAULT_USE_SPEAKER_BOOST): bool,
        }
        return vol.Schema(fields)
--- a/homeassistant/components/elevenlabs/const.py
+++ b/homeassistant/components/elevenlabs/const.py
@ -2,6 +2,17 @@
 # Keys of the options stored for an ElevenLabs config entry.
 CONF_VOICE = "voice"
 CONF_MODEL = "model"
 # Form-only flag: the options flow pops it and, when true, opens the voice settings step.
 CONF_CONFIGURE_VOICE = "configure_voice"
 # Voice settings; the options flow coerces stability, similarity and style to floats in 0..1.
 CONF_STABILITY = "stability"
 CONF_SIMILARITY = "similarity"
 # Integer level 0..4, passed to the client as `optimize_streaming_latency`.
 CONF_OPTIMIZE_LATENCY = "optimize_streaming_latency"
 CONF_STYLE = "style"
 CONF_USE_SPEAKER_BOOST = "use_speaker_boost"
 DOMAIN = "elevenlabs"
 DEFAULT_MODEL = "eleven_multilingual_v2"
 # Fallbacks used when the corresponding option has not been configured.
 DEFAULT_STABILITY = 0.5
 DEFAULT_SIMILARITY = 0.75
 DEFAULT_OPTIMIZE_LATENCY = 0
 DEFAULT_STYLE = 0
 DEFAULT_USE_SPEAKER_BOOST = True
--- a/homeassistant/components/elevenlabs/strings.json
+++ b/homeassistant/components/elevenlabs/strings.json
@ -19,11 +19,29 @@
      "init": {
        "data": {
          "voice": "Voice",
-          "model": "Model"
+          "model": "Model",
          "configure_voice": "Configure advanced voice settings"
        },
        "data_description": {
          "voice": "Voice to use for the TTS.",
-          "model": "ElevenLabs model to use. Please note that not all models support all languages equally well."
+          "model": "ElevenLabs model to use. Please note that not all models support all languages equally well.",
          "configure_voice": "Configure advanced voice settings. Find more information in the ElevenLabs documentation."
        }
      },
      "voice_settings": {
        "data": {
          "stability": "Stability",
          "similarity": "Similarity",
          "optimize_streaming_latency": "Latency",
          "style": "Style",
          "use_speaker_boost": "Speaker boost"
        },
        "data_description": {
          "stability": "Stability of the generated audio. Higher values lead to less emotional audio.",
          "similarity": "Similarity of the generated audio to the original voice. Higher values may result in more similar audio, but may also introduce background noise.",
          "optimize_streaming_latency": "Optimize the model for streaming. This may reduce the quality of the generated audio.",
          "style": "Style of the generated audio. Recommended to keep at 0 for almost all use cases.",
          "use_speaker_boost": "Use speaker boost to increase the similarity of the generated audio to the original voice."
        }
      }
    }
--- a/homeassistant/components/elevenlabs/tts.py
+++ b/homeassistant/components/elevenlabs/tts.py
@ -3,11 +3,12 @@
 from __future__ import annotations
 import logging
 from types import MappingProxyType
 from typing import Any
 from elevenlabs.client import AsyncElevenLabs
 from elevenlabs.core import ApiError
-from elevenlabs.types import Model, Voice as ElevenLabsVoice
+from elevenlabs.types import Model, Voice as ElevenLabsVoice, VoiceSettings
 from homeassistant.components.tts import (
    ATTR_VOICE,
@ -21,11 +22,36 @@ from homeassistant.helpers.device_registry import DeviceEntryType, DeviceInfo
 from homeassistant.helpers.entity_platform import AddEntitiesCallback
 from . import EleventLabsConfigEntry
-from .const import CONF_VOICE, DOMAIN
+from .const import (
    CONF_OPTIMIZE_LATENCY,
    CONF_SIMILARITY,
    CONF_STABILITY,
    CONF_STYLE,
    CONF_USE_SPEAKER_BOOST,
    CONF_VOICE,
    DEFAULT_OPTIMIZE_LATENCY,
    DEFAULT_SIMILARITY,
    DEFAULT_STABILITY,
    DEFAULT_STYLE,
    DEFAULT_USE_SPEAKER_BOOST,
    DOMAIN,
 )
 _LOGGER = logging.getLogger(__name__)
 def to_voice_settings(options: MappingProxyType[str, Any]) -> VoiceSettings:
    """Build ElevenLabs voice settings from an options mapping.

    Any option missing from ``options`` falls back to its ``DEFAULT_*`` constant.
    """
    stability = options.get(CONF_STABILITY, DEFAULT_STABILITY)
    similarity = options.get(CONF_SIMILARITY, DEFAULT_SIMILARITY)
    style = options.get(CONF_STYLE, DEFAULT_STYLE)
    speaker_boost = options.get(CONF_USE_SPEAKER_BOOST, DEFAULT_USE_SPEAKER_BOOST)
    # The stored "similarity" option maps onto the `similarity_boost` field.
    return VoiceSettings(
        stability=stability,
        similarity_boost=similarity,
        style=style,
        use_speaker_boost=speaker_boost,
    )
 async def async_setup_entry(
    hass: HomeAssistant,
    config_entry: EleventLabsConfigEntry,
@ -35,6 +61,7 @@ async def async_setup_entry(
    client = config_entry.runtime_data.client
    voices = (await client.voices.get_all()).voices
    default_voice_id = config_entry.options[CONF_VOICE]
    voice_settings = to_voice_settings(config_entry.options)
    async_add_entities(
        [
            ElevenLabsTTSEntity(
@ -44,6 +71,10 @@ async def async_setup_entry(
                default_voice_id,
                config_entry.entry_id,
                config_entry.title,
                voice_settings,
                config_entry.options.get(
                    CONF_OPTIMIZE_LATENCY, DEFAULT_OPTIMIZE_LATENCY
                ),
            )
        ]
    )
@ -62,6 +93,8 @@ class ElevenLabsTTSEntity(TextToSpeechEntity):
        default_voice_id: str,
        entry_id: str,
        title: str,
        voice_settings: VoiceSettings,
        latency: int = 0,
    ) -> None:
        """Init ElevenLabs TTS service."""
        self._client = client
@ -77,6 +110,10 @@ class ElevenLabsTTSEntity(TextToSpeechEntity):
        ]
        if voice_indices:
            self._voices.insert(0, self._voices.pop(voice_indices[0]))
        self._voice_settings = voice_settings
        self._latency = latency
        # Entity attributes
        self._attr_unique_id = entry_id
        self._attr_name = title
        self._attr_device_info = DeviceInfo(
@ -105,6 +142,8 @@ class ElevenLabsTTSEntity(TextToSpeechEntity):
            audio = await self._client.generate(
                text=message,
                voice=voice_id,
                optimize_streaming_latency=self._latency,
                voice_settings=self._voice_settings,
                model=self._model.model_id,
            )
            bytes_combined = b"".join([byte_seg async for byte_seg in audio])
--- a/tests/components/elevenlabs/test_config_flow.py
+++ b/tests/components/elevenlabs/test_config_flow.py
@ -3,9 +3,20 @@
 from unittest.mock import AsyncMock
 from homeassistant.components.elevenlabs.const import (
    CONF_CONFIGURE_VOICE,
    CONF_MODEL,
    CONF_OPTIMIZE_LATENCY,
    CONF_SIMILARITY,
    CONF_STABILITY,
    CONF_STYLE,
    CONF_USE_SPEAKER_BOOST,
    CONF_VOICE,
    DEFAULT_MODEL,
    DEFAULT_OPTIMIZE_LATENCY,
    DEFAULT_SIMILARITY,
    DEFAULT_STABILITY,
    DEFAULT_STYLE,
    DEFAULT_USE_SPEAKER_BOOST,
    DOMAIN,
 )
 from homeassistant.config_entries import SOURCE_USER
@ -89,6 +100,52 @@ async def test_options_flow_init(
    )
    assert result["type"] is FlowResultType.CREATE_ENTRY
-    assert mock_entry.options == {CONF_MODEL: "model1", CONF_VOICE: "voice1"}
+    assert mock_entry.options == {
        CONF_MODEL: "model1",
        CONF_VOICE: "voice1",
    }
    mock_setup_entry.assert_called_once()
 async def test_options_flow_voice_settings_default(
    hass: HomeAssistant,
    mock_setup_entry: AsyncMock,
    mock_async_client: AsyncMock,
    mock_entry: MockConfigEntry,
 ) -> None:
    """Test that an empty voice settings submission stores the defaults."""
    mock_entry.add_to_hass(hass)
    assert await hass.config_entries.async_setup(mock_entry.entry_id)
    await hass.async_block_till_done()

    options_flow = hass.config_entries.options

    # Opening the options flow lands on the init form.
    step = await options_flow.async_init(mock_entry.entry_id)
    assert step["type"] is FlowResultType.FORM
    assert step["step_id"] == "init"

    # Ticking "configure voice" must lead to the voice settings form.
    init_input = {
        CONF_MODEL: "model1",
        CONF_VOICE: "voice1",
        CONF_CONFIGURE_VOICE: True,
    }
    step = await options_flow.async_configure(step["flow_id"], user_input=init_input)
    assert step["type"] is FlowResultType.FORM
    assert step["step_id"] == "voice_settings"

    # Submitting nothing lets every voice setting take its schema default.
    step = await options_flow.async_configure(step["flow_id"], user_input={})
    assert step["type"] is FlowResultType.CREATE_ENTRY

    expected_options = {
        CONF_MODEL: "model1",
        CONF_VOICE: "voice1",
        CONF_OPTIMIZE_LATENCY: DEFAULT_OPTIMIZE_LATENCY,
        CONF_SIMILARITY: DEFAULT_SIMILARITY,
        CONF_STABILITY: DEFAULT_STABILITY,
        CONF_STYLE: DEFAULT_STYLE,
        CONF_USE_SPEAKER_BOOST: DEFAULT_USE_SPEAKER_BOOST,
    }
    assert mock_entry.options == expected_options
--- a/tests/components/elevenlabs/test_tts.py
+++ b/tests/components/elevenlabs/test_tts.py
@ -8,11 +8,25 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 from elevenlabs.core import ApiError
-from elevenlabs.types import GetVoicesResponse
+from elevenlabs.types import GetVoicesResponse, VoiceSettings
 import pytest
 from homeassistant.components import tts
-from homeassistant.components.elevenlabs.const import CONF_MODEL, CONF_VOICE, DOMAIN
+from homeassistant.components.elevenlabs.const import (
    CONF_MODEL,
    CONF_OPTIMIZE_LATENCY,
    CONF_SIMILARITY,
    CONF_STABILITY,
    CONF_STYLE,
    CONF_USE_SPEAKER_BOOST,
    CONF_VOICE,
    DEFAULT_OPTIMIZE_LATENCY,
    DEFAULT_SIMILARITY,
    DEFAULT_STABILITY,
    DEFAULT_STYLE,
    DEFAULT_USE_SPEAKER_BOOST,
    DOMAIN,
 )
 from homeassistant.components.media_player import (
    ATTR_MEDIA_CONTENT_ID,
    DOMAIN as DOMAIN_MP,
@ -53,17 +67,32 @@ async def setup_internal_url(hass: HomeAssistant) -> None:
    )
@pytest.fixture
 def mock_similarity():
    """Mock similarity."""
    return DEFAULT_SIMILARITY / 2
@pytest.fixture
 def mock_latency():
    """Mock latency."""
    return (DEFAULT_OPTIMIZE_LATENCY + 1) % 5  # 0, 1, 2, 3, 4
@pytest.fixture(name="setup")
 async def setup_fixture(
    hass: HomeAssistant,
    config_data: dict[str, Any],
    config_options: dict[str, Any],
    config_options_voice: dict[str, Any],
    request: pytest.FixtureRequest,
    mock_async_client: AsyncMock,
 ) -> AsyncMock:
    """Set up the test environment."""
    if request.param == "mock_config_entry_setup":
        await mock_config_entry_setup(hass, config_data, config_options)
    elif request.param == "mock_config_entry_setup_voice":
        await mock_config_entry_setup(hass, config_data, config_options_voice)
    else:
        raise RuntimeError("Invalid setup fixture")
@ -83,6 +112,18 @@ def config_options_fixture() -> dict[str, Any]:
    return {}
@pytest.fixture(name="config_options_voice")
 def config_options_voice_fixture(mock_similarity, mock_latency) -> dict[str, Any]:
    """Return config options."""
    return {
        CONF_OPTIMIZE_LATENCY: mock_latency,
        CONF_SIMILARITY: mock_similarity,
        CONF_STABILITY: DEFAULT_STABILITY,
        CONF_STYLE: DEFAULT_STYLE,
        CONF_USE_SPEAKER_BOOST: DEFAULT_USE_SPEAKER_BOOST,
    }
 async def mock_config_entry_setup(
    hass: HomeAssistant, config_data: dict[str, Any], config_options: dict[str, Any]
 ) -> None:
@ -146,6 +187,12 @@ async def test_tts_service_speak(
    """Test tts service."""
    tts_entity = hass.data[tts.DOMAIN].get_entity(service_data[ATTR_ENTITY_ID])
    tts_entity._client.generate.reset_mock()
    assert tts_entity._voice_settings == VoiceSettings(
        stability=DEFAULT_STABILITY,
        similarity_boost=DEFAULT_SIMILARITY,
        style=DEFAULT_STYLE,
        use_speaker_boost=DEFAULT_USE_SPEAKER_BOOST,
    )
    await hass.services.async_call(
        tts.DOMAIN,
@ -161,7 +208,11 @@ async def test_tts_service_speak(
    )
    tts_entity._client.generate.assert_called_once_with(
-        text="There is a person at the front door.", voice="voice2", model="model1"
+        text="There is a person at the front door.",
        voice="voice2",
        model="model1",
        voice_settings=tts_entity._voice_settings,
        optimize_streaming_latency=tts_entity._latency,
    )
@ -219,7 +270,11 @@ async def test_tts_service_speak_lang_config(
    )
    tts_entity._client.generate.assert_called_once_with(
-        text="There is a person at the front door.", voice="voice1", model="model1"
+        text="There is a person at the front door.",
        voice="voice1",
        model="model1",
        voice_settings=tts_entity._voice_settings,
        optimize_streaming_latency=tts_entity._latency,
    )
@ -266,5 +321,78 @@ async def test_tts_service_speak_error(
    )
    tts_entity._client.generate.assert_called_once_with(
-        text="There is a person at the front door.", voice="voice1", model="model1"
+        text="There is a person at the front door.",
        voice="voice1",
        model="model1",
        voice_settings=tts_entity._voice_settings,
        optimize_streaming_latency=tts_entity._latency,
    )
@pytest.mark.parametrize(
    "config_data",
    [
        {},
        {tts.CONF_LANG: "de"},
        {tts.CONF_LANG: "en"},
        {tts.CONF_LANG: "ja"},
        {tts.CONF_LANG: "es"},
    ],
 )
@pytest.mark.parametrize(
    ("setup", "tts_service", "service_data"),
    [
        (
            "mock_config_entry_setup_voice",
            "speak",
            {
                ATTR_ENTITY_ID: "tts.mock_title",
                tts.ATTR_MEDIA_PLAYER_ENTITY_ID: "media_player.something",
                tts.ATTR_MESSAGE: "There is a person at the front door.",
                tts.ATTR_OPTIONS: {tts.ATTR_VOICE: "voice2"},
            },
        ),
    ],
    indirect=["setup"],
 )
 async def test_tts_service_speak_voice_settings(
    setup: AsyncMock,
    hass: HomeAssistant,
    hass_client: ClientSessionGenerator,
    calls: list[ServiceCall],
    tts_service: str,
    service_data: dict[str, Any],
    mock_similarity: float,
    mock_latency: int,
 ) -> None:
    """Test tts service."""
    tts_entity = hass.data[tts.DOMAIN].get_entity(service_data[ATTR_ENTITY_ID])
    tts_entity._client.generate.reset_mock()
    assert tts_entity._voice_settings == VoiceSettings(
        stability=DEFAULT_STABILITY,
        similarity_boost=mock_similarity,
        style=DEFAULT_STYLE,
        use_speaker_boost=DEFAULT_USE_SPEAKER_BOOST,
    )
    assert tts_entity._latency == mock_latency
    await hass.services.async_call(
        tts.DOMAIN,
        tts_service,
        service_data,
        blocking=True,
    )
    assert len(calls) == 1
    assert (
        await retrieve_media(hass, hass_client, calls[0].data[ATTR_MEDIA_CONTENT_ID])
        == HTTPStatus.OK
    )
    tts_entity._client.generate.assert_called_once_with(
        text="There is a person at the front door.",
        voice="voice2",
        model="model1",
        voice_settings=tts_entity._voice_settings,
        optimize_streaming_latency=tts_entity._latency,
    )