diff --git a/homeassistant/components/elevenlabs/__init__.py b/homeassistant/components/elevenlabs/__init__.py
index e5807fec67c..a930dea43ed 100644
--- a/homeassistant/components/elevenlabs/__init__.py
+++ b/homeassistant/components/elevenlabs/__init__.py
@@ -25,7 +25,8 @@ PLATFORMS: list[Platform] = [Platform.TTS]
 
 async def get_model_by_id(client: AsyncElevenLabs, model_id: str) -> Model | None:
     """Get ElevenLabs model from their API by the model_id."""
-    models = await client.models.get_all()
+    models = await client.models.list()
+
     for maybe_model in models:
         if maybe_model.model_id == model_id:
             return maybe_model
diff --git a/homeassistant/components/elevenlabs/config_flow.py b/homeassistant/components/elevenlabs/config_flow.py
index 227749bf82c..fc248235834 100644
--- a/homeassistant/components/elevenlabs/config_flow.py
+++ b/homeassistant/components/elevenlabs/config_flow.py
@@ -23,14 +23,12 @@ from . import ElevenLabsConfigEntry
 from .const import (
     CONF_CONFIGURE_VOICE,
     CONF_MODEL,
-    CONF_OPTIMIZE_LATENCY,
     CONF_SIMILARITY,
     CONF_STABILITY,
     CONF_STYLE,
     CONF_USE_SPEAKER_BOOST,
     CONF_VOICE,
     DEFAULT_MODEL,
-    DEFAULT_OPTIMIZE_LATENCY,
     DEFAULT_SIMILARITY,
     DEFAULT_STABILITY,
     DEFAULT_STYLE,
@@ -51,7 +49,8 @@ async def get_voices_models(
     httpx_client = get_async_client(hass)
     client = AsyncElevenLabs(api_key=api_key, httpx_client=httpx_client)
     voices = (await client.voices.get_all()).voices
-    models = await client.models.get_all()
+    models = await client.models.list()
+
     voices_dict = {
         voice.voice_id: voice.name
         for voice in sorted(voices, key=lambda v: v.name or "")
@@ -78,8 +77,13 @@ class ElevenLabsConfigFlow(ConfigFlow, domain=DOMAIN):
         if user_input is not None:
             try:
                 voices, _ = await get_voices_models(self.hass, user_input[CONF_API_KEY])
-            except ApiError:
-                errors["base"] = "invalid_api_key"
+            except ApiError as exc:
+                errors["base"] = "unknown"
+                details = getattr(exc, "body", {}).get("detail", {})
+                if details:
+                    status = details.get("status")
+                    if status == "invalid_api_key":
+                        errors["base"] = "invalid_api_key"
             else:
                 return self.async_create_entry(
                     title="ElevenLabs",
@@ -206,12 +210,6 @@ class ElevenLabsOptionsFlow(OptionsFlow):
                     vol.Coerce(float),
                     vol.Range(min=0, max=1),
                 ),
-                vol.Optional(
-                    CONF_OPTIMIZE_LATENCY,
-                    default=self.config_entry.options.get(
-                        CONF_OPTIMIZE_LATENCY, DEFAULT_OPTIMIZE_LATENCY
-                    ),
-                ): vol.All(int, vol.Range(min=0, max=4)),
                 vol.Optional(
                     CONF_STYLE,
                     default=self.config_entry.options.get(CONF_STYLE, DEFAULT_STYLE),
diff --git a/homeassistant/components/elevenlabs/const.py b/homeassistant/components/elevenlabs/const.py
index 1de92f95e43..2629e62d2fc 100644
--- a/homeassistant/components/elevenlabs/const.py
+++ b/homeassistant/components/elevenlabs/const.py
@@ -7,7 +7,6 @@ CONF_MODEL = "model"
 CONF_CONFIGURE_VOICE = "configure_voice"
 CONF_STABILITY = "stability"
 CONF_SIMILARITY = "similarity"
-CONF_OPTIMIZE_LATENCY = "optimize_streaming_latency"
 CONF_STYLE = "style"
 CONF_USE_SPEAKER_BOOST = "use_speaker_boost"
 DOMAIN = "elevenlabs"
@@ -15,6 +14,5 @@ DOMAIN = "elevenlabs"
 DEFAULT_MODEL = "eleven_multilingual_v2"
 DEFAULT_STABILITY = 0.5
 DEFAULT_SIMILARITY = 0.75
-DEFAULT_OPTIMIZE_LATENCY = 0
 DEFAULT_STYLE = 0
 DEFAULT_USE_SPEAKER_BOOST = True
diff --git a/homeassistant/components/elevenlabs/manifest.json b/homeassistant/components/elevenlabs/manifest.json
index eb6df09149a..f36a2383576 100644
--- a/homeassistant/components/elevenlabs/manifest.json
+++ b/homeassistant/components/elevenlabs/manifest.json
@@ -7,5 +7,5 @@
   "integration_type": "service",
   "iot_class": "cloud_polling",
   "loggers": ["elevenlabs"],
-  "requirements": ["elevenlabs==1.9.0"]
+  "requirements": ["elevenlabs==2.3.0"]
 }
diff --git a/homeassistant/components/elevenlabs/strings.json b/homeassistant/components/elevenlabs/strings.json
index 8b0205a9e9a..eb497f1a7a6 100644
--- a/homeassistant/components/elevenlabs/strings.json
+++ b/homeassistant/components/elevenlabs/strings.json
@@ -11,7 +11,8 @@
       }
     },
     "error": {
-      "invalid_api_key": "[%key:common::config_flow::error::invalid_api_key%]"
+      "invalid_api_key": "[%key:common::config_flow::error::invalid_api_key%]",
+      "unknown": "[%key:common::config_flow::error::unknown%]"
     }
   },
   "options": {
@@ -32,14 +33,12 @@
         "data": {
           "stability": "Stability",
           "similarity": "Similarity",
-          "optimize_streaming_latency": "Latency",
           "style": "Style",
           "use_speaker_boost": "Speaker boost"
         },
         "data_description": {
           "stability": "Stability of the generated audio. Higher values lead to less emotional audio.",
           "similarity": "Similarity of the generated audio to the original voice. Higher values may result in more similar audio, but may also introduce background noise.",
-          "optimize_streaming_latency": "Optimize the model for streaming. This may reduce the quality of the generated audio.",
           "style": "Style of the generated audio. Recommended to keep at 0 for most almost all use cases.",
           "use_speaker_boost": "Use speaker boost to increase the similarity of the generated audio to the original voice."
         }
diff --git a/homeassistant/components/elevenlabs/tts.py b/homeassistant/components/elevenlabs/tts.py
index 61850837075..fc1a950d4b9 100644
--- a/homeassistant/components/elevenlabs/tts.py
+++ b/homeassistant/components/elevenlabs/tts.py
@@ -25,13 +25,11 @@ from homeassistant.helpers.entity_platform import AddConfigEntryEntitiesCallback
 from . import ElevenLabsConfigEntry
 from .const import (
     ATTR_MODEL,
-    CONF_OPTIMIZE_LATENCY,
     CONF_SIMILARITY,
     CONF_STABILITY,
     CONF_STYLE,
     CONF_USE_SPEAKER_BOOST,
     CONF_VOICE,
-    DEFAULT_OPTIMIZE_LATENCY,
     DEFAULT_SIMILARITY,
     DEFAULT_STABILITY,
     DEFAULT_STYLE,
@@ -75,9 +73,6 @@ async def async_setup_entry(
                 config_entry.entry_id,
                 config_entry.title,
                 voice_settings,
-                config_entry.options.get(
-                    CONF_OPTIMIZE_LATENCY, DEFAULT_OPTIMIZE_LATENCY
-                ),
             )
         ]
     )
@@ -98,7 +93,6 @@ class ElevenLabsTTSEntity(TextToSpeechEntity):
         entry_id: str,
         title: str,
         voice_settings: VoiceSettings,
-        latency: int = 0,
     ) -> None:
         """Init ElevenLabs TTS service."""
         self._client = client
@@ -115,7 +109,6 @@ class ElevenLabsTTSEntity(TextToSpeechEntity):
         if voice_indices:
             self._voices.insert(0, self._voices.pop(voice_indices[0]))
         self._voice_settings = voice_settings
-        self._latency = latency
 
         # Entity attributes
         self._attr_unique_id = entry_id
@@ -144,14 +137,14 @@ class ElevenLabsTTSEntity(TextToSpeechEntity):
         voice_id = options.get(ATTR_VOICE, self._default_voice_id)
         model = options.get(ATTR_MODEL, self._model.model_id)
         try:
-            audio = await self._client.generate(
+            audio = self._client.text_to_speech.convert(
                 text=message,
-                voice=voice_id,
-                optimize_streaming_latency=self._latency,
+                voice_id=voice_id,
                 voice_settings=self._voice_settings,
-                model=model,
+                model_id=model,
             )
             bytes_combined = b"".join([byte_seg async for byte_seg in audio])
+
         except ApiError as exc:
             _LOGGER.warning(
                 "Error during processing of TTS request %s", exc, exc_info=True
diff --git a/requirements_all.txt b/requirements_all.txt
index 10abfedaad0..140932f5f52 100644
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -845,7 +845,7 @@ eheimdigital==1.3.0
 electrickiwi-api==0.9.14
 
 # homeassistant.components.elevenlabs
-elevenlabs==1.9.0
+elevenlabs==2.3.0
 
 # homeassistant.components.elgato
 elgato==5.1.2
diff --git a/requirements_test_all.txt b/requirements_test_all.txt
index f7d07254799..da9d5047723 100644
--- a/requirements_test_all.txt
+++ b/requirements_test_all.txt
@@ -736,7 +736,7 @@ eheimdigital==1.3.0
 electrickiwi-api==0.9.14
 
 # homeassistant.components.elevenlabs
-elevenlabs==1.9.0
+elevenlabs==2.3.0
 
 # homeassistant.components.elgato
 elgato==5.1.2
diff --git a/tests/components/elevenlabs/conftest.py b/tests/components/elevenlabs/conftest.py
index 1c261e2947a..c47017b88e9 100644
--- a/tests/components/elevenlabs/conftest.py
+++ b/tests/components/elevenlabs/conftest.py
@@ -28,7 +28,8 @@ def mock_setup_entry() -> Generator[AsyncMock]:
 def _client_mock():
     client_mock = AsyncMock()
     client_mock.voices.get_all.return_value = GetVoicesResponse(voices=MOCK_VOICES)
-    client_mock.models.get_all.return_value = MOCK_MODELS
+    client_mock.models.list.return_value = MOCK_MODELS
+
     return client_mock
 
 
@@ -44,6 +45,10 @@ def mock_async_client() -> Generator[AsyncMock]:
             "homeassistant.components.elevenlabs.config_flow.AsyncElevenLabs",
             new=mock_async_client,
         ),
+        patch(
+            "homeassistant.components.elevenlabs.tts.AsyncElevenLabs",
+            new=mock_async_client,
+        ),
     ):
         yield mock_async_client
 
@@ -52,8 +57,12 @@
 def mock_async_client_api_error() -> Generator[AsyncMock]:
     """Override async ElevenLabs client with ApiError side effect."""
     client_mock = _client_mock()
-    client_mock.models.get_all.side_effect = ApiError
-    client_mock.voices.get_all.side_effect = ApiError
+    api_error = ApiError()
+    api_error.body = {
+        "detail": {"status": "invalid_api_key", "message": "API key is invalid"}
+    }
+    client_mock.models.list.side_effect = api_error
+    client_mock.voices.get_all.side_effect = api_error
 
     with (
         patch(
@@ -68,11 +77,51 @@
         yield mock_async_client
 
 
+@pytest.fixture
+def mock_async_client_voices_error() -> Generator[AsyncMock]:
+    """Override async ElevenLabs client with ApiError side effect."""
+    client_mock = _client_mock()
+    api_error = ApiError()
+    api_error.body = {
+        "detail": {
+            "status": "voices_unauthorized",
+            "message": "API is unauthorized for voices",
+        }
+    }
+    client_mock.voices.get_all.side_effect = api_error
+
+    with patch(
+        "homeassistant.components.elevenlabs.config_flow.AsyncElevenLabs",
+        return_value=client_mock,
+    ) as mock_async_client:
+        yield mock_async_client
+
+
+@pytest.fixture
+def mock_async_client_models_error() -> Generator[AsyncMock]:
+    """Override async ElevenLabs client with ApiError side effect."""
+    client_mock = _client_mock()
+    api_error = ApiError()
+    api_error.body = {
+        "detail": {
+            "status": "models_unauthorized",
+            "message": "API is unauthorized for models",
+        }
+    }
+    client_mock.models.list.side_effect = api_error
+
+    with patch(
+        "homeassistant.components.elevenlabs.config_flow.AsyncElevenLabs",
+        return_value=client_mock,
+    ) as mock_async_client:
+        yield mock_async_client
+
+
 @pytest.fixture
 def mock_async_client_connect_error() -> Generator[AsyncMock]:
     """Override async ElevenLabs client."""
     client_mock = _client_mock()
-    client_mock.models.get_all.side_effect = ConnectError("Unknown")
+    client_mock.models.list.side_effect = ConnectError("Unknown")
     client_mock.voices.get_all.side_effect = ConnectError("Unknown")
     with (
         patch(
diff --git a/tests/components/elevenlabs/test_config_flow.py b/tests/components/elevenlabs/test_config_flow.py
index 7eeb0a6eb46..eccd5d49d92 100644
--- a/tests/components/elevenlabs/test_config_flow.py
+++ b/tests/components/elevenlabs/test_config_flow.py
@@ -7,14 +7,12 @@ import pytest
 from homeassistant.components.elevenlabs.const import (
     CONF_CONFIGURE_VOICE,
     CONF_MODEL,
-    CONF_OPTIMIZE_LATENCY,
     CONF_SIMILARITY,
     CONF_STABILITY,
     CONF_STYLE,
     CONF_USE_SPEAKER_BOOST,
     CONF_VOICE,
     DEFAULT_MODEL,
-    DEFAULT_OPTIMIZE_LATENCY,
     DEFAULT_SIMILARITY,
     DEFAULT_STABILITY,
     DEFAULT_STYLE,
@@ -101,6 +99,94 @@ async def test_invalid_api_key(
     mock_setup_entry.assert_called_once()
 
 
+async def test_voices_error(
+    hass: HomeAssistant,
+    mock_setup_entry: AsyncMock,
+    mock_async_client_voices_error: AsyncMock,
+    request: pytest.FixtureRequest,
+) -> None:
+    """Test user step with invalid api key."""
+
+    result = await hass.config_entries.flow.async_init(
+        DOMAIN, context={"source": SOURCE_USER}
+    )
+    assert result["type"] is FlowResultType.FORM
+    assert not result["errors"]
+
+    result = await hass.config_entries.flow.async_configure(
+        result["flow_id"],
+        {
+            CONF_API_KEY: "api_key",
+        },
+    )
+    assert result["type"] is FlowResultType.FORM
+    assert result["errors"] == {"base": "unknown"}
+
+    mock_setup_entry.assert_not_called()
+
+    # Use a working client
+    request.getfixturevalue("mock_async_client")
+
+    result = await hass.config_entries.flow.async_configure(
+        result["flow_id"],
+        {
+            CONF_API_KEY: "api_key",
+        },
+    )
+    assert result["type"] is FlowResultType.CREATE_ENTRY
+    assert result["title"] == "ElevenLabs"
+    assert result["data"] == {
+        "api_key": "api_key",
+    }
+    assert result["options"] == {CONF_MODEL: DEFAULT_MODEL, CONF_VOICE: "voice1"}
+
+    mock_setup_entry.assert_called_once()
+
+
+async def test_models_error(
+    hass: HomeAssistant,
+    mock_setup_entry: AsyncMock,
+    mock_async_client_models_error: AsyncMock,
+    request: pytest.FixtureRequest,
+) -> None:
+    """Test user step with invalid api key."""
+
+    result = await hass.config_entries.flow.async_init(
+        DOMAIN, context={"source": SOURCE_USER}
+    )
+    assert result["type"] is FlowResultType.FORM
+    assert not result["errors"]
+
+    result = await hass.config_entries.flow.async_configure(
+        result["flow_id"],
+        {
+            CONF_API_KEY: "api_key",
+        },
+    )
+    assert result["type"] is FlowResultType.FORM
+    assert result["errors"] == {"base": "unknown"}
+
+    mock_setup_entry.assert_not_called()
+
+    # Use a working client
+    request.getfixturevalue("mock_async_client")
+
+    result = await hass.config_entries.flow.async_configure(
+        result["flow_id"],
+        {
+            CONF_API_KEY: "api_key",
+        },
+    )
+    assert result["type"] is FlowResultType.CREATE_ENTRY
+    assert result["title"] == "ElevenLabs"
+    assert result["data"] == {
+        "api_key": "api_key",
+    }
+    assert result["options"] == {CONF_MODEL: DEFAULT_MODEL, CONF_VOICE: "voice1"}
+
+    mock_setup_entry.assert_called_once()
+
+
 async def test_options_flow_init(
     hass: HomeAssistant,
     mock_setup_entry: AsyncMock,
@@ -166,7 +252,6 @@ async def test_options_flow_voice_settings_default(
     assert mock_entry.options == {
         CONF_MODEL: "model1",
         CONF_VOICE: "voice1",
-        CONF_OPTIMIZE_LATENCY: DEFAULT_OPTIMIZE_LATENCY,
         CONF_SIMILARITY: DEFAULT_SIMILARITY,
         CONF_STABILITY: DEFAULT_STABILITY,
         CONF_STYLE: DEFAULT_STYLE,
diff --git a/tests/components/elevenlabs/test_tts.py b/tests/components/elevenlabs/test_tts.py
index a63672cc85d..f25a03f2824 100644
--- a/tests/components/elevenlabs/test_tts.py
+++ b/tests/components/elevenlabs/test_tts.py
@@ -15,13 +15,11 @@ from homeassistant.components import tts
 from homeassistant.components.elevenlabs.const import (
     ATTR_MODEL,
     CONF_MODEL,
-    CONF_OPTIMIZE_LATENCY,
     CONF_SIMILARITY,
     CONF_STABILITY,
     CONF_STYLE,
     CONF_USE_SPEAKER_BOOST,
     CONF_VOICE,
-    DEFAULT_OPTIMIZE_LATENCY,
     DEFAULT_SIMILARITY,
     DEFAULT_STABILITY,
     DEFAULT_STYLE,
@@ -44,6 +42,19 @@ from tests.components.tts.common import retrieve_media
 from tests.typing import ClientSessionGenerator
 
 
+class FakeAudioGenerator:
+    """Mock audio generator for ElevenLabs TTS."""
+
+    def __aiter__(self):
+        """Mock async iterator for audio parts."""
+
+        async def _gen():
+            yield b"audio-part-1"
+            yield b"audio-part-2"
+
+        return _gen()
+
+
 @pytest.fixture(autouse=True)
 def tts_mutagen_mock_fixture_autouse(tts_mutagen_mock: MagicMock) -> None:
     """Mock writing tags."""
@@ -74,12 +85,6 @@ def mock_similarity():
     return DEFAULT_SIMILARITY / 2
 
 
-@pytest.fixture
-def mock_latency():
-    """Mock latency."""
-    return (DEFAULT_OPTIMIZE_LATENCY + 1) % 5  # 0, 1, 2, 3, 4
-
-
 @pytest.fixture(name="setup")
 async def setup_fixture(
     hass: HomeAssistant,
@@ -98,6 +103,7 @@ async def setup_fixture(
         raise RuntimeError("Invalid setup fixture")
 
     await hass.async_block_till_done()
+
     return mock_async_client
 
 
@@ -114,10 +120,9 @@ def config_options_fixture() -> dict[str, Any]:
     """Return config options."""
 
 
 @pytest.fixture(name="config_options_voice")
-def config_options_voice_fixture(mock_similarity, mock_latency) -> dict[str, Any]:
+def config_options_voice_fixture(mock_similarity) -> dict[str, Any]:
     """Return config options."""
     return {
-        CONF_OPTIMIZE_LATENCY: mock_latency,
         CONF_SIMILARITY: mock_similarity,
         CONF_STABILITY: DEFAULT_STABILITY,
         CONF_STYLE: DEFAULT_STYLE,
@@ -144,7 +149,7 @@ async def mock_config_entry_setup(
     config_entry.add_to_hass(hass)
     client_mock = AsyncMock()
     client_mock.voices.get_all.return_value = GetVoicesResponse(voices=MOCK_VOICES)
-    client_mock.models.get_all.return_value = MOCK_MODELS
+    client_mock.models.list.return_value = MOCK_MODELS
     with patch(
         "homeassistant.components.elevenlabs.AsyncElevenLabs", return_value=client_mock
     ):
@@ -217,7 +222,10 @@ async def test_tts_service_speak(
 ) -> None:
     """Test tts service."""
     tts_entity = hass.data[tts.DOMAIN].get_entity(service_data[ATTR_ENTITY_ID])
-    tts_entity._client.generate.reset_mock()
+    tts_entity._client.text_to_speech.convert = MagicMock(
+        return_value=FakeAudioGenerator()
+    )
+
     assert tts_entity._voice_settings == VoiceSettings(
         stability=DEFAULT_STABILITY,
         similarity_boost=DEFAULT_SIMILARITY,
@@ -240,12 +248,11 @@ async def test_tts_service_speak(
     voice_id = service_data[tts.ATTR_OPTIONS].get(tts.ATTR_VOICE, "voice1")
     model_id = service_data[tts.ATTR_OPTIONS].get(ATTR_MODEL, "model1")
 
-    tts_entity._client.generate.assert_called_once_with(
+    tts_entity._client.text_to_speech.convert.assert_called_once_with(
         text="There is a person at the front door.",
-        voice=voice_id,
-        model=model_id,
+        voice_id=voice_id,
+        model_id=model_id,
         voice_settings=tts_entity._voice_settings,
-        optimize_streaming_latency=tts_entity._latency,
     )
 
 
@@ -287,7 +294,9 @@ async def test_tts_service_speak_lang_config(
 ) -> None:
     """Test service call say with other langcodes in the config."""
     tts_entity = hass.data[tts.DOMAIN].get_entity(service_data[ATTR_ENTITY_ID])
-    tts_entity._client.generate.reset_mock()
+    tts_entity._client.text_to_speech.convert = MagicMock(
+        return_value=FakeAudioGenerator()
+    )
 
     await hass.services.async_call(
         tts.DOMAIN,
@@ -302,12 +311,11 @@ async def test_tts_service_speak_lang_config(
         == HTTPStatus.OK
     )
 
-    tts_entity._client.generate.assert_called_once_with(
+    tts_entity._client.text_to_speech.convert.assert_called_once_with(
         text="There is a person at the front door.",
-        voice="voice1",
-        model="model1",
+        voice_id="voice1",
+        model_id="model1",
         voice_settings=tts_entity._voice_settings,
-        optimize_streaming_latency=tts_entity._latency,
     )
 
 
@@ -337,8 +345,10 @@ async def test_tts_service_speak_error(
 ) -> None:
     """Test service call say with http response 400."""
     tts_entity = hass.data[tts.DOMAIN].get_entity(service_data[ATTR_ENTITY_ID])
-    tts_entity._client.generate.reset_mock()
-    tts_entity._client.generate.side_effect = ApiError
+    tts_entity._client.text_to_speech.convert = MagicMock(
+        return_value=FakeAudioGenerator()
+    )
+    tts_entity._client.text_to_speech.convert.side_effect = ApiError
 
     await hass.services.async_call(
         tts.DOMAIN,
@@ -353,12 +363,11 @@ async def test_tts_service_speak_error(
         == HTTPStatus.INTERNAL_SERVER_ERROR
     )
 
-    tts_entity._client.generate.assert_called_once_with(
+    tts_entity._client.text_to_speech.convert.assert_called_once_with(
         text="There is a person at the front door.",
-        voice="voice1",
-        model="model1",
+        voice_id="voice1",
+        model_id="model1",
         voice_settings=tts_entity._voice_settings,
-        optimize_streaming_latency=tts_entity._latency,
     )
 
 
@@ -396,18 +405,18 @@ async def test_tts_service_speak_voice_settings(
     tts_service: str,
     service_data: dict[str, Any],
     mock_similarity: float,
-    mock_latency: int,
 ) -> None:
     """Test tts service."""
     tts_entity = hass.data[tts.DOMAIN].get_entity(service_data[ATTR_ENTITY_ID])
-    tts_entity._client.generate.reset_mock()
+    tts_entity._client.text_to_speech.convert = MagicMock(
+        return_value=FakeAudioGenerator()
+    )
     assert tts_entity._voice_settings == VoiceSettings(
         stability=DEFAULT_STABILITY,
         similarity_boost=mock_similarity,
         style=DEFAULT_STYLE,
         use_speaker_boost=DEFAULT_USE_SPEAKER_BOOST,
     )
-    assert tts_entity._latency == mock_latency
 
     await hass.services.async_call(
         tts.DOMAIN,
@@ -422,12 +431,11 @@ async def test_tts_service_speak_voice_settings(
         == HTTPStatus.OK
     )
 
-    tts_entity._client.generate.assert_called_once_with(
+    tts_entity._client.text_to_speech.convert.assert_called_once_with(
         text="There is a person at the front door.",
-        voice="voice2",
-        model="model1",
+        voice_id="voice2",
+        model_id="model1",
         voice_settings=tts_entity._voice_settings,
-        optimize_streaming_latency=tts_entity._latency,
     )
 
 
@@ -457,7 +465,9 @@ async def test_tts_service_speak_without_options(
 ) -> None:
     """Test service call say with http response 200."""
     tts_entity = hass.data[tts.DOMAIN].get_entity(service_data[ATTR_ENTITY_ID])
-    tts_entity._client.generate.reset_mock()
+    tts_entity._client.text_to_speech.convert = MagicMock(
+        return_value=FakeAudioGenerator()
+    )
 
     await hass.services.async_call(
         tts.DOMAIN,
@@ -472,12 +482,11 @@ async def test_tts_service_speak_without_options(
         == HTTPStatus.OK
     )
 
-    tts_entity._client.generate.assert_called_once_with(
+    tts_entity._client.text_to_speech.convert.assert_called_once_with(
         text="There is a person at the front door.",
-        voice="voice1",
-        optimize_streaming_latency=0,
+        voice_id="voice1",
         voice_settings=VoiceSettings(
             stability=0.5, similarity_boost=0.75, style=0.0, use_speaker_boost=True
         ),
-        model="model1",
+        model_id="model1",
     )