From 663f66a2b27fc557284c7cab575c5049406668e9 Mon Sep 17 00:00:00 2001 From: Dara Adib Date: Thu, 25 May 2023 04:46:34 -0400 Subject: [PATCH] Scrape Microsoft TTS supported languages (#91988) * Update Microsoft TTS supported languages `script.microsoft_tts` scrapes Microsoft Azure documentation for the list of supported languages and saves them to `homeassistant.generated.microsoft_tts` for use in the component. This adds support for more TTS languages, like fa-ir (Persian). * Improve xpath query for Microsoft TTS languages * Remove asserts for Microsoft TTS languages * Add more tests for Microsoft TTS languages --- homeassistant/components/microsoft/tts.py | 75 +---------- homeassistant/generated/microsoft_tts.py | 154 ++++++++++++++++++++++ script/microsoft_tts.py | 25 ++++ tests/components/microsoft/test_tts.py | 87 +++++++++++- 4 files changed, 265 insertions(+), 76 deletions(-) create mode 100644 homeassistant/generated/microsoft_tts.py create mode 100644 script/microsoft_tts.py diff --git a/homeassistant/components/microsoft/tts.py b/homeassistant/components/microsoft/tts.py index ff35fcb561f..38f46d886de 100644 --- a/homeassistant/components/microsoft/tts.py +++ b/homeassistant/components/microsoft/tts.py @@ -7,6 +7,7 @@ import voluptuous as vol from homeassistant.components.tts import CONF_LANG, PLATFORM_SCHEMA, Provider from homeassistant.const import CONF_API_KEY, CONF_REGION, CONF_TYPE, PERCENTAGE +from homeassistant.generated.microsoft_tts import SUPPORTED_LANGUAGES import homeassistant.helpers.config_validation as cv CONF_GENDER = "gender" @@ -17,80 +18,6 @@ CONF_PITCH = "pitch" CONF_CONTOUR = "contour" _LOGGER = logging.getLogger(__name__) -SUPPORTED_LANGUAGES = [ - "ar-eg", - "ar-sa", - "bg-bg", - "ca-es", - "cs-cz", - "cy-gb", - "da-dk", - "de-at", - "de-ch", - "de-de", - "el-gr", - "en-au", - "en-ca", - "en-gb", - "en-hk", - "en-ie", - "en-in", - "en-nz", - "en-ph", - "en-sg", - "en-us", - "en-za", - "es-ar", - "es-co", - "es-es", - "es-mx", - "es-us", - "et-ee", - "fi-fi", - "fr-be", - "fr-ca", - "fr-ch", - "fr-fr", - "ga-ie", - "gu-in", - "he-il", - "hi-in", - "hr-hr", - "hu-hu", - "id-id", - "is-is", - "it-it", - "ja-jp", - "ko-kr", - "lt-lt", - "lv-lv", - "mr-in", - "ms-my", - "mt-mt", - "nb-no", - "nl-be", - "nl-nl", - "pl-pl", - "pt-br", - "pt-pt", - "ro-ro", - "ru-ru", - "sk-sk", - "sl-si", - "sv-se", - "sw-ke", - "ta-in", - "te-in", - "th-th", - "tr-tr", - "uk-ua", - "ur-pk", - "vi-vn", - "zh-cn", - "zh-hk", - "zh-tw", -] - GENDERS = ["Female", "Male"] DEFAULT_LANG = "en-us" diff --git a/homeassistant/generated/microsoft_tts.py b/homeassistant/generated/microsoft_tts.py new file mode 100644 index 00000000000..2b894e834ee --- /dev/null +++ b/homeassistant/generated/microsoft_tts.py @@ -0,0 +1,154 @@ +"""Automatically generated file. + +To update, run python3 -m script.microsoft_tts +""" + +SUPPORTED_LANGUAGES = { + "af-za", + "am-et", + "ar-ae", + "ar-bh", + "ar-dz", + "ar-eg", + "ar-iq", + "ar-jo", + "ar-kw", + "ar-lb", + "ar-ly", + "ar-ma", + "ar-om", + "ar-qa", + "ar-sa", + "ar-sy", + "ar-tn", + "ar-ye", + "az-az", + "bg-bg", + "bn-bd", + "bn-in", + "bs-ba", + "ca-es", + "cs-cz", + "cy-gb", + "da-dk", + "de-at", + "de-ch", + "de-de", + "el-gr", + "en-au", + "en-ca", + "en-gb", + "en-hk", + "en-ie", + "en-in", + "en-ke", + "en-ng", + "en-nz", + "en-ph", + "en-sg", + "en-tz", + "en-us", + "en-za", + "es-ar", + "es-bo", + "es-cl", + "es-co", + "es-cr", + "es-cu", + "es-do", + "es-ec", + "es-es", + "es-gq", + "es-gt", + "es-hn", + "es-mx", + "es-ni", + "es-pa", + "es-pe", + "es-pr", + "es-py", + "es-sv", + "es-us", + "es-uy", + "es-ve", + "et-ee", + "eu-es", + "fa-ir", + "fi-fi", + "fil-ph", + "fr-be", + "fr-ca", + "fr-ch", + "fr-fr", + "ga-ie", + "gl-es", + "gu-in", + "he-il", + "hi-in", + "hr-hr", + "hu-hu", + "hy-am", + "id-id", + "is-is", + "it-it", + "ja-jp", + "jv-id", + "ka-ge", + "kk-kz", + "km-kh", + "kn-in", + "ko-kr", + "lo-la", + "lt-lt", + "lv-lv", + "mk-mk", + "ml-in", + "mn-mn", + "mr-in", + "ms-my", + "mt-mt", + "my-mm", + "nb-no", + "ne-np", + "nl-be", + "nl-nl", + "pl-pl", + "ps-af", + "pt-br", + "pt-pt", + "ro-ro", + "ru-ru", + "si-lk", + "sk-sk", + "sl-si", + "so-so", + "sq-al", + "sr-rs", + "su-id", + "sv-se", + "sw-ke", + "sw-tz", + "ta-in", + "ta-lk", + "ta-my", + "ta-sg", + "te-in", + "th-th", + "tr-tr", + "uk-ua", + "ur-in", + "ur-pk", + "uz-uz", + "vi-vn", + "wuu-cn", + "yue-cn", + "zh-cn", + "zh-cn-henan", + "zh-cn-liaoning", + "zh-cn-shaanxi", + "zh-cn-shandong", + "zh-cn-sichuan", + "zh-hk", + "zh-tw", + "zu-za", +} diff --git a/script/microsoft_tts.py b/script/microsoft_tts.py new file mode 100644 index 00000000000..128c287345c --- /dev/null +++ b/script/microsoft_tts.py @@ -0,0 +1,25 @@ +"""Helper script to update supported languages for Microsoft Text-to-Speech (TTS).""" +from pathlib import Path + +from lxml import html +import requests + +from .hassfest.serializer import format_python_namespace + +URL = "https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support" +XPATH_QUERY = "//section[@data-tab='tts']/table[1]/tbody/tr/td[1]/code/text()" + +req = requests.get(URL) +req.raise_for_status() +tree = html.fromstring(req.content) +supported_languages_raw = tree.xpath(XPATH_QUERY) +supported_languages = {s.lower() for s in supported_languages_raw} + +Path("homeassistant/generated/microsoft_tts.py").write_text( + format_python_namespace( + { + "SUPPORTED_LANGUAGES": supported_languages, + }, + generator="script.microsoft_tts", + ) +) diff --git a/tests/components/microsoft/test_tts.py b/tests/components/microsoft/test_tts.py index 2bdc32832a8..f01eff55690 100644 --- a/tests/components/microsoft/test_tts.py +++ b/tests/components/microsoft/test_tts.py @@ -173,9 +173,92 @@ async def test_service_say_en_gb_service(hass: HomeAssistant, mock_tts, calls) - } +async def test_service_say_fa_ir_config(hass: HomeAssistant, mock_tts, calls) -> None: + """Test service call say with fa-ir code in the config.""" + + await async_setup_component( + hass, + tts.DOMAIN, + { + tts.DOMAIN: { + "platform": "microsoft", + "api_key": "", + "language": "fa-ir", + "type": "DilaraNeural", + } + }, + ) + + await hass.services.async_call( + tts.DOMAIN, + "microsoft_say", + { + "entity_id": "media_player.something", + tts.ATTR_MESSAGE: "There is a person at the front door.", + }, + blocking=True, + ) + + assert len(calls) == 1 + await get_media_source_url(hass, calls[0].data[ATTR_MEDIA_CONTENT_ID]) + assert len(mock_tts.mock_calls) == 2 + assert mock_tts.mock_calls[1][2] == { + "language": "fa-ir", + "gender": "Female", + "voiceType": "DilaraNeural", + "output": "audio-24khz-96kbitrate-mono-mp3", + "rate": "0%", + "volume": "0%", + "pitch": "default", + "contour": "", + "text": "There is a person at the front door.", + } + + +async def test_service_say_fa_ir_service(hass: HomeAssistant, mock_tts, calls) -> None: + """Test service call say with fa-ir code in the service.""" + + config = { + tts.DOMAIN: { + "platform": "microsoft", + "api_key": "", + "service_name": "microsoft_say", + } + } + + await async_setup_component(hass, tts.DOMAIN, config) + + await hass.services.async_call( + tts.DOMAIN, + "microsoft_say", + { + "entity_id": "media_player.something", + tts.ATTR_MESSAGE: "There is a person at the front door.", + tts.ATTR_LANGUAGE: "fa-ir", + tts.ATTR_OPTIONS: {"type": "DilaraNeural"}, + }, + blocking=True, + ) + + assert len(calls) == 1 + await get_media_source_url(hass, calls[0].data[ATTR_MEDIA_CONTENT_ID]) + assert len(mock_tts.mock_calls) == 2 + assert mock_tts.mock_calls[1][2] == { + "language": "fa-ir", + "gender": "Female", + "voiceType": "DilaraNeural", + "output": "audio-24khz-96kbitrate-mono-mp3", + "rate": "0%", + "volume": "0%", + "pitch": "default", + "contour": "", + "text": "There is a person at the front door.", + } + + def test_supported_languages() -> None: """Test list of supported languages.""" - for lang in ["en-us", "en-gb"]: + for lang in ["en-us", "fa-ir", "en-gb"]: assert lang in SUPPORTED_LANGUAGES assert "en-US" not in SUPPORTED_LANGUAGES for lang in [ @@ -187,7 +270,7 @@ def test_supported_languages() -> None: "en-us-jennyneural", ]: assert lang not in {s.lower() for s in SUPPORTED_LANGUAGES} - assert len(SUPPORTED_LANGUAGES) > 70 + assert len(SUPPORTED_LANGUAGES) > 100 async def test_invalid_language(hass: HomeAssistant, mock_tts, calls) -> None: