Add additional support over NC (#28527)

* Add voice support over NC

* Add discovery support for TTS / STT

* fix cloud TTS discovery

* Fix dev config

* Fix discovery

* Bump hass-nabucasa 0.25

* Add channel support

* Fix lint

* Update homeassistant/components/cloud/__init__.py

Co-Authored-By: Paulus Schoutsen <balloob@gmail.com>

* Update homeassistant/components/cloud/tts.py

Co-Authored-By: Paulus Schoutsen <balloob@gmail.com>

* Update homeassistant/components/cloud/tts.py

Co-Authored-By: Paulus Schoutsen <balloob@gmail.com>

* Update homeassistant/components/cloud/tts.py

Co-Authored-By: Paulus Schoutsen <balloob@gmail.com>

* bump hass-nabucasa

* Update tts.py

* fix lint
This commit is contained in:
Pascal Vizeli 2019-11-05 22:39:15 +01:00 committed by GitHub
parent 925e26b061
commit 005a1b2713
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 282 additions and 38 deletions

View File

@ -145,7 +145,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
def get_engine(hass, config):
def get_engine(hass, config, discovery_info=None):
"""Set up Amazon Polly speech component."""
output_format = config.get(CONF_OUTPUT_FORMAT)
sample_rate = config.get(CONF_SAMPLE_RATE, DEFAULT_SAMPLE_RATES[output_format])

View File

@ -52,7 +52,7 @@ _OPTIONS = {
SUPPORTED_OPTIONS = [CONF_PERSON, CONF_PITCH, CONF_SPEED, CONF_VOLUME]
def get_engine(hass, config):
def get_engine(hass, config, discovery_info=None):
"""Set up Baidu TTS component."""
return BaiduTTSProvider(hass, config)

View File

@ -23,6 +23,7 @@ from homeassistant.util.aiohttp import MockRequest
from . import account_link, http_api
from .client import CloudClient
from .const import (
CONF_ACCOUNT_LINK_URL,
CONF_ACME_DIRECTORY_SERVER,
CONF_ALEXA,
CONF_ALEXA_ACCESS_TOKEN_URL,
@ -38,7 +39,7 @@ from .const import (
CONF_REMOTE_API_URL,
CONF_SUBSCRIPTION_INFO_URL,
CONF_USER_POOL_ID,
CONF_ACCOUNT_LINK_URL,
CONF_VOICE_API_URL,
DOMAIN,
MODE_DEV,
MODE_PROD,
@ -103,6 +104,7 @@ CONFIG_SCHEMA = vol.Schema(
vol.Optional(CONF_ALEXA_ACCESS_TOKEN_URL): vol.Url(),
vol.Optional(CONF_GOOGLE_ACTIONS_REPORT_STATE_URL): vol.Url(),
vol.Optional(CONF_ACCOUNT_LINK_URL): vol.Url(),
vol.Optional(CONF_VOICE_API_URL): vol.Url(),
}
)
},
@ -230,21 +232,28 @@ async def async_setup(hass, config):
DOMAIN, SERVICE_REMOTE_DISCONNECT, _service_handler
)
loaded_binary_sensor = False
loaded = False
async def _on_connect():
"""Discover RemoteUI binary sensor."""
nonlocal loaded_binary_sensor
nonlocal loaded
if loaded_binary_sensor:
# Prevent multiple discovery
if loaded:
return
loaded = True
loaded_binary_sensor = True
hass.async_create_task(
hass.helpers.discovery.async_load_platform(
"binary_sensor", DOMAIN, {}, config
)
)
hass.async_create_task(
hass.helpers.discovery.async_load_platform("stt", DOMAIN, {}, config)
)
hass.async_create_task(
hass.helpers.discovery.async_load_platform("tts", DOMAIN, {}, config)
)
cloud.iot.register_on_connect(_on_connect)

View File

@ -38,6 +38,7 @@ CONF_ACME_DIRECTORY_SERVER = "acme_directory_server"
CONF_ALEXA_ACCESS_TOKEN_URL = "alexa_access_token_url"
CONF_GOOGLE_ACTIONS_REPORT_STATE_URL = "google_actions_report_state_url"
CONF_ACCOUNT_LINK_URL = "account_link_url"
CONF_VOICE_API_URL = "voice_api_url"
MODE_DEV = "development"
MODE_PROD = "production"

View File

@ -2,7 +2,7 @@
"domain": "cloud",
"name": "Cloud",
"documentation": "https://www.home-assistant.io/integrations/cloud",
"requirements": ["hass-nabucasa==0.23"],
"requirements": ["hass-nabucasa==0.26"],
"dependencies": ["http", "webhook"],
"codeowners": ["@home-assistant/cloud"]
}

View File

@ -0,0 +1,106 @@
"""Support for the cloud for speech to text service."""
from typing import List
from aiohttp import StreamReader
from hass_nabucasa import Cloud
from hass_nabucasa.voice import VoiceError
from homeassistant.components.stt import Provider, SpeechMetadata, SpeechResult
from homeassistant.components.stt.const import (
AudioBitRates,
AudioChannels,
AudioCodecs,
AudioFormats,
AudioSampleRates,
SpeechResultState,
)
from .const import DOMAIN
# Language codes reported by the cloud speech-to-text provider.
SUPPORT_LANGUAGES = [
    "da-DK",
    "de-DE",
    "en-AU",
    "en-CA",
    "en-GB",
    "en-US",
    "es-ES",
    "fi-FI",
    "fr-CA",
    "fr-FR",
    "it-IT",
    "ja-JP",
    "nl-NL",
    "pl-PL",
    "pt-PT",
    "ru-RU",
    "sv-SE",
    "th-TH",
    "zh-CN",
    "zh-HK",
]
async def async_get_engine(hass, config, discovery_info=None):
    """Build the cloud speech-to-text provider.

    The cloud instance is read from ``hass.data[DOMAIN]``; ``config`` and
    ``discovery_info`` are accepted but not used here.
    """
    cloud_instance: Cloud = hass.data[DOMAIN]
    provider = CloudProvider(cloud_instance)
    return provider
class CloudProvider(Provider):
    """Speech-to-text provider that forwards audio to the NabuCasa cloud."""

    def __init__(self, cloud: Cloud) -> None:
        """Keep the cloud instance used for voice requests."""
        self.cloud = cloud

    @property
    def supported_languages(self) -> List[str]:
        """Return the language codes this provider accepts."""
        return SUPPORT_LANGUAGES

    @property
    def supported_formats(self) -> List[AudioFormats]:
        """Return the accepted container formats (WAV and OGG)."""
        return [AudioFormats.WAV, AudioFormats.OGG]

    @property
    def supported_codecs(self) -> List[AudioCodecs]:
        """Return the accepted codecs (PCM and OPUS)."""
        return [AudioCodecs.PCM, AudioCodecs.OPUS]

    @property
    def supported_bit_rates(self) -> List[AudioBitRates]:
        """Return the accepted bit rates (16 only)."""
        return [AudioBitRates.BITRATE_16]

    @property
    def supported_sample_rates(self) -> List[AudioSampleRates]:
        """Return the accepted sample rates (16000 only)."""
        return [AudioSampleRates.SAMPLERATE_16000]

    @property
    def supported_channels(self) -> List[AudioChannels]:
        """Return the accepted channel layouts (mono only)."""
        return [AudioChannels.CHANNEL_MONO]

    async def async_process_audio_stream(
        self, metadata: SpeechMetadata, stream: StreamReader
    ) -> SpeechResult:
        """Send an audio stream to the cloud STT service and wrap the outcome.

        A ``VoiceError`` raised by the cloud call is not propagated; it is
        reported as an ERROR result with no text.
        """
        # Content description passed to the cloud call alongside the stream.
        content_type = f"audio/{metadata.format!s}; codecs=audio/{metadata.codec!s}; samplerate=16000"

        try:
            response = await self.cloud.voice.process_stt(
                stream, content_type, metadata.language
            )
        except VoiceError:
            return SpeechResult(None, SpeechResultState.ERROR)

        # Translate the cloud's success flag into the STT result state.
        text = response.text
        if response.success:
            state = SpeechResultState.SUCCESS
        else:
            state = SpeechResultState.ERROR
        return SpeechResult(text, state)

View File

@ -0,0 +1,81 @@
"""Support for the cloud for text to speech service."""
from hass_nabucasa.voice import VoiceError
from hass_nabucasa import Cloud
import voluptuous as vol
from homeassistant.components.tts import CONF_LANG, PLATFORM_SCHEMA, Provider
from .const import DOMAIN
# Configuration / option key selecting the voice gender.
CONF_GENDER = "gender"

# Values the platform schema accepts.
SUPPORT_LANGUAGES = ["en-US", "de-DE", "es-ES"]
SUPPORT_GENDER = ["male", "female"]

# Defaults used by the schema and for discovered setups.
DEFAULT_LANG = "en-US"
DEFAULT_GENDER = "female"

PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
    {
        vol.Optional(CONF_LANG, default=DEFAULT_LANG): vol.In(SUPPORT_LANGUAGES),
        vol.Optional(CONF_GENDER, default=DEFAULT_GENDER): vol.In(SUPPORT_GENDER),
    }
)
async def async_get_engine(hass, config, discovery_info=None):
    """Build the cloud text-to-speech provider.

    When set up through discovery (``discovery_info`` is not None) the
    module defaults are used; otherwise language and gender come from the
    platform configuration.
    """
    cloud_instance: Cloud = hass.data[DOMAIN]

    if discovery_info is None:
        language, gender = config[CONF_LANG], config[CONF_GENDER]
    else:
        language, gender = DEFAULT_LANG, DEFAULT_GENDER

    return CloudProvider(cloud_instance, language, gender)
class CloudProvider(Provider):
    """Text-to-speech provider that renders audio via the NabuCasa cloud."""

    def __init__(self, cloud: Cloud, language: str, gender: str) -> None:
        """Store the cloud instance and the default language and gender."""
        self.cloud = cloud
        self.name = "Cloud"
        self._language = language
        self._gender = gender

    @property
    def default_language(self):
        """Return the language this provider was set up with."""
        return self._language

    @property
    def supported_languages(self):
        """Return the list of supported languages."""
        return SUPPORT_LANGUAGES

    @property
    def supported_options(self):
        """Return the option keys this provider understands (gender only)."""
        return [CONF_GENDER]

    @property
    def default_options(self):
        """Return the default options: the gender this provider was set up with."""
        return {CONF_GENDER: self._gender}

    async def async_get_tts_audio(self, message, language, options=None):
        """Request speech audio for ``message`` from the cloud.

        Returns ``("mp3", data)`` on success and ``(None, None)`` when the
        cloud call raises ``VoiceError``.
        """
        # `options` defaults to None and may omit the gender key, so it
        # cannot be subscripted blindly; fall back to the configured gender.
        gender = (options or {}).get(CONF_GENDER, self._gender)

        try:
            data = await self.cloud.voice.process_tts(
                message, language, gender=gender
            )
        except VoiceError:
            return (None, None)

        return ("mp3", data)

View File

@ -6,16 +6,17 @@ from aiohttp import StreamReader
from homeassistant.components.stt import Provider, SpeechMetadata, SpeechResult
from homeassistant.components.stt.const import (
AudioBitRates,
AudioChannels,
AudioCodecs,
AudioFormats,
AudioSampleRates,
AudioCodecs,
SpeechResultState,
)
SUPPORT_LANGUAGES = ["en", "de"]
async def async_get_engine(hass, config):
async def async_get_engine(hass, config, discovery_info=None):
"""Set up Demo speech component."""
return DemoProvider()
@ -48,6 +49,11 @@ class DemoProvider(Provider):
"""Return a list of supported sample rates."""
return [AudioSampleRates.SAMPLERATE_16000, AudioSampleRates.SAMPLERATE_44100]
@property
def supported_channels(self) -> List[AudioChannels]:
"""Return a list of supported channels."""
return [AudioChannels.CHANNEL_STEREO]
async def async_process_audio_stream(
self, metadata: SpeechMetadata, stream: StreamReader
) -> SpeechResult:

View File

@ -14,7 +14,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
def get_engine(hass, config):
def get_engine(hass, config, discovery_info=None):
"""Set up Demo speech component."""
return DemoProvider(config[CONF_LANG])

View File

@ -122,7 +122,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
async def async_get_engine(hass, config):
async def async_get_engine(hass, config, discovery_info=None):
"""Set up Google Cloud TTS component."""
key_file = config.get(CONF_KEY_FILE)
if key_file:

View File

@ -81,7 +81,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
async def async_get_engine(hass, config):
async def async_get_engine(hass, config, discovery_info=None):
"""Set up Google speech component."""
return GoogleProvider(hass, config[CONF_LANG])

View File

@ -38,7 +38,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
async def async_get_engine(hass, config):
async def async_get_engine(hass, config, discovery_info=None):
"""Set up MaryTTS speech component."""
return MaryTTSProvider(hass, config)

View File

@ -94,7 +94,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
def get_engine(hass, config):
def get_engine(hass, config, discovery_info=None):
"""Set up Microsoft speech component."""
return MicrosoftProvider(
config[CONF_API_KEY],

View File

@ -20,7 +20,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
def get_engine(hass, config):
def get_engine(hass, config, discovery_info=None):
"""Set up Pico speech component."""
if shutil.which("pico2wave") is None:
_LOGGER.error("'pico2wave' was not found")

View File

@ -7,21 +7,22 @@ from typing import Dict, List, Optional
from aiohttp import StreamReader, web
from aiohttp.hdrs import istr
from aiohttp.web_exceptions import (
HTTPBadRequest,
HTTPNotFound,
HTTPUnsupportedMediaType,
HTTPBadRequest,
)
import attr
from homeassistant.components.http import HomeAssistantView
from homeassistant.core import callback
from homeassistant.helpers import config_per_platform
from homeassistant.helpers import config_per_platform, discovery
from homeassistant.helpers.typing import HomeAssistantType
from homeassistant.setup import async_prepare_setup_platform
from .const import (
DOMAIN,
AudioBitRates,
AudioChannels,
AudioCodecs,
AudioFormats,
AudioSampleRates,
@ -37,14 +38,17 @@ async def async_setup(hass: HomeAssistantType, config):
"""Set up STT."""
providers = {}
async def async_setup_platform(p_type, p_config, disc_info=None):
async def async_setup_platform(p_type, p_config=None, discovery_info=None):
"""Set up a TTS platform."""
if p_config is None:
p_config = {}
platform = await async_prepare_setup_platform(hass, config, DOMAIN, p_type)
if platform is None:
return
try:
provider = await platform.async_get_engine(hass, p_config)
provider = await platform.async_get_engine(hass, p_config, discovery_info)
if provider is None:
_LOGGER.error("Error setting up platform %s", p_type)
return
@ -65,6 +69,13 @@ async def async_setup(hass: HomeAssistantType, config):
if setup_tasks:
await asyncio.wait(setup_tasks)
# Add discovery support
async def async_platform_discovered(platform, info):
"""Handle for discovered platform."""
await async_setup_platform(platform, discovery_info=info)
discovery.async_listen_platform(hass, DOMAIN, async_platform_discovered)
hass.http.register_view(SpeechToTextView(providers))
return True
@ -78,13 +89,14 @@ class SpeechMetadata:
codec: AudioCodecs = attr.ib()
bit_rate: AudioBitRates = attr.ib(converter=int)
sample_rate: AudioSampleRates = attr.ib(converter=int)
channel: AudioChannels = attr.ib(converter=int)
@attr.s
class SpeechResult:
"""Result of audio Speech."""
text: str = attr.ib()
text: Optional[str] = attr.ib()
result: SpeechResultState = attr.ib()
@ -112,12 +124,17 @@ class Provider(ABC):
@property
@abstractmethod
def supported_bit_rates(self) -> List[AudioBitRates]:
"""Return a list of supported bit_rates."""
"""Return a list of supported bit rates."""
@property
@abstractmethod
def supported_sample_rates(self) -> List[AudioSampleRates]:
"""Return a list of supported sample_rates."""
"""Return a list of supported sample rates."""
@property
@abstractmethod
def supported_channels(self) -> List[AudioChannels]:
"""Return a list of supported channels."""
@abstractmethod
async def async_process_audio_stream(
@ -137,6 +154,7 @@ class Provider(ABC):
or metadata.codec not in self.supported_codecs
or metadata.bit_rate not in self.supported_bit_rates
or metadata.sample_rate not in self.supported_sample_rates
or metadata.channel not in self.supported_channels
):
return False
return True
@ -157,7 +175,7 @@ class SpeechToTextView(HomeAssistantView):
def _metadata_from_header(request: web.Request) -> Optional[SpeechMetadata]:
"""Extract metadata from header.
X-Speech-Content: format=wav; codec=pcm; samplerate=16000; bitrate=16; language=de_de
X-Speech-Content: format=wav; codec=pcm; sample_rate=16000; bit_rate=16; channel=1; language=de_de
"""
try:
data = request.headers[istr("X-Speech-Content")].split(";")
@ -213,5 +231,6 @@ class SpeechToTextView(HomeAssistantView):
"codecs": stt_provider.supported_codecs,
"sample_rates": stt_provider.supported_sample_rates,
"bit_rates": stt_provider.supported_bit_rates,
"channels": stt_provider.supported_channels,
}
)

View File

@ -19,7 +19,7 @@ class AudioFormats(str, Enum):
class AudioBitRates(int, Enum):
"""Supported Audio bit_rates."""
"""Supported Audio bit rates."""
BITRATE_8 = 8
BITRATE_16 = 16
@ -28,7 +28,7 @@ class AudioBitRates(int, Enum):
class AudioSampleRates(int, Enum):
"""Supported Audio sample_rates."""
"""Supported Audio sample rates."""
SAMPLERATE_8000 = 8000
SAMPLERATE_11000 = 11000
@ -41,6 +41,13 @@ class AudioSampleRates(int, Enum):
SAMPLERATE_48000 = 48000
class AudioChannels(int, Enum):
"""Supported Audio channel."""
CHANNEL_MONO = 1
CHANNEL_STEREO = 2
class SpeechResultState(str, Enum):
"""Result state of speech."""

View File

@ -25,7 +25,7 @@ from homeassistant.components.media_player.const import (
from homeassistant.const import ATTR_ENTITY_ID, CONF_PLATFORM, ENTITY_MATCH_ALL
from homeassistant.core import callback
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import config_per_platform
from homeassistant.helpers import config_per_platform, discovery
import homeassistant.helpers.config_validation as cv
from homeassistant.helpers.typing import HomeAssistantType
from homeassistant.setup import async_prepare_setup_platform
@ -118,17 +118,24 @@ async def async_setup(hass, config):
hass.http.register_view(TextToSpeechView(tts))
hass.http.register_view(TextToSpeechUrlView(tts))
async def async_setup_platform(p_type, p_config, disc_info=None):
async def async_setup_platform(p_type, p_config=None, discovery_info=None):
"""Set up a TTS platform."""
if p_config is None:
p_config = {}
platform = await async_prepare_setup_platform(hass, config, DOMAIN, p_type)
if platform is None:
return
try:
if hasattr(platform, "async_get_engine"):
provider = await platform.async_get_engine(hass, p_config)
provider = await platform.async_get_engine(
hass, p_config, discovery_info
)
else:
provider = await hass.async_add_job(platform.get_engine, hass, p_config)
provider = await hass.async_add_job(
platform.get_engine, hass, p_config, discovery_info
)
if provider is None:
_LOGGER.error("Error setting up platform %s", p_type)
@ -178,6 +185,12 @@ async def async_setup(hass, config):
if setup_tasks:
await asyncio.wait(setup_tasks)
async def async_platform_discovered(platform, info):
"""Handle for discovered platform."""
await async_setup_platform(platform, discovery_info=info)
discovery.async_listen_platform(hass, DOMAIN, async_platform_discovered)
async def async_clear_cache_handle(service):
"""Handle clear cache service call."""
await tts.async_clear_cache()

View File

@ -131,7 +131,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
async def async_get_engine(hass, config):
async def async_get_engine(hass, config, discovery_info=None):
"""Set up VoiceRSS TTS component."""
return VoiceRSSProvider(hass, config)

View File

@ -90,7 +90,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
)
def get_engine(hass, config):
def get_engine(hass, config, discovery_info=None):
"""Set up IBM Watson TTS component."""
from ibm_watson import TextToSpeechV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

View File

@ -79,7 +79,7 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
SUPPORTED_OPTIONS = [CONF_CODEC, CONF_VOICE, CONF_EMOTION, CONF_SPEED]
async def async_get_engine(hass, config):
async def async_get_engine(hass, config, discovery_info=None):
"""Set up VoiceRSS speech component."""
return YandexSpeechKitProvider(hass, config)

View File

@ -10,7 +10,7 @@ certifi>=2019.9.11
contextvars==2.4;python_version<"3.7"
cryptography==2.8
distro==1.4.0
hass-nabucasa==0.23
hass-nabucasa==0.26
home-assistant-frontend==20191025.1
importlib-metadata==0.23
jinja2>=2.10.3

View File

@ -44,6 +44,7 @@ disable=
too-many-public-methods,
too-many-return-statements,
too-many-statements,
too-many-boolean-expressions,
unnecessary-pass,
unused-argument

View File

@ -622,7 +622,7 @@ habitipy==0.2.0
hangups==0.4.9
# homeassistant.components.cloud
hass-nabucasa==0.23
hass-nabucasa==0.26
# homeassistant.components.mqtt
hbmqtt==0.9.5

View File

@ -204,7 +204,7 @@ ha-ffmpeg==2.0
hangups==0.4.9
# homeassistant.components.cloud
hass-nabucasa==0.23
hass-nabucasa==0.26
# homeassistant.components.mqtt
hbmqtt==0.9.5

View File

@ -27,6 +27,7 @@ async def test_demo_settings(hass_client):
"sample_rates": [16000, 44100],
"formats": ["wav"],
"codecs": ["pcm"],
"channels": [2],
}
@ -45,7 +46,7 @@ async def test_demo_speech_wrong_metadata(hass_client):
response = await client.post(
"/api/stt/demo",
headers={
"X-Speech-Content": "format=wav; codec=pcm; sample_rate=8000; bit_rate=16; language=de"
"X-Speech-Content": "format=wav; codec=pcm; sample_rate=8000; bit_rate=16; channel=1; language=de"
},
data=b"Test",
)
@ -59,7 +60,7 @@ async def test_demo_speech(hass_client):
response = await client.post(
"/api/stt/demo",
headers={
"X-Speech-Content": "format=wav; codec=pcm; sample_rate=16000; bit_rate=16; language=de"
"X-Speech-Content": "format=wav; codec=pcm; sample_rate=16000; bit_rate=16; channel=2; language=de"
},
data=b"Test",
)