mirror of
https://github.com/home-assistant/core.git
synced 2025-07-28 15:47:12 +00:00
Fix Chinese in Google Cloud STT (#149155)
This commit is contained in:
parent
fcd514a06b
commit
53d77c4c10
@@ -186,3 +186,13 @@ STT_LANGUAGES = [
|
|||||||
"yue-Hant-HK",
|
"yue-Hant-HK",
|
||||||
"zu-ZA",
|
"zu-ZA",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# This mapping allows us to support HA's standard codes (e.g., zh-CN) while
|
||||||
|
# sending the correct code to the Google API (e.g., cmn-Hans-CN).
|
||||||
|
HA_TO_GOOGLE_STT_LANG_MAP = {
|
||||||
|
"zh-CN": "cmn-Hans-CN", # Chinese (Mandarin, Simplified, China)
|
||||||
|
"zh-HK": "yue-Hant-HK", # Chinese (Cantonese, Traditional, Hong Kong)
|
||||||
|
"zh-TW": "cmn-Hant-TW", # Chinese (Mandarin, Traditional, Taiwan)
|
||||||
|
"he-IL": "iw-IL", # Hebrew (Google uses the legacy 'iw' code)
|
||||||
|
"nb-NO": "no-NO", # Norwegian Bokmål
|
||||||
|
}
|
||||||
|
@@ -8,6 +8,7 @@ import logging
|
|||||||
from google.api_core.exceptions import GoogleAPIError, Unauthenticated
|
from google.api_core.exceptions import GoogleAPIError, Unauthenticated
|
||||||
from google.api_core.retry import AsyncRetry
|
from google.api_core.retry import AsyncRetry
|
||||||
from google.cloud import speech_v1
|
from google.cloud import speech_v1
|
||||||
|
from propcache.api import cached_property
|
||||||
|
|
||||||
from homeassistant.components.stt import (
|
from homeassistant.components.stt import (
|
||||||
AudioBitRates,
|
AudioBitRates,
|
||||||
@@ -30,6 +31,7 @@ from .const import (
|
|||||||
CONF_STT_MODEL,
|
CONF_STT_MODEL,
|
||||||
DEFAULT_STT_MODEL,
|
DEFAULT_STT_MODEL,
|
||||||
DOMAIN,
|
DOMAIN,
|
||||||
|
HA_TO_GOOGLE_STT_LANG_MAP,
|
||||||
STT_LANGUAGES,
|
STT_LANGUAGES,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -68,10 +70,14 @@ class GoogleCloudSpeechToTextEntity(SpeechToTextEntity):
|
|||||||
self._client = client
|
self._client = client
|
||||||
self._model = entry.options.get(CONF_STT_MODEL, DEFAULT_STT_MODEL)
|
self._model = entry.options.get(CONF_STT_MODEL, DEFAULT_STT_MODEL)
|
||||||
|
|
||||||
@property
|
@cached_property
|
||||||
def supported_languages(self) -> list[str]:
|
def supported_languages(self) -> list[str]:
|
||||||
"""Return a list of supported languages."""
|
"""Return a list of supported languages."""
|
||||||
return STT_LANGUAGES
|
# Combine the native Google languages and the standard HA languages.
|
||||||
|
# A set is used to automatically handle duplicates.
|
||||||
|
supported = set(STT_LANGUAGES)
|
||||||
|
supported.update(HA_TO_GOOGLE_STT_LANG_MAP.keys())
|
||||||
|
return sorted(supported)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def supported_formats(self) -> list[AudioFormats]:
|
def supported_formats(self) -> list[AudioFormats]:
|
||||||
@@ -102,6 +108,10 @@ class GoogleCloudSpeechToTextEntity(SpeechToTextEntity):
|
|||||||
self, metadata: SpeechMetadata, stream: AsyncIterable[bytes]
|
self, metadata: SpeechMetadata, stream: AsyncIterable[bytes]
|
||||||
) -> SpeechResult:
|
) -> SpeechResult:
|
||||||
"""Process an audio stream to STT service."""
|
"""Process an audio stream to STT service."""
|
||||||
|
language_code = HA_TO_GOOGLE_STT_LANG_MAP.get(
|
||||||
|
metadata.language, metadata.language
|
||||||
|
)
|
||||||
|
|
||||||
streaming_config = speech_v1.StreamingRecognitionConfig(
|
streaming_config = speech_v1.StreamingRecognitionConfig(
|
||||||
config=speech_v1.RecognitionConfig(
|
config=speech_v1.RecognitionConfig(
|
||||||
encoding=(
|
encoding=(
|
||||||
@@ -110,7 +120,7 @@ class GoogleCloudSpeechToTextEntity(SpeechToTextEntity):
|
|||||||
else speech_v1.RecognitionConfig.AudioEncoding.LINEAR16
|
else speech_v1.RecognitionConfig.AudioEncoding.LINEAR16
|
||||||
),
|
),
|
||||||
sample_rate_hertz=metadata.sample_rate,
|
sample_rate_hertz=metadata.sample_rate,
|
||||||
language_code=metadata.language,
|
language_code=language_code,
|
||||||
model=self._model,
|
model=self._model,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user