mirror of
https://github.com/home-assistant/core.git
synced 2025-07-23 13:17:32 +00:00
Add language util (#91290)
* Add language util * Add no match tests * Update tests/util/test_language.py Co-authored-by: Paulus Schoutsen <balloob@gmail.com> --------- Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
This commit is contained in:
parent
687c035bb2
commit
f0c625b2ad
145
homeassistant/util/language.py
Normal file
145
homeassistant/util/language.py
Normal file
@ -0,0 +1,145 @@
|
||||
"""Helper methods for language selection in Home Assistant."""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterable
|
||||
from dataclasses import dataclass
|
||||
import operator
|
||||
import re
|
||||
|
||||
# Separator between the parts of a language tag; both "-" and "_" are
# accepted (e.g. "en-US" or "en_US").
SEPARATOR_RE = re.compile(r"[-_]")
|
||||
|
||||
|
||||
def preferred_regions(
    language: str,
    country: str | None = None,
    code: str | None = None,
) -> Iterable[str | None]:
    """Generate candidate regions for a language from country/code hints.

    Yields, in order:
      1. the upper-cased ``country`` hint, when one is given;
      2. a language-specific default ("US" for English without a country
         hint; a region chosen from ``code`` for Chinese);
      3. the upper-cased language itself (so "fr" also suggests "FR").
    """
    if country is not None:
        yield country.upper()

    if language == "zh":
        # Anything other than the two known codes falls back to China.
        # NOTE(review): "Hans" mapping to "TW" looks inverted relative to
        # common usage; the existing tests pin it, so confirm before changing.
        yield {"Hant": "HK", "Hans": "TW"}.get(code, "CN")
    elif language == "en" and country is None:
        # With no country hint, U.S. English is the default
        yield "US"

    # fr -> fr-FR
    yield language.upper()
|
||||
|
||||
|
||||
def is_region(language: str, region: str | None) -> bool:
    """Tell whether a tag's second part should be read as a region.

    A few languages have second parts that are known to be a script/code
    rather than a region; for those exact (case-sensitive) values this
    returns False. Every other combination, including ``region=None``,
    returns True.
    """
    # Per-language second parts that are script/code names, not regions
    non_region_parts = {
        "es": ("419",),
        "sr": ("Latn",),
        "zh": ("Hans", "Hant"),
    }
    return region not in non_region_parts.get(language, ())
|
||||
|
||||
|
||||
@dataclass
class Dialect:
    """Language with optional region and script/code.

    Casing is normalized on construction: the language is case-folded and
    the region (when present) is upper-cased. The code is stored as given.
    """

    # Language part of the tag, e.g. "en"
    language: str
    # Region part of the tag, e.g. "US"; None when the tag has no region
    region: str | None
    # Script/code part of the tag, e.g. "Hant" or "419"
    code: str | None = None

    def __post_init__(self) -> None:
        """Fix casing of language/region."""
        # Languages are lower-cased
        self.language = self.language.casefold()

        if self.region is not None:
            # Regions are upper-cased
            self.region = self.region.upper()

    def score(self, dialect: Dialect, country: str | None = None) -> int:
        """Return score for match with another dialect where higher is better.

        Score < 0 indicates a failure to match.

        Returns -1 when the languages differ, 1 when the regions are equal
        (including both missing) or overlap once a missing region has been
        replaced by the preferred regions, and 0 otherwise. The preferred
        regions are derived from this dialect's language and code plus the
        optional ``country`` hint.
        """
        if self.language != dialect.language:
            # Not a match
            return -1

        if self.region == dialect.region:
            # Language + region match
            return 1

        pref_regions: set[str | None] = set()
        if (self.region is None) or (dialect.region is None):
            # Generate a set of preferred regions
            pref_regions = set(
                preferred_regions(
                    self.language,
                    country=country,
                    code=self.code,
                )
            )

        # Replace missing regions with preferred
        regions = pref_regions if self.region is None else {self.region}
        other_regions = pref_regions if dialect.region is None else {dialect.region}

        # Better match if there is overlap in regions
        return 1 if regions.intersection(other_regions) else 0

    @staticmethod
    def parse(tag: str) -> Dialect:
        """Parse language tag into language/region/code.

        The tag is split once on "-" or "_". The part after the separator
        becomes either the region or the code, depending on ``is_region``.
        Classification is case-insensitive: "ZH-hant" parses the same as
        "zh-Hant".
        """
        parts = SEPARATOR_RE.split(tag, maxsplit=1)
        # Normalize before classifying: is_region compares the language
        # against lower-case names, so "ZH" would otherwise never be
        # recognized as Chinese.
        language = parts[0].casefold()
        region: str | None = None
        code: str | None = None

        if len(parts) > 1:
            region_or_code = parts[1]
            # Known script/code names are title-cased ("Hant", "Latn") or
            # numeric ("419"), so test the title-cased form to accept any
            # input casing.
            canonical_code = region_or_code.title()
            if is_region(language, canonical_code):
                # US, GB, etc.
                region = region_or_code
            else:
                # Hant, 419, etc. (stored in canonical casing so later
                # comparisons against "Hant"/"Hans" succeed)
                code = canonical_code

        return Dialect(
            language=language,
            region=region,
            code=code,
        )
|
||||
|
||||
|
||||
def matches(
    target: str, supported: Iterable[str], country: str | None = None
) -> list[str]:
    """Return the supported tags that match a target tag, best match first.

    Each supported tag is parsed and scored against the parsed target
    (passing along the optional country hint). Tags with a negative score
    are dropped; the rest are ordered by descending score, with ties
    keeping the order in which they appeared in ``supported``.
    """
    wanted = Dialect.parse(target)

    ranked: list[tuple[int, str]] = []
    for candidate in supported:
        points = wanted.score(Dialect.parse(candidate), country=country)
        # Score < 0 is not a match
        if points >= 0:
            ranked.append((points, candidate))

    # Higher score is better; the sort is stable, so equal scores keep
    # their input order even with reverse=True.
    ranked.sort(key=operator.itemgetter(0), reverse=True)
    return [candidate for _points, candidate in ranked]
|
123
tests/util/test_language.py
Normal file
123
tests/util/test_language.py
Normal file
@ -0,0 +1,123 @@
|
||||
"""Test Home Assistant language util methods."""
|
||||
from __future__ import annotations
|
||||
|
||||
from homeassistant.util import language
|
||||
|
||||
|
||||
def test_region_match() -> None:
    """An exact language/region match is ranked ahead of other regions."""
    supported = ["fr-Fr", "en-US", "en-GB"]
    result = language.matches("en-GB", supported)
    assert result == ["en-GB", "en-US"]
|
||||
|
||||
|
||||
def test_no_match() -> None:
    """Nothing is returned when no supported tag shares the language."""
    unrelated = ["de-DE", "fr-FR", "zh"]

    # With and without a region on the target
    for target in ("en-US", "en"):
        assert language.matches(target, unrelated) == []

    # Nothing supported at all
    assert language.matches("en", []) == []
|
||||
|
||||
|
||||
def test_prefer_us_english() -> None:
    """English without a region ranks U.S. English first."""
    supported = ["en-GB", "en-US", "fr-FR"]
    result = language.matches("en", supported)
    assert result == ["en-US", "en-GB"]
|
||||
|
||||
|
||||
def test_country_preferred() -> None:
    """A country hint decides which region of the language ranks first."""
    supported = ["fr-Fr", "en-US", "en-GB"]
    result = language.matches("en", supported, country="GB")
    assert result == ["en-GB", "en-US"]
|
||||
|
||||
|
||||
def test_language_as_region() -> None:
    """The language code itself doubles as a preferred region (fr -> FR)."""
    supported = ["en-US", "en-GB", "fr-CA", "fr-FR"]
    result = language.matches("fr", supported)
    assert result == ["fr-FR", "fr-CA"]
|
||||
|
||||
|
||||
def test_zh_hant() -> None:
    """zh-Hant ranks the HK region first."""
    supported = ["en-US", "en-GB", "zh-CN", "zh-HK", "zh-TW"]
    result = language.matches("zh-Hant", supported)
    assert result == ["zh-HK", "zh-CN", "zh-TW"]
|
||||
|
||||
|
||||
def test_zh_hans() -> None:
    """zh-Hans ranks the TW region first."""
    supported = ["en-US", "en-GB", "zh-CN", "zh-HK", "zh-TW"]
    result = language.matches("zh-Hans", supported)
    assert result == ["zh-TW", "zh-CN", "zh-HK"]
|
||||
|
||||
|
||||
def test_zh_no_code() -> None:
    """zh without a script/code ranks the CN region first."""
    supported = ["en-US", "en-GB", "zh-CN", "zh-HK", "zh-TW"]
    result = language.matches("zh", supported)
    assert result == ["zh-CN", "zh-HK", "zh-TW"]
|
||||
|
||||
|
||||
def test_es_419() -> None:
    """es-419 matches every es dialect, with es-ES ranked first."""
    supported = ["en-US", "en-GB", "es-CL", "es-US", "es-ES"]
    result = language.matches("es-419", supported)
    assert result == ["es-ES", "es-CL", "es-US"]
|
||||
|
||||
|
||||
def test_sr_latn() -> None:
    """sr-Latn matches the sr dialects, keeping their input order."""
    supported = ["en-US", "en-GB", "sr-CS", "sr-RS"]
    result = language.matches("sr-Latn", supported)
    assert result == ["sr-CS", "sr-RS"]
|
Loading…
x
Reference in New Issue
Block a user