Reduce discovery integration matching overhead (#77194)

2025-07-21 12:17:07 +00:00 · 2022-08-23 04:35:20 -10:00 · 2022-08-23 04:35:20 -10:00 · c975146146
commit c975146146
parent bf5ab64b99
3 changed files with 89 additions and 14 deletions
--- a/homeassistant/components/bluetooth/match.py
+++ b/homeassistant/components/bluetooth/match.py
@ -2,7 +2,9 @@
 from __future__ import annotations
 from dataclasses import dataclass
-import fnmatch
+from fnmatch import translate
 from functools import lru_cache
 import re
 from typing import TYPE_CHECKING, Final, TypedDict
 from lru import LRU  # pylint: disable=no-name-in-module
@ -136,12 +138,6 @@ def ble_device_matches(
        return False
    advertisement_data = service_info.advertisement
    if (local_name := matcher.get(LOCAL_NAME)) is not None and not fnmatch.fnmatch(
        advertisement_data.local_name or device.name or device.address,
        local_name,
    ):
        return False
    if (
        service_uuid := matcher.get(SERVICE_UUID)
    ) is not None and service_uuid not in advertisement_data.service_uuids:
@ -165,4 +161,34 @@ def ble_device_matches(
        ):
            return False
    if (local_name := matcher.get(LOCAL_NAME)) is not None and (
        (device_name := advertisement_data.local_name or device.name) is None
        or not _memorized_fnmatch(
            device_name,
            local_name,
        )
    ):
        return False
    return True
@lru_cache(maxsize=4096, typed=True)
 def _compile_fnmatch(pattern: str) -> re.Pattern:
    """Compile a fnmatch pattern.
    The shell-style pattern is converted to a regular expression with
    fnmatch.translate and then compiled with re.compile.
    Compiled patterns are kept in an lru_cache of up to 4096 entries,
    so a pattern is not compiled again while it remains in the cache.
    """
    return re.compile(translate(pattern))
@lru_cache(maxsize=1024, typed=True)
 def _memorized_fnmatch(name: str, pattern: str) -> bool:
    """Memoized version of fnmatch that has a larger lru_cache.
    The default version of fnmatch only has a lru_cache of 256 entries.
    With many devices we quickly reach that limit and end up compiling
    the same pattern over and over again.
    Bluetooth has its own memoized fnmatch with its own lru_cache
    because the data is going to stay relatively the same:
    the devices will not change frequently.
    Returns True if the regular expression compiled from pattern by
    _compile_fnmatch matches name, otherwise False. The result for each
    (name, pattern) pair is cached in an lru_cache of up to 1024 entries.
    The name and pattern are used exactly as given; no case normalization
    is performed here.
    NOTE(review): fnmatch.fnmatch applies os.path.normcase to both
    arguments first - confirm callers do not rely on that.
    """
    return bool(_compile_fnmatch(pattern).match(name))
--- a/homeassistant/components/dhcp/init.py
+++ b/homeassistant/components/dhcp/init.py
@ -7,10 +7,12 @@ from collections.abc import Callable, Iterable
 import contextlib
 from dataclasses import dataclass
 from datetime import timedelta
-import fnmatch
+from fnmatch import translate
 from functools import lru_cache
 from ipaddress import ip_address as make_ip_address
 import logging
 import os
 import re
 import threading
 from typing import TYPE_CHECKING, Any, Final, cast
@ -204,12 +206,14 @@ class WatcherBase:
            if (
                matcher_mac := matcher.get(MAC_ADDRESS)
-            ) is not None and not fnmatch.fnmatch(uppercase_mac, matcher_mac):
+            ) is not None and not _memorized_fnmatch(uppercase_mac, matcher_mac):
                continue
            if (
                matcher_hostname := matcher.get(HOSTNAME)
-            ) is not None and not fnmatch.fnmatch(lowercase_hostname, matcher_hostname):
+            ) is not None and not _memorized_fnmatch(
                lowercase_hostname, matcher_hostname
            ):
                continue
            _LOGGER.debug("Matched %s against %s", data, matcher)
@ -514,3 +518,24 @@ def _verify_working_pcap(cap_filter: str) -> None:
    )
    compile_filter(cap_filter)
@lru_cache(maxsize=4096, typed=True)
 def _compile_fnmatch(pattern: str) -> re.Pattern:
    """Compile a fnmatch pattern.
    The shell-style pattern is converted to a regular expression with
    fnmatch.translate and then compiled with re.compile.
    Compiled patterns are kept in an lru_cache of up to 4096 entries,
    so a pattern is not compiled again while it remains in the cache.
    """
    return re.compile(translate(pattern))
@lru_cache(maxsize=1024, typed=True)
 def _memorized_fnmatch(name: str, pattern: str) -> bool:
    """Memoized version of fnmatch that has a larger lru_cache.
    The default version of fnmatch only has a lru_cache of 256 entries.
    With many devices we quickly reach that limit and end up compiling
    the same pattern over and over again.
    DHCP has its own memoized fnmatch with its own lru_cache
    because the data is going to stay relatively the same:
    the devices will not change frequently.
    Returns True if the regular expression compiled from pattern by
    _compile_fnmatch matches name, otherwise False. The result for each
    (name, pattern) pair is cached in an lru_cache of up to 1024 entries.
    The name and pattern are used exactly as given; no case normalization
    is performed here.
    NOTE(review): fnmatch.fnmatch applies os.path.normcase to both
    arguments first - confirm callers do not rely on that.
    """
    return bool(_compile_fnmatch(pattern).match(name))
--- a/homeassistant/components/zeroconf/init.py
+++ b/homeassistant/components/zeroconf/init.py
@ -5,9 +5,11 @@ import asyncio
 import contextlib
 from contextlib import suppress
 from dataclasses import dataclass
-import fnmatch
+from fnmatch import translate
 from functools import lru_cache
 from ipaddress import IPv4Address, IPv6Address, ip_address
 import logging
 import re
 import socket
 import sys
 from typing import Any, Final, cast
@ -302,7 +304,8 @@ def _match_against_data(
            return False
        match_val = matcher[key]
        assert isinstance(match_val, str)
-        if not fnmatch.fnmatch(match_data[key], match_val):
+
        if not _memorized_fnmatch(match_data[key], match_val):
            return False
    return True
@ -312,7 +315,7 @@ def _match_against_props(matcher: dict[str, str], props: dict[str, str]) -> bool
    return not any(
        key
        for key in matcher
-        if key not in props or not fnmatch.fnmatch(props[key].lower(), matcher[key])
+        if key not in props or not _memorized_fnmatch(props[key].lower(), matcher[key])
    )
@ -484,7 +487,7 @@ def async_get_homekit_discovery_domain(
        if (
            model != test_model
            and not model.startswith((f"{test_model} ", f"{test_model}-"))
-            and not fnmatch.fnmatch(model, test_model)
+            and not _memorized_fnmatch(model, test_model)
        ):
            continue
@ -575,3 +578,24 @@ def _truncate_location_name_to_valid(location_name: str) -> str:
        location_name,
    )
    return location_name.encode("utf-8")[:MAX_NAME_LEN].decode("utf-8", "ignore")
@lru_cache(maxsize=4096, typed=True)
 def _compile_fnmatch(pattern: str) -> re.Pattern:
    """Compile a fnmatch pattern.
    The shell-style pattern is converted to a regular expression with
    fnmatch.translate and then compiled with re.compile.
    Compiled patterns are kept in an lru_cache of up to 4096 entries,
    so a pattern is not compiled again while it remains in the cache.
    """
    return re.compile(translate(pattern))
@lru_cache(maxsize=1024, typed=True)
 def _memorized_fnmatch(name: str, pattern: str) -> bool:
    """Memoized version of fnmatch that has a larger lru_cache.
    The default version of fnmatch only has a lru_cache of 256 entries.
    With many devices we quickly reach that limit and end up compiling
    the same pattern over and over again.
    Zeroconf has its own memoized fnmatch with its own lru_cache
    because the data is going to stay relatively the same:
    the devices will not change frequently.
    Returns True if the regular expression compiled from pattern by
    _compile_fnmatch matches name, otherwise False. The result for each
    (name, pattern) pair is cached in an lru_cache of up to 1024 entries.
    The name and pattern are used exactly as given; no case normalization
    is performed here.
    NOTE(review): fnmatch.fnmatch applies os.path.normcase to both
    arguments first - confirm callers do not rely on that.
    """
    return bool(_compile_fnmatch(pattern).match(name))