Reduce discovery integration matching overhead (#77194)

J. Nick Koston 2022-08-23 04:35:20 -10:00 committed by GitHub
parent bf5ab64b99
commit c975146146
3 changed files with 89 additions and 14 deletions


@@ -2,7 +2,9 @@
 from __future__ import annotations

 from dataclasses import dataclass
-import fnmatch
+from fnmatch import translate
+from functools import lru_cache
+import re
 from typing import TYPE_CHECKING, Final, TypedDict

 from lru import LRU  # pylint: disable=no-name-in-module
@@ -136,12 +138,6 @@ def ble_device_matches(
         return False

     advertisement_data = service_info.advertisement
-    if (local_name := matcher.get(LOCAL_NAME)) is not None and not fnmatch.fnmatch(
-        advertisement_data.local_name or device.name or device.address,
-        local_name,
-    ):
-        return False
-
     if (
         service_uuid := matcher.get(SERVICE_UUID)
     ) is not None and service_uuid not in advertisement_data.service_uuids:
@@ -165,4 +161,34 @@ def ble_device_matches(
     ):
         return False

+    if (local_name := matcher.get(LOCAL_NAME)) is not None and (
+        (device_name := advertisement_data.local_name or device.name) is None
+        or not _memorized_fnmatch(
+            device_name,
+            local_name,
+        )
+    ):
+        return False
+
     return True
+
+
+@lru_cache(maxsize=4096, typed=True)
+def _compile_fnmatch(pattern: str) -> re.Pattern:
+    """Compile a fnmatch pattern."""
+    return re.compile(translate(pattern))
+
+
+@lru_cache(maxsize=1024, typed=True)
+def _memorized_fnmatch(name: str, pattern: str) -> bool:
+    """Memorized version of fnmatch that has a larger lru_cache.
+
+    The default version of fnmatch only has a lru_cache of 256 entries.
+    With many devices we quickly reach that limit and end up compiling
+    the same pattern over and over again.
+
+    Bluetooth has its own memorized fnmatch with its own lru_cache
+    since the data is going to be relatively the same
+    since the devices will not change frequently.
+    """
+    return bool(_compile_fnmatch(pattern).match(name))
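
The helper pair above is the core of the change. For illustration only, and not part of the diff (the helper names without the leading underscore and the sample device names are made up), a standalone sketch of the same two-level cache shows that a repeated (name, pattern) pair is answered from the outer lru_cache, while a new name still reuses the already compiled pattern:

import re
from fnmatch import translate
from functools import lru_cache


@lru_cache(maxsize=4096, typed=True)
def compile_fnmatch(pattern: str) -> re.Pattern:
    # Translate the glob to a regex and compile it once per distinct pattern.
    return re.compile(translate(pattern))


@lru_cache(maxsize=1024, typed=True)
def memorized_fnmatch(name: str, pattern: str) -> bool:
    # Cache the final boolean per (name, pattern) pair as well.
    return bool(compile_fnmatch(pattern).match(name))


# A stream of BLE advertisements repeats the same local names over and over.
for name in ("LYWSD03MMC", "LYWSD03MMC", "ATC_1B2C3D", "LYWSD03MMC"):
    memorized_fnmatch(name, "LYWSD03MMC*")

print(memorized_fnmatch.cache_info())  # CacheInfo(hits=2, misses=2, maxsize=1024, currsize=2)
print(compile_fnmatch.cache_info())    # the pattern was translated and compiled only once

Because advertisements keep arriving from the same nearby devices, both caches stay hot in practice, which is exactly the assumption the docstring states.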


@@ -7,10 +7,12 @@ from collections.abc import Callable, Iterable
 import contextlib
 from dataclasses import dataclass
 from datetime import timedelta
-import fnmatch
+from fnmatch import translate
+from functools import lru_cache
 from ipaddress import ip_address as make_ip_address
 import logging
 import os
+import re
 import threading
 from typing import TYPE_CHECKING, Any, Final, cast
@@ -204,12 +206,14 @@ class WatcherBase:
             if (
                 matcher_mac := matcher.get(MAC_ADDRESS)
-            ) is not None and not fnmatch.fnmatch(uppercase_mac, matcher_mac):
+            ) is not None and not _memorized_fnmatch(uppercase_mac, matcher_mac):
                 continue

             if (
                 matcher_hostname := matcher.get(HOSTNAME)
-            ) is not None and not fnmatch.fnmatch(lowercase_hostname, matcher_hostname):
+            ) is not None and not _memorized_fnmatch(
+                lowercase_hostname, matcher_hostname
+            ):
                 continue

             _LOGGER.debug("Matched %s against %s", data, matcher)
@@ -514,3 +518,24 @@ def _verify_working_pcap(cap_filter: str) -> None:
     )

     compile_filter(cap_filter)
+
+
+@lru_cache(maxsize=4096, typed=True)
+def _compile_fnmatch(pattern: str) -> re.Pattern:
+    """Compile a fnmatch pattern."""
+    return re.compile(translate(pattern))
+
+
+@lru_cache(maxsize=1024, typed=True)
+def _memorized_fnmatch(name: str, pattern: str) -> bool:
+    """Memorized version of fnmatch that has a larger lru_cache.
+
+    The default version of fnmatch only has a lru_cache of 256 entries.
+    With many devices we quickly reach that limit and end up compiling
+    the same pattern over and over again.
+
+    DHCP has its own memorized fnmatch with its own lru_cache
+    since the data is going to be relatively the same
+    since the devices will not change frequently
+    """
+    return bool(_compile_fnmatch(pattern).match(name))
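
A behavioral note worth spelling out: fnmatch.fnmatch runs os.path.normcase on both of its arguments, while a regex compiled from fnmatch.translate does not, so _memorized_fnmatch behaves like fnmatch.fnmatchcase. The surrounding DHCP code already passes case-normalized values (uppercase_mac, lowercase_hostname), so the swap preserves behavior for these inputs. A quick sanity-check sketch, with invented sample values:

import re
from fnmatch import fnmatchcase, translate

samples = [
    ("AABBCC123456", "AABBCC*"),      # uppercase MAC against an OUI-style glob
    ("tasmota-kitchen", "tasmota*"),  # lowercase hostname
    ("Tasmota-Kitchen", "tasmota*"),  # case matters once normcase is gone
]
for name, pattern in samples:
    compiled = bool(re.compile(translate(pattern)).match(name))
    # The compiled-regex path agrees with fnmatchcase, the case-sensitive variant.
    assert compiled == fnmatchcase(name, pattern)
    print(f"{name!r} vs {pattern!r}: {compiled}")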


@@ -5,9 +5,11 @@ import asyncio
 import contextlib
 from contextlib import suppress
 from dataclasses import dataclass
-import fnmatch
+from fnmatch import translate
+from functools import lru_cache
 from ipaddress import IPv4Address, IPv6Address, ip_address
 import logging
+import re
 import socket
 import sys
 from typing import Any, Final, cast
@@ -302,7 +304,8 @@ def _match_against_data(
             return False
         match_val = matcher[key]
         assert isinstance(match_val, str)
-        if not fnmatch.fnmatch(match_data[key], match_val):
+        if not _memorized_fnmatch(match_data[key], match_val):
             return False
     return True
@@ -312,7 +315,7 @@ def _match_against_props(matcher: dict[str, str], props: dict[str, str]) -> bool
     return not any(
         key
         for key in matcher
-        if key not in props or not fnmatch.fnmatch(props[key].lower(), matcher[key])
+        if key not in props or not _memorized_fnmatch(props[key].lower(), matcher[key])
     )
@@ -484,7 +487,7 @@ def async_get_homekit_discovery_domain(
         if (
             model != test_model
             and not model.startswith((f"{test_model} ", f"{test_model}-"))
-            and not fnmatch.fnmatch(model, test_model)
+            and not _memorized_fnmatch(model, test_model)
         ):
             continue
@@ -575,3 +578,24 @@ def _truncate_location_name_to_valid(location_name: str) -> str:
         location_name,
     )
     return location_name.encode("utf-8")[:MAX_NAME_LEN].decode("utf-8", "ignore")
+
+
+@lru_cache(maxsize=4096, typed=True)
+def _compile_fnmatch(pattern: str) -> re.Pattern:
+    """Compile a fnmatch pattern."""
+    return re.compile(translate(pattern))
+
+
+@lru_cache(maxsize=1024, typed=True)
+def _memorized_fnmatch(name: str, pattern: str) -> bool:
+    """Memorized version of fnmatch that has a larger lru_cache.
+
+    The default version of fnmatch only has a lru_cache of 256 entries.
+    With many devices we quickly reach that limit and end up compiling
+    the same pattern over and over again.
+
+    Zeroconf has its own memorized fnmatch with its own lru_cache
+    since the data is going to be relatively the same
+    since the devices will not change frequently
+    """
+    return bool(_compile_fnmatch(pattern).match(name))
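
To put a number on the overhead the commit title refers to, here is a rough micro-benchmark sketch (timings are machine-dependent, the service-instance names are invented, and none of this is taken from the PR) that matches a recurring set of zeroconf-style names against a glob with plain fnmatch.fnmatch and with cached helpers in the style of the ones added above:

import re
import timeit
from fnmatch import fnmatch, translate
from functools import lru_cache


@lru_cache(maxsize=4096, typed=True)
def compile_fnmatch(pattern: str) -> re.Pattern:
    return re.compile(translate(pattern))


@lru_cache(maxsize=1024, typed=True)
def memorized_fnmatch(name: str, pattern: str) -> bool:
    return bool(compile_fnmatch(pattern).match(name))


# 50 distinct names seen over and over, the shape of real discovery traffic.
names = [f"device-{i % 50:02d}._hap._tcp.local." for i in range(5000)]
pattern = "device-*"

plain = timeit.timeit(lambda: [fnmatch(n, pattern) for n in names], number=20)
cached = timeit.timeit(
    lambda: [memorized_fnmatch(n, pattern) for n in names], number=20
)
print(f"fnmatch.fnmatch: {plain:.3f}s  memorized_fnmatch: {cached:.3f}s")

After the first pass, the memorized path is a single lru_cache lookup per (name, pattern) pair, while fnmatch.fnmatch redoes the case normalization and its own internal cache lookup on every call, which is where the saving comes from.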