From c975146146ebd1d52b7e208cb78ba816536619d6 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 23 Aug 2022 04:35:20 -1000 Subject: [PATCH] Reduce discovery integration matching overhead (#77194) --- homeassistant/components/bluetooth/match.py | 40 +++++++++++++++---- homeassistant/components/dhcp/__init__.py | 31 ++++++++++++-- homeassistant/components/zeroconf/__init__.py | 32 +++++++++++++-- 3 files changed, 89 insertions(+), 14 deletions(-) diff --git a/homeassistant/components/bluetooth/match.py b/homeassistant/components/bluetooth/match.py index 08b3716c50a..333ba020b74 100644 --- a/homeassistant/components/bluetooth/match.py +++ b/homeassistant/components/bluetooth/match.py @@ -2,7 +2,9 @@ from __future__ import annotations from dataclasses import dataclass -import fnmatch +from fnmatch import translate +from functools import lru_cache +import re from typing import TYPE_CHECKING, Final, TypedDict from lru import LRU # pylint: disable=no-name-in-module @@ -136,12 +138,6 @@ def ble_device_matches( return False advertisement_data = service_info.advertisement - if (local_name := matcher.get(LOCAL_NAME)) is not None and not fnmatch.fnmatch( - advertisement_data.local_name or device.name or device.address, - local_name, - ): - return False - if ( service_uuid := matcher.get(SERVICE_UUID) ) is not None and service_uuid not in advertisement_data.service_uuids: @@ -165,4 +161,34 @@ def ble_device_matches( ): return False + if (local_name := matcher.get(LOCAL_NAME)) is not None and ( + (device_name := advertisement_data.local_name or device.name) is None + or not _memorized_fnmatch( + device_name, + local_name, + ) + ): + return False + return True + + +@lru_cache(maxsize=4096, typed=True) +def _compile_fnmatch(pattern: str) -> re.Pattern: + """Compile a fnmatch pattern.""" + return re.compile(translate(pattern)) + + +@lru_cache(maxsize=1024, typed=True) +def _memorized_fnmatch(name: str, pattern: str) -> bool: + """Memorized version of fnmatch that has a 
larger lru_cache. + + The default version of fnmatch only has a lru_cache of 256 entries. + With many devices we quickly reach that limit and end up compiling + the same pattern over and over again. + + Bluetooth has its own memorized fnmatch with its own lru_cache + since the data is going to be relatively the same + since the devices will not change frequently. + """ + return bool(_compile_fnmatch(pattern).match(name)) diff --git a/homeassistant/components/dhcp/__init__.py b/homeassistant/components/dhcp/__init__.py index be9cbd7426d..7a5854fc53e 100644 --- a/homeassistant/components/dhcp/__init__.py +++ b/homeassistant/components/dhcp/__init__.py @@ -7,10 +7,12 @@ from collections.abc import Callable, Iterable import contextlib from dataclasses import dataclass from datetime import timedelta -import fnmatch +from fnmatch import translate +from functools import lru_cache from ipaddress import ip_address as make_ip_address import logging import os +import re import threading from typing import TYPE_CHECKING, Any, Final, cast @@ -204,12 +206,14 @@ class WatcherBase: if ( matcher_mac := matcher.get(MAC_ADDRESS) - ) is not None and not fnmatch.fnmatch(uppercase_mac, matcher_mac): + ) is not None and not _memorized_fnmatch(uppercase_mac, matcher_mac): continue if ( matcher_hostname := matcher.get(HOSTNAME) - ) is not None and not fnmatch.fnmatch(lowercase_hostname, matcher_hostname): + ) is not None and not _memorized_fnmatch( + lowercase_hostname, matcher_hostname + ): continue _LOGGER.debug("Matched %s against %s", data, matcher) @@ -514,3 +518,24 @@ def _verify_working_pcap(cap_filter: str) -> None: ) compile_filter(cap_filter) + + +@lru_cache(maxsize=4096, typed=True) +def _compile_fnmatch(pattern: str) -> re.Pattern: + """Compile a fnmatch pattern.""" + return re.compile(translate(pattern)) + + +@lru_cache(maxsize=1024, typed=True) +def _memorized_fnmatch(name: str, pattern: str) -> bool: + """Memorized version of fnmatch that has a larger lru_cache. 
+ + The default version of fnmatch only has a lru_cache of 256 entries. + With many devices we quickly reach that limit and end up compiling + the same pattern over and over again. + + DHCP has its own memorized fnmatch with its own lru_cache + since the data is going to be relatively the same + because the devices will not change frequently. + """ + return bool(_compile_fnmatch(pattern).match(name)) diff --git a/homeassistant/components/zeroconf/__init__.py b/homeassistant/components/zeroconf/__init__.py index 1bfa44f3894..476cbd82cf8 100644 --- a/homeassistant/components/zeroconf/__init__.py +++ b/homeassistant/components/zeroconf/__init__.py @@ -5,9 +5,11 @@ import asyncio import contextlib from contextlib import suppress from dataclasses import dataclass -import fnmatch +from fnmatch import translate +from functools import lru_cache from ipaddress import IPv4Address, IPv6Address, ip_address import logging +import re import socket import sys from typing import Any, Final, cast @@ -302,7 +304,8 @@ def _match_against_data( return False match_val = matcher[key] assert isinstance(match_val, str) - if not fnmatch.fnmatch(match_data[key], match_val): + + if not _memorized_fnmatch(match_data[key], match_val): return False return True @@ -312,7 +315,7 @@ def _match_against_props(matcher: dict[str, str], props: dict[str, str]) -> bool return not any( key for key in matcher - if key not in props or not fnmatch.fnmatch(props[key].lower(), matcher[key]) + if key not in props or not _memorized_fnmatch(props[key].lower(), matcher[key]) ) @@ -484,7 +487,7 @@ def async_get_homekit_discovery_domain( if ( model != test_model and not model.startswith((f"{test_model} ", f"{test_model}-")) - and not fnmatch.fnmatch(model, test_model) + and not _memorized_fnmatch(model, test_model) ): continue @@ -575,3 +578,24 @@ def _truncate_location_name_to_valid(location_name: str) -> str: location_name, ) return location_name.encode("utf-8")[:MAX_NAME_LEN].decode("utf-8", "ignore") + + 
+@lru_cache(maxsize=4096, typed=True) +def _compile_fnmatch(pattern: str) -> re.Pattern: + """Compile a fnmatch pattern.""" + return re.compile(translate(pattern)) + + +@lru_cache(maxsize=1024, typed=True) +def _memorized_fnmatch(name: str, pattern: str) -> bool: + """Memoized version of fnmatch that has a larger lru_cache. + + The default version of fnmatch only has a lru_cache of 256 entries. + With many devices we quickly reach that limit and end up compiling + the same pattern over and over again. + + Zeroconf has its own memoized fnmatch with its own lru_cache + since the data is going to be relatively the same + because the devices will not change frequently. + """ + return bool(_compile_fnmatch(pattern).match(name))