Reduce discovery integration matching overhead (#77194)

J. Nick Koston 2022-08-23 04:35:20 -10:00 committed by GitHub
parent bf5ab64b99
commit c975146146
3 changed files with 89 additions and 14 deletions


@@ -2,7 +2,9 @@
 from __future__ import annotations

 from dataclasses import dataclass
-import fnmatch
+from fnmatch import translate
+from functools import lru_cache
+import re
 from typing import TYPE_CHECKING, Final, TypedDict

 from lru import LRU  # pylint: disable=no-name-in-module
@@ -136,12 +138,6 @@ def ble_device_matches(
         return False

     advertisement_data = service_info.advertisement
-    if (local_name := matcher.get(LOCAL_NAME)) is not None and not fnmatch.fnmatch(
-        advertisement_data.local_name or device.name or device.address,
-        local_name,
-    ):
-        return False
-
     if (
         service_uuid := matcher.get(SERVICE_UUID)
     ) is not None and service_uuid not in advertisement_data.service_uuids:
@@ -165,4 +161,34 @@ def ble_device_matches(
     ):
         return False

+    if (local_name := matcher.get(LOCAL_NAME)) is not None and (
+        (device_name := advertisement_data.local_name or device.name) is None
+        or not _memorized_fnmatch(
+            device_name,
+            local_name,
+        )
+    ):
+        return False
+
     return True
+
+
+@lru_cache(maxsize=4096, typed=True)
+def _compile_fnmatch(pattern: str) -> re.Pattern:
+    """Compile a fnmatch pattern."""
+    return re.compile(translate(pattern))
+
+
+@lru_cache(maxsize=1024, typed=True)
+def _memorized_fnmatch(name: str, pattern: str) -> bool:
+    """Memorized version of fnmatch that has a larger lru_cache.
+
+    The default version of fnmatch only has a lru_cache of 256 entries.
+    With many devices we quickly reach that limit and end up compiling
+    the same pattern over and over again.
+
+    Bluetooth has its own memorized fnmatch with its own lru_cache
+    since the data is going to be relatively the same
+    since the devices will not change frequently.
+    """
+    return bool(_compile_fnmatch(pattern).match(name))
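
The helper pair above is the core of the change. For illustration only, and not part of the diff (the helper names without the leading underscore and the sample device names are made up), a standalone sketch of the same two-level cache shows that a repeated (name, pattern) pair is answered from the outer lru_cache, while a new name still reuses the already compiled pattern:

import re
from fnmatch import translate
from functools import lru_cache


@lru_cache(maxsize=4096, typed=True)
def compile_fnmatch(pattern: str) -> re.Pattern:
    # Translate the glob to a regex and compile it once per distinct pattern.
    return re.compile(translate(pattern))


@lru_cache(maxsize=1024, typed=True)
def memorized_fnmatch(name: str, pattern: str) -> bool:
    # Cache the final boolean per (name, pattern) pair as well.
    return bool(compile_fnmatch(pattern).match(name))


# A stream of BLE advertisements repeats the same local names over and over.
for name in ("LYWSD03MMC", "LYWSD03MMC", "ATC_1B2C3D", "LYWSD03MMC"):
    memorized_fnmatch(name, "LYWSD03MMC*")

print(memorized_fnmatch.cache_info())  # CacheInfo(hits=2, misses=2, maxsize=1024, currsize=2)
print(compile_fnmatch.cache_info())    # the pattern was translated and compiled only once

Because advertisements keep arriving from the same nearby devices, both caches stay hot in practice, which is exactly the assumption the docstring states.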


@@ -7,10 +7,12 @@ from collections.abc import Callable, Iterable
 import contextlib
 from dataclasses import dataclass
 from datetime import timedelta
-import fnmatch
+from fnmatch import translate
+from functools import lru_cache
 from ipaddress import ip_address as make_ip_address
 import logging
 import os
+import re
 import threading
 from typing import TYPE_CHECKING, Any, Final, cast
@@ -204,12 +206,14 @@ class WatcherBase:
             if (
                 matcher_mac := matcher.get(MAC_ADDRESS)
-            ) is not None and not fnmatch.fnmatch(uppercase_mac, matcher_mac):
+            ) is not None and not _memorized_fnmatch(uppercase_mac, matcher_mac):
                 continue

             if (
                 matcher_hostname := matcher.get(HOSTNAME)
-            ) is not None and not fnmatch.fnmatch(lowercase_hostname, matcher_hostname):
+            ) is not None and not _memorized_fnmatch(
+                lowercase_hostname, matcher_hostname
+            ):
                 continue

             _LOGGER.debug("Matched %s against %s", data, matcher)
@@ -514,3 +518,24 @@ def _verify_working_pcap(cap_filter: str) -> None:
     )

     compile_filter(cap_filter)
+
+
+@lru_cache(maxsize=4096, typed=True)
+def _compile_fnmatch(pattern: str) -> re.Pattern:
+    """Compile a fnmatch pattern."""
+    return re.compile(translate(pattern))
+
+
+@lru_cache(maxsize=1024, typed=True)
+def _memorized_fnmatch(name: str, pattern: str) -> bool:
+    """Memorized version of fnmatch that has a larger lru_cache.
+
+    The default version of fnmatch only has a lru_cache of 256 entries.
+    With many devices we quickly reach that limit and end up compiling
+    the same pattern over and over again.
+
+    DHCP has its own memorized fnmatch with its own lru_cache
+    since the data is going to be relatively the same
+    since the devices will not change frequently
+    """
+    return bool(_compile_fnmatch(pattern).match(name))
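
A behavioral note worth spelling out: fnmatch.fnmatch runs os.path.normcase on both of its arguments, while a regex compiled from fnmatch.translate does not, so _memorized_fnmatch behaves like fnmatch.fnmatchcase. The surrounding DHCP code already passes case-normalized values (uppercase_mac, lowercase_hostname), so the swap preserves behavior for these inputs. A quick sanity-check sketch, with invented sample values:

import re
from fnmatch import fnmatchcase, translate

samples = [
    ("AABBCC123456", "AABBCC*"),      # uppercase MAC against an OUI-style glob
    ("tasmota-kitchen", "tasmota*"),  # lowercase hostname
    ("Tasmota-Kitchen", "tasmota*"),  # case matters once normcase is gone
]
for name, pattern in samples:
    compiled = bool(re.compile(translate(pattern)).match(name))
    # The compiled-regex path agrees with fnmatchcase, the case-sensitive variant.
    assert compiled == fnmatchcase(name, pattern)
    print(f"{name!r} vs {pattern!r}: {compiled}")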


@@ -5,9 +5,11 @@ import asyncio
 import contextlib
 from contextlib import suppress
 from dataclasses import dataclass
-import fnmatch
+from fnmatch import translate
+from functools import lru_cache
 from ipaddress import IPv4Address, IPv6Address, ip_address
 import logging
+import re
 import socket
 import sys
 from typing import Any, Final, cast
@@ -302,7 +304,8 @@ def _match_against_data(
             return False
         match_val = matcher[key]
         assert isinstance(match_val, str)
-        if not fnmatch.fnmatch(match_data[key], match_val):
+        if not _memorized_fnmatch(match_data[key], match_val):
             return False
     return True
@@ -312,7 +315,7 @@ def _match_against_props(matcher: dict[str, str], props: dict[str, str]) -> bool
     return not any(
         key
         for key in matcher
-        if key not in props or not fnmatch.fnmatch(props[key].lower(), matcher[key])
+        if key not in props or not _memorized_fnmatch(props[key].lower(), matcher[key])
     )
@@ -484,7 +487,7 @@ def async_get_homekit_discovery_domain(
         if (
             model != test_model
             and not model.startswith((f"{test_model} ", f"{test_model}-"))
-            and not fnmatch.fnmatch(model, test_model)
+            and not _memorized_fnmatch(model, test_model)
         ):
             continue
@@ -575,3 +578,24 @@ def _truncate_location_name_to_valid(location_name: str) -> str:
         location_name,
     )
     return location_name.encode("utf-8")[:MAX_NAME_LEN].decode("utf-8", "ignore")
+
+
+@lru_cache(maxsize=4096, typed=True)
+def _compile_fnmatch(pattern: str) -> re.Pattern:
+    """Compile a fnmatch pattern."""
+    return re.compile(translate(pattern))
+
+
+@lru_cache(maxsize=1024, typed=True)
+def _memorized_fnmatch(name: str, pattern: str) -> bool:
+    """Memorized version of fnmatch that has a larger lru_cache.
+
+    The default version of fnmatch only has a lru_cache of 256 entries.
+    With many devices we quickly reach that limit and end up compiling
+    the same pattern over and over again.
+
+    Zeroconf has its own memorized fnmatch with its own lru_cache
+    since the data is going to be relatively the same
+    since the devices will not change frequently
+    """
+    return bool(_compile_fnmatch(pattern).match(name))
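
To put a number on the overhead the commit title refers to, here is a rough micro-benchmark sketch (timings are machine-dependent, the service-instance names are invented, and none of this is taken from the PR) that matches a recurring set of zeroconf-style names against a glob with plain fnmatch.fnmatch and with cached helpers in the style of the ones added above:

import re
import timeit
from fnmatch import fnmatch, translate
from functools import lru_cache


@lru_cache(maxsize=4096, typed=True)
def compile_fnmatch(pattern: str) -> re.Pattern:
    return re.compile(translate(pattern))


@lru_cache(maxsize=1024, typed=True)
def memorized_fnmatch(name: str, pattern: str) -> bool:
    return bool(compile_fnmatch(pattern).match(name))


# 50 distinct names seen over and over, the shape of real discovery traffic.
names = [f"device-{i % 50:02d}._hap._tcp.local." for i in range(5000)]
pattern = "device-*"

plain = timeit.timeit(lambda: [fnmatch(n, pattern) for n in names], number=20)
cached = timeit.timeit(
    lambda: [memorized_fnmatch(n, pattern) for n in names], number=20
)
print(f"fnmatch.fnmatch: {plain:.3f}s  memorized_fnmatch: {cached:.3f}s")

After the first pass, the memorized path is a single lru_cache lookup per (name, pattern) pair, while fnmatch.fnmatch redoes the case normalization and its own internal cache lookup on every call, which is where the saving comes from.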