Reduce discovery integration matching overhead (#77194)

This commit is contained in:
J. Nick Koston 2022-08-23 04:35:20 -10:00 committed by GitHub
parent bf5ab64b99
commit c975146146
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 89 additions and 14 deletions

View File

@ -2,7 +2,9 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass from dataclasses import dataclass
import fnmatch from fnmatch import translate
from functools import lru_cache
import re
from typing import TYPE_CHECKING, Final, TypedDict from typing import TYPE_CHECKING, Final, TypedDict
from lru import LRU # pylint: disable=no-name-in-module from lru import LRU # pylint: disable=no-name-in-module
@ -136,12 +138,6 @@ def ble_device_matches(
return False return False
advertisement_data = service_info.advertisement advertisement_data = service_info.advertisement
if (local_name := matcher.get(LOCAL_NAME)) is not None and not fnmatch.fnmatch(
advertisement_data.local_name or device.name or device.address,
local_name,
):
return False
if ( if (
service_uuid := matcher.get(SERVICE_UUID) service_uuid := matcher.get(SERVICE_UUID)
) is not None and service_uuid not in advertisement_data.service_uuids: ) is not None and service_uuid not in advertisement_data.service_uuids:
@ -165,4 +161,34 @@ def ble_device_matches(
): ):
return False return False
if (local_name := matcher.get(LOCAL_NAME)) is not None and (
(device_name := advertisement_data.local_name or device.name) is None
or not _memorized_fnmatch(
device_name,
local_name,
)
):
return False
return True return True
@lru_cache(maxsize=4096, typed=True)
def _compile_fnmatch(pattern: str) -> re.Pattern:
"""Compile a fnmatch pattern."""
return re.compile(translate(pattern))
@lru_cache(maxsize=1024, typed=True)
def _memorized_fnmatch(name: str, pattern: str) -> bool:
    """Return whether name matches the fnmatch pattern, caching the answer.

    The stock fnmatch implementation only keeps an lru_cache of 256
    compiled patterns. With many devices that limit is reached quickly
    and the same patterns end up being compiled over and over again.

    Bluetooth keeps its own memoized fnmatch with a dedicated lru_cache
    because the data it sees stays largely the same: the set of devices
    does not change frequently.
    """
    compiled = _compile_fnmatch(pattern)
    # A regex match is either a match object or None.
    return compiled.match(name) is not None

View File

@ -7,10 +7,12 @@ from collections.abc import Callable, Iterable
import contextlib import contextlib
from dataclasses import dataclass from dataclasses import dataclass
from datetime import timedelta from datetime import timedelta
import fnmatch from fnmatch import translate
from functools import lru_cache
from ipaddress import ip_address as make_ip_address from ipaddress import ip_address as make_ip_address
import logging import logging
import os import os
import re
import threading import threading
from typing import TYPE_CHECKING, Any, Final, cast from typing import TYPE_CHECKING, Any, Final, cast
@ -204,12 +206,14 @@ class WatcherBase:
if ( if (
matcher_mac := matcher.get(MAC_ADDRESS) matcher_mac := matcher.get(MAC_ADDRESS)
) is not None and not fnmatch.fnmatch(uppercase_mac, matcher_mac): ) is not None and not _memorized_fnmatch(uppercase_mac, matcher_mac):
continue continue
if ( if (
matcher_hostname := matcher.get(HOSTNAME) matcher_hostname := matcher.get(HOSTNAME)
) is not None and not fnmatch.fnmatch(lowercase_hostname, matcher_hostname): ) is not None and not _memorized_fnmatch(
lowercase_hostname, matcher_hostname
):
continue continue
_LOGGER.debug("Matched %s against %s", data, matcher) _LOGGER.debug("Matched %s against %s", data, matcher)
@ -514,3 +518,24 @@ def _verify_working_pcap(cap_filter: str) -> None:
) )
compile_filter(cap_filter) compile_filter(cap_filter)
@lru_cache(maxsize=4096, typed=True)
def _compile_fnmatch(pattern: str) -> re.Pattern:
"""Compile a fnmatch pattern."""
return re.compile(translate(pattern))
@lru_cache(maxsize=1024, typed=True)
def _memorized_fnmatch(name: str, pattern: str) -> bool:
    """Return whether name matches the fnmatch pattern, caching the answer.

    The stock fnmatch implementation only keeps an lru_cache of 256
    compiled patterns. With many devices that limit is reached quickly
    and the same patterns end up being compiled over and over again.

    DHCP keeps its own memoized fnmatch with a dedicated lru_cache
    because the data it sees stays largely the same: the set of devices
    does not change frequently.
    """
    compiled = _compile_fnmatch(pattern)
    # A regex match is either a match object or None.
    return compiled.match(name) is not None

View File

@ -5,9 +5,11 @@ import asyncio
import contextlib import contextlib
from contextlib import suppress from contextlib import suppress
from dataclasses import dataclass from dataclasses import dataclass
import fnmatch from fnmatch import translate
from functools import lru_cache
from ipaddress import IPv4Address, IPv6Address, ip_address from ipaddress import IPv4Address, IPv6Address, ip_address
import logging import logging
import re
import socket import socket
import sys import sys
from typing import Any, Final, cast from typing import Any, Final, cast
@ -302,7 +304,8 @@ def _match_against_data(
return False return False
match_val = matcher[key] match_val = matcher[key]
assert isinstance(match_val, str) assert isinstance(match_val, str)
if not fnmatch.fnmatch(match_data[key], match_val):
if not _memorized_fnmatch(match_data[key], match_val):
return False return False
return True return True
@ -312,7 +315,7 @@ def _match_against_props(matcher: dict[str, str], props: dict[str, str]) -> bool
return not any( return not any(
key key
for key in matcher for key in matcher
if key not in props or not fnmatch.fnmatch(props[key].lower(), matcher[key]) if key not in props or not _memorized_fnmatch(props[key].lower(), matcher[key])
) )
@ -484,7 +487,7 @@ def async_get_homekit_discovery_domain(
if ( if (
model != test_model model != test_model
and not model.startswith((f"{test_model} ", f"{test_model}-")) and not model.startswith((f"{test_model} ", f"{test_model}-"))
and not fnmatch.fnmatch(model, test_model) and not _memorized_fnmatch(model, test_model)
): ):
continue continue
@ -575,3 +578,24 @@ def _truncate_location_name_to_valid(location_name: str) -> str:
location_name, location_name,
) )
return location_name.encode("utf-8")[:MAX_NAME_LEN].decode("utf-8", "ignore") return location_name.encode("utf-8")[:MAX_NAME_LEN].decode("utf-8", "ignore")
@lru_cache(maxsize=4096, typed=True)
def _compile_fnmatch(pattern: str) -> re.Pattern:
"""Compile a fnmatch pattern."""
return re.compile(translate(pattern))
@lru_cache(maxsize=1024, typed=True)
def _memorized_fnmatch(name: str, pattern: str) -> bool:
    """Return whether name matches the fnmatch pattern, caching the answer.

    The stock fnmatch implementation only keeps an lru_cache of 256
    compiled patterns. With many devices that limit is reached quickly
    and the same patterns end up being compiled over and over again.

    Zeroconf keeps its own memoized fnmatch with a dedicated lru_cache
    because the data it sees stays largely the same: the set of devices
    does not change frequently.
    """
    compiled = _compile_fnmatch(pattern)
    # A regex match is either a match object or None.
    return compiled.match(name) is not None