From 59daceafd2c70256e01382542cd2bed70be469df Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 21 Sep 2023 09:48:41 +0200 Subject: [PATCH] Avoid calling extract_stack in system_log since it does blocking I/O (#100455) --- .../components/system_log/__init__.py | 75 +++++++++++++------ homeassistant/components/zha/core/gateway.py | 18 ++--- tests/components/system_log/test_init.py | 56 ++++++++++---- 3 files changed, 103 insertions(+), 46 deletions(-) diff --git a/homeassistant/components/system_log/__init__.py b/homeassistant/components/system_log/__init__.py index ab271ec676c..fab2b7ee291 100644 --- a/homeassistant/components/system_log/__init__.py +++ b/homeassistant/components/system_log/__init__.py @@ -4,6 +4,7 @@ from __future__ import annotations from collections import OrderedDict, deque import logging import re +import sys import traceback from typing import Any, cast @@ -59,31 +60,65 @@ SERVICE_WRITE_SCHEMA = vol.Schema( def _figure_out_source( - record: logging.LogRecord, call_stack: list[tuple[str, int]], paths_re: re.Pattern + record: logging.LogRecord, paths_re: re.Pattern ) -> tuple[str, int]: + """Figure out where a log message came from.""" # If a stack trace exists, extract file names from the entire call stack. # The other case is when a regular "log" is made (without an attached # exception). In that case, just use the file where the log was made from. if record.exc_info: stack = [(x[0], x[1]) for x in traceback.extract_tb(record.exc_info[2])] - else: - index = -1 - for i, frame in enumerate(call_stack): - if frame[0] == record.pathname: - index = i + for i, (filename, _) in enumerate(stack): + # Slice the stack to the first frame that matches + # the record pathname. + if filename == record.pathname: + stack = stack[0 : i + 1] break - if index == -1: - # For some reason we couldn't find pathname in the stack. - stack = [(record.pathname, record.lineno)] - else: - stack = call_stack[0 : index + 1] + # Iterate through the stack call (in reverse) and find the last call from + # a file in Home Assistant. Try to figure out where error happened. + for path, line_number in reversed(stack): + # Try to match with a file within Home Assistant + if match := paths_re.match(path): + return (cast(str, match.group(1)), line_number) + else: + # + # We need to figure out where the log call came from if we + # don't have an exception. + # + # We do this by walking up the stack until we find the first + # frame match the record pathname so the code below + # can be used to reverse the remaining stack frames + # and find the first one that is from a file within Home Assistant. + # + # We do not call traceback.extract_stack() because it is + # it makes many stat() syscalls calls which do blocking I/O, + # and since this code is running in the event loop, we need to avoid + # blocking I/O. + + frame = sys._getframe(4) # pylint: disable=protected-access + # + # We use _getframe with 4 to skip the following frames: + # + # Jump 2 frames up to get to the actual caller + # since we are in a function, and always called from another function + # that are never the original source of the log message. + # + # Next try to skip any frames that are from the logging module + # We know that the logger module typically has 5 frames itself + # but it may change in the future so we are conservative and + # only skip 2. + # + # _getframe is cpython only but we are already using cpython specific + # code everywhere in HA so it's fine as its unlikely we will ever + # support other python implementations. + # + # Iterate through the stack call (in reverse) and find the last call from + # a file in Home Assistant. Try to figure out where error happened. + while back := frame.f_back: + if match := paths_re.match(frame.f_code.co_filename): + return (cast(str, match.group(1)), frame.f_lineno) + frame = back - # Iterate through the stack call (in reverse) and find the last call from - # a file in Home Assistant. Try to figure out where error happened. - for pathname in reversed(stack): - # Try to match with a file within Home Assistant - if match := paths_re.match(pathname[0]): - return (cast(str, match.group(1)), pathname[1]) # Ok, we don't know what this is return (record.pathname, record.lineno) @@ -217,11 +252,7 @@ class LogErrorHandler(logging.Handler): default upper limit is set to 50 (older entries are discarded) but can be changed if needed. """ - stack = [] - if not record.exc_info: - stack = [(f[0], f[1]) for f in traceback.extract_stack()] - - entry = LogEntry(record, _figure_out_source(record, stack, self.paths_re)) + entry = LogEntry(record, _figure_out_source(record, self.paths_re)) self.records.add_entry(entry) if self.fire_event: self.hass.bus.fire(EVENT_SYSTEM_LOG, entry.to_dict()) diff --git a/homeassistant/components/zha/core/gateway.py b/homeassistant/components/zha/core/gateway.py index 5fe84005d7a..c5d04dda961 100644 --- a/homeassistant/components/zha/core/gateway.py +++ b/homeassistant/components/zha/core/gateway.py @@ -10,7 +10,6 @@ import itertools import logging import re import time -import traceback from typing import TYPE_CHECKING, Any, NamedTuple from zigpy.application import ControllerApplication @@ -814,21 +813,20 @@ class LogRelayHandler(logging.Handler): super().__init__() self.hass = hass self.gateway = gateway - - def emit(self, record: LogRecord) -> None: - """Relay log message via dispatcher.""" - stack = [] - if record.levelno >= logging.WARN and not record.exc_info: - stack = [f for f, _, _, _ in traceback.extract_stack()] - hass_path: str = HOMEASSISTANT_PATH[0] config_dir = self.hass.config.config_dir - paths_re = re.compile( + self.paths_re = re.compile( r"(?:{})/(.*)".format( "|".join([re.escape(x) for x in (hass_path, config_dir)]) ) ) - entry = LogEntry(record, _figure_out_source(record, stack, paths_re)) + + def emit(self, record: LogRecord) -> None: + """Relay log message via dispatcher.""" + if record.levelno >= logging.WARN: + entry = LogEntry(record, _figure_out_source(record, self.paths_re)) + else: + entry = LogEntry(record, (record.pathname, record.lineno)) async_dispatcher_send( self.hass, ZHA_GW_MSG, diff --git a/tests/components/system_log/test_init.py b/tests/components/system_log/test_init.py index bd861ac7668..1357d9e5e9e 100644 --- a/tests/components/system_log/test_init.py +++ b/tests/components/system_log/test_init.py @@ -4,6 +4,7 @@ from __future__ import annotations import asyncio from collections.abc import Awaitable import logging +import re import traceback from typing import Any from unittest.mock import MagicMock, patch @@ -87,11 +88,6 @@ class WatchLogErrorHandler(system_log.LogErrorHandler): self.watch_event.set() -def get_frame(name): - """Get log stack frame.""" - return (name, 5, None, None) - - async def async_setup_system_log(hass, config) -> WatchLogErrorHandler: """Set up the system_log component.""" WatchLogErrorHandler.instances = [] @@ -362,21 +358,28 @@ async def test_unknown_path( assert log["source"] == ["unknown_path", 0] +def get_frame(path: str, previous_frame: MagicMock | None) -> MagicMock: + """Get log stack frame.""" + return MagicMock( + f_back=previous_frame, + f_code=MagicMock(co_filename=path), + f_lineno=5, + ) + + async def async_log_error_from_test_path(hass, path, watcher): """Log error while mocking the path.""" call_path = "internal_path.py" + main_frame = get_frame("main_path/main.py", None) + path_frame = get_frame(path, main_frame) + call_path_frame = get_frame(call_path, path_frame) + logger_frame = get_frame("venv_path/logging/log.py", call_path_frame) + with patch.object( _LOGGER, "findCaller", MagicMock(return_value=(call_path, 0, None, None)) ), patch( - "traceback.extract_stack", - MagicMock( - return_value=[ - get_frame("main_path/main.py"), - get_frame(path), - get_frame(call_path), - get_frame("venv_path/logging/log.py"), - ] - ), + "homeassistant.components.system_log.sys._getframe", + return_value=logger_frame, ): wait_empty = watcher.add_watcher("error message") _LOGGER.error("error message") @@ -441,3 +444,28 @@ async def test_raise_during_log_capture( log = find_log(await get_error_log(hass_ws_client), "ERROR") assert log is not None assert_log(log, "", "Bad logger message: repr error", "ERROR") + + +async def test__figure_out_source(hass: HomeAssistant) -> None: + """Test that source is figured out correctly. + + We have to test this directly for exception tracebacks since + we cannot generate a trackback from a Home Assistant component + in a test because the test is not a component. + """ + try: + raise ValueError("test") + except ValueError as ex: + exc_info = (type(ex), ex, ex.__traceback__) + mock_record = MagicMock( + pathname="should not hit", + lineno=5, + exc_info=exc_info, + ) + regex_str = f"({__file__})" + file, line_no = system_log._figure_out_source( + mock_record, + re.compile(regex_str), + ) + assert file == __file__ + assert line_no != 5