From 2a88cb9339e1dc5541da6373ba87bd7542b773f7 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 29 May 2025 11:42:28 +0200 Subject: [PATCH] Improve Supervisor startup error handling (#5918) Instead of starting a task in the background, synchronously wait for the Supervisor start sequence to complete. This should be functionally equivalent, as we would loop forever in the event loop right afterwards anyway. The advantage is that we can now catch any exceptions during the start sequence and report any errors with critical logging so that they are sent to Sentry, if enabled. It also avoids "Task exception was never retrieved" errors. Reporting errors is especially important since we can't use the asyncio Sentry integration (see #5729 for details). Also handle early add-on start errors just like other add-on start errors (make sure the finally block is executed as well). And finally, register signal handlers synchronously. There is no real benefit in registering them asynchronously, and doing it synchronously avoids a potential race condition. --- supervisor/__main__.py | 11 +++++++++-- supervisor/bootstrap.py | 5 +++-- supervisor/core.py | 11 +++++++---- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/supervisor/__main__.py b/supervisor/__main__.py index 6e2417690..732ca4bd0 100644 --- a/supervisor/__main__.py +++ b/supervisor/__main__.py @@ -66,8 +66,15 @@ if __name__ == "__main__": _LOGGER.info("Setting up Supervisor") loop.run_until_complete(coresys.core.setup()) - loop.call_soon_threadsafe(loop.create_task, coresys.core.start()) - loop.call_soon_threadsafe(bootstrap.reg_signal, loop, coresys) + bootstrap.register_signal_handlers(loop, coresys) + + try: + loop.run_until_complete(coresys.core.start()) + except Exception as err: # pylint: disable=broad-except + # Supervisor itself is running at this point, just something didn't + # start as expected. Log with traceback to get more insights for + # such cases. 
+ _LOGGER.critical("Supervisor start failed: %s", err, exc_info=True) try: _LOGGER.info("Running Supervisor") diff --git a/supervisor/bootstrap.py b/supervisor/bootstrap.py index 344897857..c198ba779 100644 --- a/supervisor/bootstrap.py +++ b/supervisor/bootstrap.py @@ -1,6 +1,7 @@ """Bootstrap Supervisor.""" # ruff: noqa: T100 +import asyncio from importlib import import_module import logging import os @@ -284,8 +285,8 @@ def check_environment() -> None: _LOGGER.critical("Can't find Docker socket!") -def reg_signal(loop, coresys: CoreSys) -> None: - """Register SIGTERM and SIGKILL to stop system.""" +def register_signal_handlers(loop: asyncio.BaseEventLoop, coresys: CoreSys) -> None: + """Register SIGTERM, SIGHUP and SIGKILL to stop the Supervisor.""" try: loop.add_signal_handler( signal.SIGTERM, lambda: loop.create_task(coresys.core.stop()) diff --git a/supervisor/core.py b/supervisor/core.py index 423457ab4..967ba6f5e 100644 --- a/supervisor/core.py +++ b/supervisor/core.py @@ -188,7 +188,10 @@ class Core(CoreSysAttributes): await setup_task except Exception as err: # pylint: disable=broad-except _LOGGER.critical( - "Fatal error happening on load Task %s: %s", setup_task, err + "Fatal error happening on load Task %s: %s", + setup_task, + err, + exc_info=True, ) self.sys_resolution.add_unhealthy_reason(UnhealthyReason.SETUP) await async_capture_exception(err) @@ -237,10 +240,10 @@ class Core(CoreSysAttributes): await self.sys_supervisor.update() return - # Start addon mark as initialize - await self.sys_addons.boot(AddonStartup.INITIALIZE) - try: + # Start addon mark as initialize + await self.sys_addons.boot(AddonStartup.INITIALIZE) + # HomeAssistant is already running, only Supervisor restarted if await self.sys_hardware.helper.last_boot() == self.sys_config.last_boot: _LOGGER.info("Detected Supervisor restart")