From 953f7d01d79b43ece1706f642613fb08d85cce43 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 9 Jul 2025 11:35:03 +0200 Subject: [PATCH] Improve DNS plug-in restart (#5999) * Improve DNS plug-in restart Instead of simply go by PrimaryConnectioon change, use the DnsManager Configuration property. This property is ultimately used to write the DNS plug-in configuration, so it is really the relevant information we pass on to the plug-in. * Check for changes and restart DNS plugin * Check for changes in plug-in DNS Cache last local (NetworkManager) provided DNS servers. Check against this DNS server list when deciding when to restart the DNS plug-in. * Check connectivity unthrottled in certain situations * Fix pytest * Fix pytest * Improve test coverage for DNS plugins restart functionality * Apply suggestions from code review Co-authored-by: Mike Degatano * Debounce local DNS changes and event based connectivity checks * Remove connection check logic * Remove unthrottled connectivity check * Fix delayed call * Store restart task and cancel in case a restart is running * Improve DNS configuration change tests * Remove stale code * Improve DNS plug-in tests, less mocking * Cover multiple private functions at once Improve tests around notify_locals_changed() to cover multiple functions at once. --------- Co-authored-by: Mike Degatano --- supervisor/host/network.py | 34 ++++--- supervisor/plugins/dns.py | 58 +++++++++++ supervisor/supervisor.py | 8 +- tests/conftest.py | 9 ++ tests/host/test_connectivity.py | 59 +++++++---- tests/plugins/test_dns.py | 167 ++++++++++++++++++++++++++++++++ 6 files changed, 303 insertions(+), 32 deletions(-) diff --git a/supervisor/host/network.py b/supervisor/host/network.py index ad9c6a47b..81bc94168 100644 --- a/supervisor/host/network.py +++ b/supervisor/host/network.py @@ -8,11 +8,11 @@ from typing import Any from ..const import ATTR_HOST_INTERNET from ..coresys import CoreSys, CoreSysAttributes from ..dbus.const import ( + DBUS_ATTR_CONFIGURATION, DBUS_ATTR_CONNECTION_ENABLED, DBUS_ATTR_CONNECTIVITY, - DBUS_ATTR_PRIMARY_CONNECTION, + DBUS_IFACE_DNS, DBUS_IFACE_NM, - DBUS_OBJECT_BASE, DBUS_SIGNAL_NM_CONNECTION_ACTIVE_CHANGED, ConnectionStateType, ConnectivityState, @@ -46,6 +46,8 @@ class NetworkManager(CoreSysAttributes): """Initialize system center handling.""" self.coresys: CoreSys = coresys self._connectivity: bool | None = None + # No event need on initial change (NetworkManager initializes with empty list) + self._dns_configuration: list = [] @property def connectivity(self) -> bool | None: @@ -142,6 +144,10 @@ class NetworkManager(CoreSysAttributes): "properties_changed", self._check_connectivity_changed ) + self.sys_dbus.network.dns.dbus.properties.on( + "properties_changed", self._check_dns_changed + ) + async def _check_connectivity_changed( self, interface: str, changed: dict[str, Any], invalidated: list[str] ): @@ -152,16 +158,6 @@ class NetworkManager(CoreSysAttributes): connectivity_check: bool | None = changed.get(DBUS_ATTR_CONNECTION_ENABLED) connectivity: int | None = changed.get(DBUS_ATTR_CONNECTIVITY) - # This potentially updated the DNS configuration. Make sure the DNS plug-in - # picks up the latest settings. - if ( - DBUS_ATTR_PRIMARY_CONNECTION in changed - and changed[DBUS_ATTR_PRIMARY_CONNECTION] - and changed[DBUS_ATTR_PRIMARY_CONNECTION] != DBUS_OBJECT_BASE - and await self.sys_plugins.dns.is_running() - ): - await self.sys_plugins.dns.restart() - if ( connectivity_check is True or DBUS_ATTR_CONNECTION_ENABLED in invalidated @@ -175,6 +171,20 @@ class NetworkManager(CoreSysAttributes): elif connectivity is not None: self.connectivity = connectivity == ConnectivityState.CONNECTIVITY_FULL + async def _check_dns_changed( + self, interface: str, changed: dict[str, Any], invalidated: list[str] + ): + """Check if DNS properties have changed.""" + if interface != DBUS_IFACE_DNS: + return + + if ( + DBUS_ATTR_CONFIGURATION in changed + and self._dns_configuration != changed[DBUS_ATTR_CONFIGURATION] + ): + self._dns_configuration = changed[DBUS_ATTR_CONFIGURATION] + self.sys_plugins.dns.notify_locals_changed() + async def update(self, *, force_connectivity_check: bool = False): """Update properties over dbus.""" _LOGGER.info("Updating local network information") diff --git a/supervisor/plugins/dns.py b/supervisor/plugins/dns.py index a37c5024b..f29e7bf58 100644 --- a/supervisor/plugins/dns.py +++ b/supervisor/plugins/dns.py @@ -77,6 +77,11 @@ class PluginDns(PluginBase): self._hosts: list[HostEntry] = [] self._loop: bool = False + self._cached_locals: list[str] | None = None + + # Debouncing system for rapid local changes + self._locals_changed_handle: asyncio.TimerHandle | None = None + self._restart_after_locals_change_handle: asyncio.Task | None = None @property def hosts(self) -> Path: @@ -91,6 +96,12 @@ class PluginDns(PluginBase): @property def locals(self) -> list[str]: """Return list of local system DNS servers.""" + if self._cached_locals is None: + self._cached_locals = self._compute_locals() + return self._cached_locals + + def _compute_locals(self) -> list[str]: + """Compute list of local system DNS servers.""" servers: list[str] = [] for server in [ f"dns://{server!s}" for server in self.sys_host.network.dns_servers @@ -100,6 +111,43 @@ class PluginDns(PluginBase): return servers + async def _restart_dns_after_locals_change(self) -> None: + """Restart DNS after a debounced delay for local changes.""" + old_locals = self._cached_locals + new_locals = self._compute_locals() + if old_locals == new_locals: + return + + _LOGGER.debug("DNS locals changed from %s to %s", old_locals, new_locals) + self._cached_locals = new_locals + if not await self.instance.is_running(): + return + + await self.restart() + self._restart_after_locals_change_handle = None + + def _trigger_restart_dns_after_locals_change(self) -> None: + """Trigger a restart of DNS after local changes.""" + # Cancel existing restart task if any + if self._restart_after_locals_change_handle: + self._restart_after_locals_change_handle.cancel() + + self._restart_after_locals_change_handle = self.sys_create_task( + self._restart_dns_after_locals_change() + ) + self._locals_changed_handle = None + + def notify_locals_changed(self) -> None: + """Schedule a debounced DNS restart for local changes.""" + # Cancel existing timer if any + if self._locals_changed_handle: + self._locals_changed_handle.cancel() + + # Schedule new timer with 1 second delay + self._locals_changed_handle = self.sys_call_later( + 1.0, self._trigger_restart_dns_after_locals_change + ) + @property def servers(self) -> list[str]: """Return list of DNS servers.""" @@ -243,6 +291,16 @@ class PluginDns(PluginBase): async def stop(self) -> None: """Stop CoreDNS.""" + # Cancel any pending locals change timer + if self._locals_changed_handle: + self._locals_changed_handle.cancel() + self._locals_changed_handle = None + + # Wait for any pending restart before stopping + if self._restart_after_locals_change_handle: + self._restart_after_locals_change_handle.cancel() + self._restart_after_locals_change_handle = None + _LOGGER.info("Stopping CoreDNS plugin") try: await self.instance.stop() diff --git a/supervisor/supervisor.py b/supervisor/supervisor.py index e1d7b2f4b..362cbe738 100644 --- a/supervisor/supervisor.py +++ b/supervisor/supervisor.py @@ -291,14 +291,16 @@ class Supervisor(CoreSysAttributes): limit=JobExecutionLimit.THROTTLE, throttle_period=_check_connectivity_throttle_period, ) - async def check_connectivity(self): - """Check the connection.""" + async def check_connectivity(self) -> None: + """Check the Internet connectivity from Supervisor's point of view.""" timeout = aiohttp.ClientTimeout(total=10) try: await self.sys_websession.head( "https://checkonline.home-assistant.io/online.txt", timeout=timeout ) - except (ClientError, TimeoutError): + except (ClientError, TimeoutError) as err: + _LOGGER.debug("Supervisor Connectivity check failed: %s", err) self.connectivity = False else: + _LOGGER.debug("Supervisor Connectivity check succeeded") self.connectivity = True diff --git a/tests/conftest.py b/tests/conftest.py index bcbd5f27d..f6c8d8815 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -66,6 +66,7 @@ from .dbus_service_mocks.base import DBusServiceMock from .dbus_service_mocks.network_connection_settings import ( ConnectionSettings as ConnectionSettingsService, ) +from .dbus_service_mocks.network_dns_manager import DnsManager as DnsManagerService from .dbus_service_mocks.network_manager import NetworkManager as NetworkManagerService # pylint: disable=redefined-outer-name, protected-access @@ -220,6 +221,14 @@ async def network_manager_service( yield network_manager_services["network_manager"] +@pytest.fixture +async def dns_manager_service( + network_manager_services: dict[str, DBusServiceMock | dict[str, DBusServiceMock]], +) -> AsyncGenerator[DnsManagerService]: + """Return DNS Manager service mock.""" + yield network_manager_services["network_dns_manager"] + + @pytest.fixture(name="connection_settings_service") async def fixture_connection_settings_service( network_manager_services: dict[str, DBusServiceMock | dict[str, DBusServiceMock]], diff --git a/tests/host/test_connectivity.py b/tests/host/test_connectivity.py index a68a5ab5c..ad186f2f1 100644 --- a/tests/host/test_connectivity.py +++ b/tests/host/test_connectivity.py @@ -2,8 +2,9 @@ # pylint: disable=protected-access import asyncio -from unittest.mock import AsyncMock, PropertyMock, patch +from unittest.mock import PropertyMock, patch +from dbus_fast import Variant import pytest from supervisor.coresys import CoreSys @@ -87,23 +88,47 @@ async def test_connectivity_events(coresys: CoreSys, force: bool): ) -async def test_dns_restart_on_connection_change( - coresys: CoreSys, network_manager_service: NetworkManagerService +async def test_dns_configuration_change_triggers_notify_locals_changed( + coresys: CoreSys, dns_manager_service ): - """Test dns plugin is restarted when primary connection changes.""" + """Test that DNS configuration changes trigger notify_locals_changed.""" await coresys.host.network.load() - with ( - patch.object(PluginDns, "restart") as restart, - patch.object( - PluginDns, "is_running", new_callable=AsyncMock, return_value=True - ), - ): - network_manager_service.emit_properties_changed({"PrimaryConnection": "/"}) - await network_manager_service.ping() - restart.assert_not_called() - network_manager_service.emit_properties_changed( - {"PrimaryConnection": "/org/freedesktop/NetworkManager/ActiveConnection/2"} + with patch.object(PluginDns, "notify_locals_changed") as notify_locals_changed: + # Test that non-Configuration changes don't trigger notify_locals_changed + dns_manager_service.emit_properties_changed({"Mode": "default"}) + await dns_manager_service.ping() + notify_locals_changed.assert_not_called() + + # Test that Configuration changes trigger notify_locals_changed + configuration = [ + { + "nameservers": Variant("as", ["192.168.2.2"]), + "domains": Variant("as", ["lan"]), + "interface": Variant("s", "eth0"), + "priority": Variant("i", 100), + "vpn": Variant("b", False), + } + ] + + dns_manager_service.emit_properties_changed({"Configuration": configuration}) + await dns_manager_service.ping() + notify_locals_changed.assert_called_once() + + notify_locals_changed.reset_mock() + # Test that subsequent Configuration changes also trigger notify_locals_changed + different_configuration = [ + { + "nameservers": Variant("as", ["8.8.8.8"]), + "domains": Variant("as", ["example.com"]), + "interface": Variant("s", "wlan0"), + "priority": Variant("i", 200), + "vpn": Variant("b", True), + } + ] + + dns_manager_service.emit_properties_changed( + {"Configuration": different_configuration} ) - await network_manager_service.ping() - restart.assert_called_once() + await dns_manager_service.ping() + notify_locals_changed.assert_called_once() diff --git a/tests/plugins/test_dns.py b/tests/plugins/test_dns.py index 665e58626..8787af342 100644 --- a/tests/plugins/test_dns.py +++ b/tests/plugins/test_dns.py @@ -35,6 +35,17 @@ async def fixture_write_json() -> Mock: yield write_json_file +@pytest.fixture(name="mock_call_later") +def fixture_mock_call_later(coresys: CoreSys): + """Mock sys_call_later with zero delay for testing.""" + + def mock_call_later(_delay, *args, **kwargs) -> asyncio.TimerHandle: + """Mock to remove delay.""" + return coresys.call_later(0, *args, **kwargs) + + return mock_call_later + + async def test_config_write( coresys: CoreSys, docker_interface: tuple[AsyncMock, AsyncMock], @@ -98,6 +109,7 @@ async def test_reset(coresys: CoreSys): unlink.assert_called_once() write_hosts.assert_called_once() + # Verify the hosts data structure is properly initialized # pylint: disable=protected-access assert coresys.plugins.dns._hosts == [ HostEntry( @@ -239,3 +251,158 @@ async def test_load_error_writing_resolv( assert "Can't write/update /etc/resolv.conf" in caplog.text assert coresys.core.healthy is False + + +async def test_notify_locals_changed_end_to_end_with_changes_and_running( + coresys: CoreSys, mock_call_later +): + """Test notify_locals_changed end-to-end: local DNS changes detected and plugin restarted.""" + dns_plugin = coresys.plugins.dns + + # Set cached locals to something different from current network state + current_locals = dns_plugin._compute_locals() + dns_plugin._cached_locals = ( + ["dns://192.168.1.1"] + if current_locals != ["dns://192.168.1.1"] + else ["dns://192.168.1.2"] + ) + + with ( + patch.object(dns_plugin, "restart") as mock_restart, + patch.object(dns_plugin.instance, "is_running", return_value=True), + patch.object(dns_plugin, "sys_call_later", new=mock_call_later), + ): + # Call notify_locals_changed + dns_plugin.notify_locals_changed() + + # Wait for the async task to complete + await asyncio.sleep(0.1) + + # Verify restart was called and cached locals were updated + mock_restart.assert_called_once() + assert dns_plugin._cached_locals == current_locals + + +async def test_notify_locals_changed_end_to_end_with_changes_but_not_running( + coresys: CoreSys, mock_call_later +): + """Test notify_locals_changed end-to-end: local DNS changes detected but plugin not running.""" + dns_plugin = coresys.plugins.dns + + # Set cached locals to something different from current network state + current_locals = dns_plugin._compute_locals() + dns_plugin._cached_locals = ( + ["dns://192.168.1.1"] + if current_locals != ["dns://192.168.1.1"] + else ["dns://192.168.1.2"] + ) + + with ( + patch.object(dns_plugin, "restart") as mock_restart, + patch.object(dns_plugin.instance, "is_running", return_value=False), + patch.object(dns_plugin, "sys_call_later", new=mock_call_later), + ): + # Call notify_locals_changed + dns_plugin.notify_locals_changed() + + # Wait for the async task to complete + await asyncio.sleep(0.1) + + # Verify restart was NOT called but cached locals were still updated + mock_restart.assert_not_called() + assert dns_plugin._cached_locals == current_locals + + +async def test_notify_locals_changed_end_to_end_no_changes( + coresys: CoreSys, mock_call_later +): + """Test notify_locals_changed end-to-end: no local DNS changes detected.""" + dns_plugin = coresys.plugins.dns + + # Set cached locals to match current network state + current_locals = dns_plugin._compute_locals() + dns_plugin._cached_locals = current_locals + + with ( + patch.object(dns_plugin, "restart") as mock_restart, + patch.object(dns_plugin, "sys_call_later", new=mock_call_later), + ): + # Call notify_locals_changed + dns_plugin.notify_locals_changed() + + # Wait for the async task to complete + await asyncio.sleep(0.1) + + # Verify restart was NOT called since no changes + mock_restart.assert_not_called() + assert dns_plugin._cached_locals == current_locals + + +async def test_notify_locals_changed_debouncing_cancels_previous_timer( + coresys: CoreSys, +): + """Test notify_locals_changed debouncing cancels previous timer before creating new one.""" + dns_plugin = coresys.plugins.dns + + # Set cached locals to trigger change detection + current_locals = dns_plugin._compute_locals() + dns_plugin._cached_locals = ( + ["dns://192.168.1.1"] + if current_locals != ["dns://192.168.1.1"] + else ["dns://192.168.1.2"] + ) + + call_count = 0 + handles = [] + + def mock_call_later_with_tracking(_delay, *args, **kwargs) -> asyncio.TimerHandle: + """Mock to remove delay and track calls.""" + nonlocal call_count + call_count += 1 + handle = coresys.call_later(0, *args, **kwargs) + handles.append(handle) + return handle + + with ( + patch.object(dns_plugin, "restart") as mock_restart, + patch.object(dns_plugin.instance, "is_running", return_value=True), + patch.object(dns_plugin, "sys_call_later", new=mock_call_later_with_tracking), + ): + # First call sets up timer + dns_plugin.notify_locals_changed() + assert call_count == 1 + first_handle = dns_plugin._locals_changed_handle + assert first_handle is not None + + # Second call should cancel first timer and create new one + dns_plugin.notify_locals_changed() + assert call_count == 2 + second_handle = dns_plugin._locals_changed_handle + assert second_handle is not None + assert first_handle != second_handle + + # Wait for the async task to complete + await asyncio.sleep(0.1) + + # Verify restart was called once for the final timer + mock_restart.assert_called_once() + assert dns_plugin._cached_locals == current_locals + + +async def test_stop_cancels_pending_timers_and_tasks(coresys: CoreSys): + """Test stop cancels pending locals change timers and restart tasks to prevent resource leaks.""" + dns_plugin = coresys.plugins.dns + + mock_timer_handle = Mock() + mock_task_handle = Mock() + dns_plugin._locals_changed_handle = mock_timer_handle + dns_plugin._restart_after_locals_change_handle = mock_task_handle + + with patch.object(dns_plugin.instance, "stop"): + await dns_plugin.stop() + + # Should cancel pending timer and task, then clean up + mock_timer_handle.cancel.assert_called_once() + mock_task_handle.cancel.assert_called_once() + assert dns_plugin._locals_changed_handle is None + assert dns_plugin._restart_after_locals_change_handle is None