From f3e2ccce43b00d99f69faa6e7e7058b9fc120624 Mon Sep 17 00:00:00 2001 From: Mike Degatano Date: Thu, 21 Apr 2022 04:55:49 -0400 Subject: [PATCH] Create issue for detected DNS server problem (#3578) * Create issue for detected DNS server problem * Validate behavior on restart as well * tls:// not supported, remove check * Move DNS server checks into resolution checks * Revert all changes to plugins.dns * Run DNS server checks if affected * Mock aiodns query during all checks tests --- requirements.txt | 1 + supervisor/resolution/checks/base.py | 6 +- .../resolution/checks/dns_server_failure.py | 83 ++++++++++ .../checks/dns_server_ipv6_error.py | 89 +++++++++++ supervisor/resolution/const.py | 20 ++- tests/plugins/test_dns.py | 55 +++++++ tests/resolution/check/test_check.py | 15 +- .../check/test_check_dns_server_failure.py | 128 +++++++++++++++ .../check/test_check_dns_server_ipv6_error.py | 148 ++++++++++++++++++ 9 files changed, 536 insertions(+), 9 deletions(-) create mode 100644 supervisor/resolution/checks/dns_server_failure.py create mode 100644 supervisor/resolution/checks/dns_server_ipv6_error.py create mode 100644 tests/plugins/test_dns.py create mode 100644 tests/resolution/check/test_check_dns_server_failure.py create mode 100644 tests/resolution/check/test_check_dns_server_ipv6_error.py diff --git a/requirements.txt b/requirements.txt index 9b70facc8..ec9cb67dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +aiodns==3.0.0 aiohttp==3.8.1 async_timeout==4.0.2 atomicwrites==1.4.0 diff --git a/supervisor/resolution/checks/base.py b/supervisor/resolution/checks/base.py index acfc0f079..68b6fafbc 100644 --- a/supervisor/resolution/checks/base.py +++ b/supervisor/resolution/checks/base.py @@ -41,7 +41,11 @@ class CheckBase(ABC, CoreSysAttributes): self.sys_resolution.dismiss_issue(issue) # System is not affected - if affected and self.context not in (ContextType.ADDON, ContextType.PLUGIN): + if affected and self.context not in ( + ContextType.ADDON, + ContextType.PLUGIN, + ContextType.DNS_SERVER, + ): return _LOGGER.info("Run check for %s/%s", self.issue, self.context) await self.run_check() diff --git a/supervisor/resolution/checks/dns_server_failure.py b/supervisor/resolution/checks/dns_server_failure.py new file mode 100644 index 000000000..bf3863c96 --- /dev/null +++ b/supervisor/resolution/checks/dns_server_failure.py @@ -0,0 +1,83 @@ +"""Helpers to check DNS servers for failure.""" +import asyncio +from datetime import timedelta +from typing import Optional + +from aiodns import DNSResolver +from aiodns.error import DNSError + +from supervisor.jobs.const import JobCondition, JobExecutionLimit +from supervisor.jobs.decorator import Job + +from ...const import CoreState +from ...coresys import CoreSys +from ..const import DNS_CHECK_HOST, ContextType, IssueType +from .base import CheckBase + + +def setup(coresys: CoreSys) -> CheckBase: + """Check setup function.""" + return CheckDNSServerFailures(coresys) + + +class CheckDNSServerFailures(CheckBase): + """CheckDNSServerFailures class for check.""" + + @Job( + conditions=[JobCondition.INTERNET_SYSTEM], + limit=JobExecutionLimit.THROTTLE, + throttle_period=timedelta(hours=24), + ) + async def run_check(self) -> None: + """Run check if not affected by issue.""" + dns_servers = self.dns_servers + results = await asyncio.gather( + *[self._check_server(server) for server in dns_servers], + return_exceptions=True, + ) + for i in (r for r in range(len(results)) if isinstance(results[r], DNSError)): + self.sys_resolution.create_issue( + IssueType.DNS_SERVER_FAILED, + ContextType.DNS_SERVER, + reference=dns_servers[i], + ) + self.sys_capture_exception(results[i]) + + @Job(conditions=[JobCondition.INTERNET_SYSTEM]) + async def approve_check(self, reference: Optional[str] = None) -> bool: + """Approve check if it is affected by issue.""" + if reference not in self.dns_servers: + return False + + try: + await self._check_server(reference) + except DNSError: + return True + + return False + + async def _check_server(self, server: str): + """Check a DNS server and report issues.""" + ip_addr = server[6:] if server.startswith("dns://") else server + resolver = DNSResolver(nameservers=[ip_addr]) + await resolver.query(DNS_CHECK_HOST, "A") + + @property + def dns_servers(self) -> list[str]: + """All user and system provided dns servers.""" + return self.sys_plugins.dns.servers + self.sys_plugins.dns.locals + + @property + def issue(self) -> IssueType: + """Return a IssueType enum.""" + return IssueType.DNS_SERVER_FAILED + + @property + def context(self) -> ContextType: + """Return a ContextType enum.""" + return ContextType.DNS_SERVER + + @property + def states(self) -> list[CoreState]: + """Return a list of valid states when this check can run.""" + return [CoreState.RUNNING] diff --git a/supervisor/resolution/checks/dns_server_ipv6_error.py b/supervisor/resolution/checks/dns_server_ipv6_error.py new file mode 100644 index 000000000..df1761f06 --- /dev/null +++ b/supervisor/resolution/checks/dns_server_ipv6_error.py @@ -0,0 +1,89 @@ +"""Helpers to check DNS servers for IPv6 errors.""" +import asyncio +from datetime import timedelta +from typing import Optional + +from aiodns import DNSResolver +from aiodns.error import DNSError + +from supervisor.jobs.const import JobCondition, JobExecutionLimit +from supervisor.jobs.decorator import Job + +from ...const import CoreState +from ...coresys import CoreSys +from ..const import DNS_CHECK_HOST, DNS_ERROR_NO_DATA, ContextType, IssueType +from .base import CheckBase + + +def setup(coresys: CoreSys) -> CheckBase: + """Check setup function.""" + return CheckDNSServerIPv6Errors(coresys) + + +class CheckDNSServerIPv6Errors(CheckBase): + """CheckDNSServerIPv6Errors class for check.""" + + @Job( + conditions=[JobCondition.INTERNET_SYSTEM], + limit=JobExecutionLimit.THROTTLE, + throttle_period=timedelta(hours=24), + ) + async def run_check(self) -> None: + """Run check if not affected by issue.""" + dns_servers = self.dns_servers + results = await asyncio.gather( + *[self._check_server(server) for server in dns_servers], + return_exceptions=True, + ) + for i in ( + r + for r in range(len(results)) + if isinstance(results[r], DNSError) + and results[r].args[0] != DNS_ERROR_NO_DATA + ): + self.sys_resolution.create_issue( + IssueType.DNS_SERVER_IPV6_ERROR, + ContextType.DNS_SERVER, + reference=dns_servers[i], + ) + self.sys_capture_exception(results[i]) + + @Job(conditions=[JobCondition.INTERNET_SYSTEM]) + async def approve_check(self, reference: Optional[str] = None) -> bool: + """Approve check if it is affected by issue.""" + if reference not in self.dns_servers: + return False + + try: + await self._check_server(reference) + except DNSError as dns_error: + if dns_error.args[0] != DNS_ERROR_NO_DATA: + return True + + return False + + async def _check_server(self, server: str): + """Check a DNS server and report issues.""" + ip_addr = server[6:] if server.startswith("dns://") else server + resolver = DNSResolver(nameservers=[ip_addr]) + await resolver.query(DNS_CHECK_HOST, "AAAA") + + @property + def dns_servers(self) -> list[str]: + """All user and system provided dns servers.""" + return self.sys_plugins.dns.servers + self.sys_plugins.dns.locals + + @property + def issue(self) -> IssueType: + """Return a IssueType enum.""" + return IssueType.DNS_SERVER_IPV6_ERROR + + @property + def context(self) -> ContextType: + """Return a ContextType enum.""" + return ContextType.DNS_SERVER + + @property + def states(self) -> list[CoreState]: + """Return a list of valid states when this check can run.""" + return [CoreState.RUNNING] diff --git a/supervisor/resolution/const.py b/supervisor/resolution/const.py index 252897e27..c187727b3 100644 --- a/supervisor/resolution/const.py +++ b/supervisor/resolution/const.py @@ -11,12 +11,16 @@ SCHEDULED_HEALTHCHECK = 3600 MINIMUM_FREE_SPACE_THRESHOLD = 1 MINIMUM_FULL_BACKUPS = 2 +DNS_CHECK_HOST = "_checkdns.home-assistant.io" +DNS_ERROR_NO_DATA = 1 + class ContextType(str, Enum): """Place where somethings was happening.""" ADDON = "addon" CORE = "core" + DNS_SERVER = "dns_server" OS = "os" PLUGIN = "plugin" SUPERVISOR = "supervisor" @@ -57,18 +61,20 @@ class UnhealthyReason(str, Enum): class IssueType(str, Enum): """Issue type.""" - FREE_SPACE = "free_space" - DOCKER_RATELIMIT = "docker_ratelimit" CORRUPT_DOCKER = "corrupt_docker" CORRUPT_REPOSITORY = "corrupt_repository" - SECURITY = "security" + DNS_LOOP = "dns_loop" + DNS_SERVER_FAILED = "dns_server_failed" + DNS_SERVER_IPV6_ERROR = "dns_server_ipv6_error" + DOCKER_RATELIMIT = "docker_ratelimit" + FATAL_ERROR = "fatal_error" + FREE_SPACE = "free_space" MISSING_IMAGE = "missing_image" + PWNED = "pwned" + SECURITY = "security" + TRUST = "trust" UPDATE_FAILED = "update_failed" UPDATE_ROLLBACK = "update_rollback" - FATAL_ERROR = "fatal_error" - DNS_LOOP = "dns_loop" - PWNED = "pwned" - TRUST = "trust" class SuggestionType(str, Enum): diff --git a/tests/plugins/test_dns.py b/tests/plugins/test_dns.py new file mode 100644 index 000000000..06024c797 --- /dev/null +++ b/tests/plugins/test_dns.py @@ -0,0 +1,55 @@ +"""Test DNS plugin.""" +from pathlib import Path +from unittest.mock import AsyncMock, Mock, patch + +import pytest + +from supervisor.coresys import CoreSys +from supervisor.docker.interface import DockerInterface + + +@pytest.fixture(name="docker_interface") +async def fixture_docker_interface() -> tuple[AsyncMock, AsyncMock]: + """Mock docker interface methods.""" + # with patch("supervisor.docker.interface.DockerInterface.run"), patch("supervisor.docker.interface.DockerInterface.restart") + with patch.object(DockerInterface, "run") as run, patch.object( + DockerInterface, "restart" + ) as restart: + yield (run, restart) + + +@pytest.fixture(name="write_json") +async def fixture_write_json() -> Mock: + """Mock json file writer.""" + with patch("supervisor.plugins.dns.write_json_file") as write_json_file: + yield write_json_file + + +@pytest.mark.parametrize("start", [True, False]) +async def test_config_write( + coresys: CoreSys, + docker_interface: tuple[AsyncMock, AsyncMock], + write_json: Mock, + start: bool, +): + """Test config write on DNS start and restart.""" + assert coresys.plugins.dns.locals == ["dns://192.168.30.1"] + coresys.plugins.dns.servers = ["dns://1.1.1.1", "dns://8.8.8.8"] + + if start: + await coresys.plugins.dns.start() + docker_interface[0].assert_called_once() + docker_interface[1].assert_not_called() + else: + await coresys.plugins.dns.restart() + docker_interface[0].assert_not_called() + docker_interface[1].assert_called_once() + + write_json.assert_called_once_with( + Path("/data/dns/coredns.json"), + { + "servers": ["dns://1.1.1.1", "dns://8.8.8.8"], + "locals": ["dns://192.168.30.1"], + "debug": False, + }, + ) diff --git a/tests/resolution/check/test_check.py b/tests/resolution/check/test_check.py index 0c1e50781..a646062a6 100644 --- a/tests/resolution/check/test_check.py +++ b/tests/resolution/check/test_check.py @@ -1,6 +1,6 @@ """Test check.""" # pylint: disable=import-error,protected-access -from unittest.mock import patch +from unittest.mock import AsyncMock, patch import pytest @@ -11,6 +11,19 @@ from supervisor.resolution.const import IssueType from supervisor.resolution.validate import get_valid_modules +@pytest.fixture(autouse=True) +def fixture_mock_dns_query(): + """Mock aiodns query.""" + with patch( + "supervisor.resolution.checks.dns_server_failure.DNSResolver.query", + new_callable=AsyncMock, + ), patch( + "supervisor.resolution.checks.dns_server_ipv6_error.DNSResolver.query", + new_callable=AsyncMock, + ): + yield + + async def test_check_setup(coresys: CoreSys): """Test check for setup.""" coresys.core.state = CoreState.SETUP diff --git a/tests/resolution/check/test_check_dns_server_failure.py b/tests/resolution/check/test_check_dns_server_failure.py new file mode 100644 index 000000000..f19d84815 --- /dev/null +++ b/tests/resolution/check/test_check_dns_server_failure.py @@ -0,0 +1,128 @@ +"""Test check DNS Servers for failures.""" +from unittest.mock import AsyncMock, call, patch + +from aiodns.error import DNSError +import pytest + +from supervisor.const import CoreState +from supervisor.coresys import CoreSys +from supervisor.resolution.checks.dns_server_failure import CheckDNSServerFailures +from supervisor.resolution.const import ContextType, IssueType + + +@pytest.fixture(name="dns_query") +async def fixture_dns_query() -> AsyncMock: + """Mock aiodns query.""" + with patch( + "supervisor.resolution.checks.dns_server_failure.DNSResolver.query", + new_callable=AsyncMock, + ) as dns_query: + yield dns_query + + +async def test_base(coresys: CoreSys): + """Test check basics.""" + dns_server_failures = CheckDNSServerFailures(coresys) + assert dns_server_failures.slug == "dns_server_failure" + assert dns_server_failures.enabled + + +async def test_check(coresys: CoreSys, dns_query: AsyncMock): + """Test check for DNS server failures.""" + dns_server_failures = CheckDNSServerFailures(coresys) + coresys.core.state = CoreState.RUNNING + + coresys.plugins.dns.servers = ["dns://1.1.1.1"] + assert dns_server_failures.dns_servers == [ + "dns://1.1.1.1", + "dns://192.168.30.1", + ] + assert len(coresys.resolution.issues) == 0 + + await dns_server_failures.run_check.__wrapped__(dns_server_failures) + assert dns_query.call_args_list == [ + call("_checkdns.home-assistant.io", "A"), + call("_checkdns.home-assistant.io", "A"), + ] + assert len(coresys.resolution.issues) == 0 + + dns_query.reset_mock() + coresys.plugins.dns.servers = [] + assert dns_server_failures.dns_servers == ["dns://192.168.30.1"] + + dns_query.side_effect = DNSError() + await dns_server_failures.run_check.__wrapped__(dns_server_failures) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "A") + + assert len(coresys.resolution.issues) == 1 + assert coresys.resolution.issues[0].type is IssueType.DNS_SERVER_FAILED + assert coresys.resolution.issues[0].context is ContextType.DNS_SERVER + assert coresys.resolution.issues[0].reference == "dns://192.168.30.1" + + +async def test_approve(coresys: CoreSys, dns_query: AsyncMock): + """Test approve existing DNS Server failure issues.""" + dns_server_failures = CheckDNSServerFailures(coresys) + coresys.core.state = CoreState.RUNNING + + assert dns_server_failures.dns_servers == ["dns://192.168.30.1"] + dns_query.side_effect = DNSError() + + assert await dns_server_failures.approve_check(reference="dns://1.1.1.1") is False + dns_query.assert_not_called() + + assert ( + await dns_server_failures.approve_check(reference="dns://192.168.30.1") is True + ) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "A") + + dns_query.reset_mock() + dns_query.side_effect = None + assert ( + await dns_server_failures.approve_check(reference="dns://192.168.30.1") is False + ) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "A") + + +async def test_did_run(coresys: CoreSys): + """Test that the check ran as expected.""" + dns_server_failures = CheckDNSServerFailures(coresys) + should_run = dns_server_failures.states + should_not_run = [state for state in CoreState if state not in should_run] + assert should_run == [CoreState.RUNNING] + assert len(should_not_run) != 0 + + with patch.object(CheckDNSServerFailures, "run_check", return_value=None) as check: + for state in should_run: + coresys.core.state = state + await dns_server_failures() + check.assert_called_once() + check.reset_mock() + + for state in should_not_run: + coresys.core.state = state + await dns_server_failures() + check.assert_not_called() + check.reset_mock() + + +async def test_check_if_affected(coresys: CoreSys): + """Test that check is still executed even if already affected.""" + dns_server_failures = CheckDNSServerFailures(coresys) + coresys.core.state = CoreState.RUNNING + + coresys.resolution.create_issue( + IssueType.DNS_SERVER_FAILED, + ContextType.DNS_SERVER, + reference="dns://192.168.30.1", + ) + assert len(coresys.resolution.issues) == 1 + + with patch.object( + CheckDNSServerFailures, "approve_check", return_value=True + ) as approve, patch.object( + CheckDNSServerFailures, "run_check", return_value=None + ) as check: + await dns_server_failures() + approve.assert_called_once() + check.assert_called_once() diff --git a/tests/resolution/check/test_check_dns_server_ipv6_error.py b/tests/resolution/check/test_check_dns_server_ipv6_error.py new file mode 100644 index 000000000..3809c9e7b --- /dev/null +++ b/tests/resolution/check/test_check_dns_server_ipv6_error.py @@ -0,0 +1,148 @@ +"""Test check DNS Servers for IPv6 errors.""" +from unittest.mock import AsyncMock, call, patch + +from aiodns.error import DNSError +import pytest + +from supervisor.const import CoreState +from supervisor.coresys import CoreSys +from supervisor.resolution.checks.dns_server_ipv6_error import CheckDNSServerIPv6Errors +from supervisor.resolution.const import ContextType, IssueType + + +@pytest.fixture(name="dns_query") +async def fixture_dns_query() -> AsyncMock: + """Mock aiodns query.""" + with patch( + "supervisor.resolution.checks.dns_server_ipv6_error.DNSResolver.query", + new_callable=AsyncMock, + ) as dns_query: + yield dns_query + + +async def test_base(coresys: CoreSys): + """Test check basics.""" + dns_server_ipv6_errors = CheckDNSServerIPv6Errors(coresys) + assert dns_server_ipv6_errors.slug == "dns_server_ipv6_error" + assert dns_server_ipv6_errors.enabled + + +async def test_check(coresys: CoreSys, dns_query: AsyncMock): + """Test check for DNS server IPv6 errors.""" + dns_server_ipv6_errors = CheckDNSServerIPv6Errors(coresys) + coresys.core.state = CoreState.RUNNING + + coresys.plugins.dns.servers = ["dns://1.1.1.1"] + assert dns_server_ipv6_errors.dns_servers == [ + "dns://1.1.1.1", + "dns://192.168.30.1", + ] + assert len(coresys.resolution.issues) == 0 + + await dns_server_ipv6_errors.run_check.__wrapped__(dns_server_ipv6_errors) + assert dns_query.call_args_list == [ + call("_checkdns.home-assistant.io", "AAAA"), + call("_checkdns.home-assistant.io", "AAAA"), + ] + assert len(coresys.resolution.issues) == 0 + + dns_query.reset_mock() + coresys.plugins.dns.servers = [] + assert dns_server_ipv6_errors.dns_servers == ["dns://192.168.30.1"] + + dns_query.side_effect = DNSError(1, "DNS server returned answer with no data") + await dns_server_ipv6_errors.run_check.__wrapped__(dns_server_ipv6_errors) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "AAAA") + assert len(coresys.resolution.issues) == 0 + + dns_query.reset_mock() + dns_query.side_effect = DNSError(4, "Domain name not found") + await dns_server_ipv6_errors.run_check.__wrapped__(dns_server_ipv6_errors) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "AAAA") + + assert len(coresys.resolution.issues) == 1 + assert coresys.resolution.issues[0].type is IssueType.DNS_SERVER_IPV6_ERROR + assert coresys.resolution.issues[0].context is ContextType.DNS_SERVER + assert coresys.resolution.issues[0].reference == "dns://192.168.30.1" + + +async def test_approve(coresys: CoreSys, dns_query: AsyncMock): + """Test approve existing DNS Server IPv6 error issues.""" + dns_server_ipv6_errors = CheckDNSServerIPv6Errors(coresys) + coresys.core.state = CoreState.RUNNING + + assert dns_server_ipv6_errors.dns_servers == ["dns://192.168.30.1"] + dns_query.side_effect = DNSError(4, "Domain name not found") + + assert ( + await dns_server_ipv6_errors.approve_check(reference="dns://1.1.1.1") is False + ) + dns_query.assert_not_called() + + assert ( + await dns_server_ipv6_errors.approve_check(reference="dns://192.168.30.1") + is True + ) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "AAAA") + + dns_query.reset_mock() + dns_query.side_effect = DNSError(1, "DNS server returned answer with no data") + assert ( + await dns_server_ipv6_errors.approve_check(reference="dns://192.168.30.1") + is False + ) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "AAAA") + + dns_query.reset_mock() + dns_query.side_effect = None + assert ( + await dns_server_ipv6_errors.approve_check(reference="dns://192.168.30.1") + is False + ) + dns_query.assert_called_once_with("_checkdns.home-assistant.io", "AAAA") + + +async def test_did_run(coresys: CoreSys): + """Test that the check ran as expected.""" + dns_server_ipv6_errors = CheckDNSServerIPv6Errors(coresys) + should_run = dns_server_ipv6_errors.states + should_not_run = [state for state in CoreState if state not in should_run] + assert should_run == [CoreState.RUNNING] + assert len(should_not_run) != 0 + + with patch.object( + CheckDNSServerIPv6Errors, "run_check", return_value=None + ) as check: + for state in should_run: + coresys.core.state = state + await dns_server_ipv6_errors() + check.assert_called_once() + check.reset_mock() + + for state in should_not_run: + coresys.core.state = state + await dns_server_ipv6_errors() + check.assert_not_called() + check.reset_mock() + + +async def test_check_if_affected(coresys: CoreSys): + """Test that check is still executed even if already affected.""" + dns_server_ipv6_errors = CheckDNSServerIPv6Errors(coresys) + coresys.core.state = CoreState.RUNNING + + coresys.resolution.create_issue( + IssueType.DNS_SERVER_IPV6_ERROR, + ContextType.DNS_SERVER, + reference="dns://192.168.30.1", + ) + assert len(coresys.resolution.issues) == 1 + + with patch.object( + CheckDNSServerIPv6Errors, "approve_check", return_value=True + ) as approve, patch.object( + CheckDNSServerIPv6Errors, "run_check", return_value=None + ) as check: + await dns_server_ipv6_errors() + approve.assert_called_once() + check.assert_called_once()