Compare commits

..

1 Commits

Author SHA1 Message Date
Stefan Agner
afc1165912 Don't fail Supervisor setup when an app image is missing
A missing builder image (docker:<version>-cli) during a build-required app
load aborted Supervisor setup entirely, leaving the system stuck in a
setup state where every subsequent operation was blocked by the
not-healthy guard. In practice this was triggered when the host's Docker
patch version had no matching `-cli` tag published on Docker Hub.

Two issues compounded the failure: `images.pull` in `run_command` leaked a
raw `aiodocker.DockerError` past the `@Job` decorator, which rewrapped it
as `JobException` and bypassed the `suppress(DockerError, ...)` guard in
`addon.load()`; and the load path treated all Docker errors the same
whether the image was simply missing or the daemon itself was misbehaving.

Wrap the pull error in `run_command` so it propagates as Supervisor's
`DockerError` (a `HassioError`) and is preserved by the decorator.
Distinguish 404s in `attach()` and `check_image()` by raising
`DockerNotFound`/`DockerAPIError` instead of generic `DockerError`. In
`addon.load()`, only the `DockerNotFound` path is treated as "image
missing": for build-required apps we skip the inline build and surface a
`MISSING_IMAGE` repair so the resolution autofix loop handles it off the
critical path; for pull-based apps we still attempt install during load
and create the repair on failure. Other `DockerError`s (daemon trouble or
a failed internal install in `check_image`) are logged at CRITICAL — which
the Sentry logging integration captures — and the addon is left detached
rather than masked as a misleading missing-image repair.

In the autofix path, swallow `DockerBuildError`, `DockerNoSpaceOnDevice`,
`DockerRegistryAuthError`, and `DockerRegistryRateLimitExceeded` as
`ResolutionFixupError` so they don't generate Sentry events on every
retry. The repair stays available for manual retry once the underlying
cause (registry tag published, disk freed, credentials fixed, rate limit
expired) is resolved.
2026-05-06 16:55:18 +02:00
22 changed files with 192 additions and 253 deletions

View File

@@ -170,7 +170,7 @@ jobs:
- name: Install Cosign
if: needs.init.outputs.publish == 'true'
uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6 # v4.1.2
uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
with:
cosign-release: ${{ env.COSIGN_VERSION }}

View File

@@ -13,8 +13,8 @@ env:
MYPY_CACHE_VERSION: 1
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name == 'pull_request' && 'pr' || github.run_id }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
jobs:
# Separate job to pre-populate the base dependency cache
@@ -331,7 +331,7 @@ jobs:
- name: Run mypy
run: |
. venv/bin/activate
mypy supervisor
mypy --ignore-missing-imports supervisor
pytest:
runs-on: ubuntu-latest
@@ -346,7 +346,7 @@ jobs:
with:
python-version: ${{ needs.prepare.outputs.python-version }}
- name: Install Cosign
uses: sigstore/cosign-installer@6f9f17788090df1f26f669e9d70d6ae9567deba6 # v4.1.2
uses: sigstore/cosign-installer@cad07c2e89fa2edd6e2d7bab4c1aa38e53f76003 # v4.1.1
with:
cosign-release: "v2.5.3"
- name: Restore Python virtual environment
@@ -380,8 +380,6 @@ jobs:
. venv/bin/activate
pytest \
-qq \
-n auto \
--dist=loadfile \
--durations=10 \
--cov supervisor \
-o console_output_style=count \

View File

@@ -223,13 +223,6 @@ overgeneral-exceptions = ["builtins.BaseException", "builtins.Exception"]
[tool.pylint.DESIGN]
max-positional-arguments = 10
[tool.mypy]
warn_unreachable = true
warn_redundant_casts = true
warn_unused_ignores = true
enable_error_code = ["exhaustive-match", "possibly-undefined"]
disable_error_code = ["import-not-found", "import-untyped"]
[tool.pytest.ini_options]
testpaths = ["tests"]
norecursedirs = [".git"]

View File

@@ -17,7 +17,7 @@ faust-cchardet==2.1.19
gitpython==3.1.50
jinja2==3.1.6
log-rate-limit==1.4.2
orjson==3.11.9
orjson==3.11.8
pulsectl==24.12.0
pyudev==0.24.4
PyYAML==6.0.3

View File

@@ -7,9 +7,8 @@ pytest-aiohttp==1.1.0
pytest-asyncio==1.3.0
pytest-cov==7.1.0
pytest-timeout==2.4.0
pytest-xdist==3.8.0
pytest==9.0.3
ruff==0.15.12
time-machine==3.2.0
types-pyyaml==6.0.12.20260508
urllib3==2.7.0
types-pyyaml==6.0.12.20260408
urllib3==2.6.3

View File

@@ -81,6 +81,7 @@ from ..exceptions import (
DockerBuildError,
DockerContainerPortConflict,
DockerError,
DockerNotFound,
DockerRegistryAuthError,
HostAppArmorError,
StoreAppNotFoundError,
@@ -258,14 +259,41 @@ class App(AppModel):
# Ensure we are using correct image for this system
await self.instance.check_image(self.version, default_image, self.arch)
except DockerError:
except DockerNotFound:
_LOGGER.info("No %s app Docker image %s found", self.slug, self.image)
with suppress(DockerError, AppNotSupportedError):
await self.instance.install(self.version, default_image, arch=self.arch)
if self.need_build:
# Don't run a local build during setup. Surface a repair so
# the resolution autofix loop can handle it off the critical
# path.
self._create_missing_image_issue()
else:
try:
await self.instance.install(
self.version, default_image, arch=self.arch
)
except (DockerError, AppNotSupportedError):
self._create_missing_image_issue()
except DockerError as err:
# Docker error other than a clean "image not found" - we can't
# tell whether the image is actually missing. Log and leave the
# addon detached; a future load will reattempt and surface a
# MISSING_IMAGE repair if appropriate.
_LOGGER.critical(
"Docker error loading app %s, leaving detached: %s", self.slug, err
)
self.persist[ATTR_IMAGE] = default_image
await self.save_persist()
def _create_missing_image_issue(self) -> None:
"""Surface a repair suggestion for a missing app image."""
self.sys_resolution.create_issue(
IssueType.MISSING_IMAGE,
ContextType.ADDON,
reference=self.slug,
suggestions=[SuggestionType.EXECUTE_REPAIR],
)
@property
def ip_address(self) -> IPv4Address:
"""Return IP of app instance."""

View File

@@ -632,10 +632,12 @@ class CoreSys:
self, coroutine: Coroutine, *, eager_start: bool | None = None
) -> asyncio.Task:
"""Create an async task."""
# eager_start kwarg works but wasn't added for mypy visibility until 3.14
# can remove the type ignore then
return self.loop.create_task(
coroutine,
context=self._create_context(),
eager_start=eager_start,
eager_start=eager_start, # type: ignore
)
def call_later(

View File

@@ -332,17 +332,15 @@ class DockerInterface(JobGroup, ABC):
)
await async_capture_exception(err)
# Get credentials for private registries to pass to aiodocker.
# Done before registering the listener so a failure here does not
# leak a stale event listener.
credentials, pull_image_name = self._get_credentials(image)
listener = self.sys_bus.register_event(
BusEvent.DOCKER_IMAGE_PULL_UPDATE, process_pull_event
)
_LOGGER.info("Downloading docker image %s with tag %s.", image, version)
try:
# Get credentials for private registries to pass to aiodocker
credentials, pull_image_name = self._get_credentials(image)
# Pull new image, passing credentials to aiodocker
docker_image = await self.sys_docker.pull_image(
current_job.uuid,
@@ -448,15 +446,20 @@ class DockerInterface(JobGroup, ABC):
),
)
with suppress(aiodocker.DockerError):
if not self._meta and self.image:
if not self._meta and self.image:
try:
self._meta = await self.sys_docker.images.inspect(
f"{self.image}:{version!s}"
)
except aiodocker.DockerError as err:
if err.status != HTTPStatus.NOT_FOUND:
raise DockerAPIError(
f"Docker API error inspecting image {self.image}:{version!s}: {err!s}"
) from err
# Successful?
if not self._meta:
raise DockerError(
raise DockerNotFound(
f"Could not get metadata on container or image for {self.name}"
)
_LOGGER.info("Attaching to %s with version %s", self.image, self.version)
@@ -552,7 +555,11 @@ class DockerInterface(JobGroup, ABC):
try:
image = await self.sys_docker.images.inspect(image_name)
except aiodocker.DockerError as err:
raise DockerError(
if err.status == HTTPStatus.NOT_FOUND:
raise DockerNotFound(
f"Image {image_name} not found", _LOGGER.info
) from err
raise DockerAPIError(
f"Could not get {image_name} for check due to: {err!s}",
_LOGGER.error,
) from err

View File

@@ -640,13 +640,17 @@ class DockerAPI(CoreSysAttributes):
try:
await self.images.inspect(f"{image}:{tag}")
except aiodocker.DockerError as err:
if err.status == HTTPStatus.NOT_FOUND:
_LOGGER.info("Pulling image %s:%s", image, tag)
await self.images.pull(image, tag=tag)
else:
if err.status != HTTPStatus.NOT_FOUND:
raise DockerError(
f"Can't inspect image {image}:{tag}: {err}", _LOGGER.error
) from err
_LOGGER.info("Pulling image %s:%s", image, tag)
try:
await self.images.pull(image, tag=tag)
except aiodocker.DockerError as pull_err:
raise DockerError(
f"Can't pull image {image}:{tag}: {pull_err}", _LOGGER.error
) from pull_err
try:
container = await self._run(

View File

@@ -27,7 +27,6 @@ _LOGGER: logging.Logger = logging.getLogger(__name__)
CORE_UNIX_SOCKET_MIN_VERSION: AwesomeVersion = AwesomeVersion(
"2026.4.0.dev202603250907"
)
CORE_UNIX_SOCKET_DEFAULT_VERSION: AwesomeVersion = AwesomeVersion("2026.5.1")
GET_CORE_STATE_MIN_VERSION: AwesomeVersion = AwesomeVersion("2023.8.0.dev20230720")
@@ -57,26 +56,15 @@ class HomeAssistantAPI(CoreSysAttributes):
def supports_unix_socket(self) -> bool:
"""Return True if the installed Core version supports Unix socket communication.
Enabled by default for Core >= CORE_UNIX_SOCKET_DEFAULT_VERSION; for
older versions down to CORE_UNIX_SOCKET_MIN_VERSION it is gated behind
the UNIX_SOCKET_CORE_API feature flag.
Used to decide whether to configure the env var when starting Core.
"""
if (
self.sys_homeassistant.version is None
or self.sys_homeassistant.version == LANDINGPAGE
or not version_is_new_enough(
return (
self.sys_config.feature_flags.get(FeatureFlag.UNIX_SOCKET_CORE_API, False)
and self.sys_homeassistant.version is not None
and self.sys_homeassistant.version != LANDINGPAGE
and version_is_new_enough(
self.sys_homeassistant.version, CORE_UNIX_SOCKET_MIN_VERSION
)
):
return False
if version_is_new_enough(
self.sys_homeassistant.version, CORE_UNIX_SOCKET_DEFAULT_VERSION
):
return True
return self.sys_config.feature_flags.get(
FeatureFlag.UNIX_SOCKET_CORE_API, False
)
@property

View File

@@ -280,9 +280,7 @@ class HomeAssistantCore(JobGroup):
)
old_image = self.sys_homeassistant.image
rollback_version = (
self.sys_homeassistant.version if not self.error_state else None
)
rollback = self.sys_homeassistant.version if not self.error_state else None
running = await self.instance.is_running()
exists = await self.instance.exists()
@@ -329,17 +327,7 @@ class HomeAssistantCore(JobGroup):
with suppress(HomeAssistantError):
await _update(to_version)
# If Core wasn't running on entry, the caller is responsible for
# starting it (e.g. backup restore, which stops and removes Core
# before calling update() and starts it later in its own stage).
# _update() correspondingly skipped the start step, so there is no
# running Core to health-check. Returning early avoids a spurious
# rollback that would otherwise overwrite the freshly installed
# image with the previous version.
if not running:
return
if not self.error_state and rollback_version:
if not self.error_state and rollback:
try:
data = await self.sys_homeassistant.api.get_config()
except HomeAssistantError:
@@ -365,7 +353,7 @@ class HomeAssistantCore(JobGroup):
return
# Update going wrong, revert it
if self.error_state and rollback_version:
if self.error_state and rollback:
_LOGGER.critical("HomeAssistant update failed -> rollback!")
self.sys_resolution.create_issue(
IssueType.UPDATE_ROLLBACK, ContextType.CORE
@@ -382,7 +370,7 @@ class HomeAssistantCore(JobGroup):
_LOGGER.info(
"A backup of the logfile is stored in /config/home-assistant-rollback.log"
)
await _update(rollback_version)
await _update(rollback)
else:
self.sys_resolution.create_issue(IssueType.UPDATE_FAILED, ContextType.CORE)
raise HomeAssistantUpdateError()

View File

@@ -267,8 +267,7 @@ class Interface:
return InterfaceMethod.AUTO
case NMInterfaceMethod.MANUAL:
return InterfaceMethod.STATIC
case _:
return InterfaceMethod.DISABLED
return InterfaceMethod.DISABLED
@staticmethod
def _map_nm_addr_gen_mode(addr_gen_mode: int | None) -> InterfaceAddrGenMode:
@@ -334,8 +333,7 @@ class Interface:
return InterfaceType.WIRELESS
case DeviceType.VLAN.value:
return InterfaceType.VLAN
case _:
raise ValueError(f"Invalid device type: {device_type}")
raise ValueError(f"Invalid device type: {device_type}")
@staticmethod
def _map_nm_wifi(inet: NetworkInterface) -> WifiConfig | None:

View File

@@ -73,22 +73,26 @@ class Scheduler(CoreSysAttributes):
def _schedule_task(self, task: _Task) -> None:
"""Schedule a task on loop."""
match task.interval:
case int() | float():
task.next = self.sys_call_later(task.interval, self._run_task, task)
case time():
today = datetime.combine(date.today(), task.interval)
tomorrow = datetime.combine(
date.today() + timedelta(days=1), task.interval
)
if isinstance(task.interval, (int, float)):
task.next = self.sys_call_later(task.interval, self._run_task, task)
elif isinstance(task.interval, time):
today = datetime.combine(date.today(), task.interval)
tomorrow = datetime.combine(date.today() + timedelta(days=1), task.interval)
# Check if we run it today or next day
if today > datetime.today():
calc = today
else:
calc = tomorrow
# Check if we run it today or next day
if today > datetime.today():
calc = today
else:
calc = tomorrow
task.next = self.sys_call_at(calc, self._run_task, task)
task.next = self.sys_call_at(calc, self._run_task, task)
else:
_LOGGER.critical(
"Unknown interval %s (type: %s) for scheduler %s",
task.interval,
type(task.interval),
task.id,
)
async def shutdown(self, timeout=10) -> None:
"""Shutdown all task inside the scheduler."""

View File

@@ -151,8 +151,7 @@ class Mount(CoreSysAttributes, ABC):
return PurePath(PATH_MEDIA, self.name)
case MountUsage.SHARE:
return PurePath(PATH_SHARE, self.name)
case MountUsage.BACKUP | None:
return None
return None
@property
def failed_issue(self) -> Issue:

View File

@@ -4,6 +4,7 @@ from dataclasses import dataclass
from datetime import datetime
import logging
from pathlib import Path, PurePath
from typing import cast
import aiohttp
from awesomeversion import AwesomeVersion, AwesomeVersionException
@@ -57,8 +58,8 @@ class SlotStatus:
device=PurePath(data["device"]),
bundle_compatible=data.get("bundle.compatible"),
sha256=data.get("sha256"),
size=data.get("size"),
installed_count=data.get("installed.count"),
size=cast(int | None, data.get("size")),
installed_count=cast(int | None, data.get("installed.count")),
bundle_version=AwesomeVersion(data["bundle.version"])
if "bundle.version" in data
else None,
@@ -66,7 +67,7 @@ class SlotStatus:
if "installed.timestamp" in data
else None,
status=data.get("status"),
activated_count=data.get("activated.count"),
activated_count=cast(int | None, data.get("activated.count")),
activated_timestamp=datetime.fromisoformat(data["activated.timestamp"])
if "activated.timestamp" in data
else None,

View File

@@ -3,6 +3,13 @@
import logging
from ...coresys import CoreSys
from ...exceptions import (
DockerBuildError,
DockerNoSpaceOnDevice,
DockerRegistryAuthError,
DockerRegistryRateLimitExceeded,
ResolutionFixupError,
)
from ..const import ContextType, IssueType, SuggestionType
from .base import FixupBase
@@ -44,7 +51,21 @@ class FixupAppExecuteRepair(FixupBase):
_LOGGER.info("Installing image for app %s", reference)
self.attempts += 1
await app.instance.install(app.version)
try:
await app.instance.install(app.version)
except (
DockerBuildError,
DockerNoSpaceOnDevice,
DockerRegistryAuthError,
DockerRegistryRateLimitExceeded,
) as err:
# These failures won't be resolved by an immediate retry (broken
# Dockerfile or unavailable base/builder image; disk full; bad
# credentials; registry rate limit). Surface as a fixup error so
# FixupBase swallows it without a Sentry event. The repair stays
# available for manual retry once the underlying cause is fixed.
_LOGGER.warning("Cannot repair app %s: %s", reference, err)
raise ResolutionFixupError() from err
@property
def suggestion(self) -> SuggestionType:

View File

@@ -186,7 +186,8 @@ class RepositoryGit(Repository, ABC):
repository_file = Path(self._git.path / f"repository{filetype}")
if repository_file.exists():
break
else:
if not repository_file.exists():
return False
# If valid?

View File

@@ -17,7 +17,6 @@ from securetar import SecureTarArchive, SecureTarFile
from supervisor.addons.addon import App
from supervisor.addons.const import AppBackupMode
from supervisor.addons.model import AppModel
from supervisor.config import CoreConfig
from supervisor.const import ATTR_ADVANCED, AppBoot, AppState, BusEvent
from supervisor.coresys import CoreSys
from supervisor.docker.addon import DockerApp
@@ -1078,75 +1077,70 @@ async def test_app_loads_wrong_image(
@pytest.mark.usefixtures("mock_amd64_arch_supported")
async def test_app_loads_missing_image(coresys: CoreSys, install_app_ssh: App):
"""Test app corrects a missing image on load."""
async def test_app_loads_missing_image_build(coresys: CoreSys, install_app_ssh: App):
"""Test build-required app surfaces a repair when image is missing on load."""
coresys.docker.images.inspect.side_effect = aiodocker.DockerError(
HTTPStatus.NOT_FOUND, {"message": "missing"}
)
with (
patch("pathlib.Path.is_file", return_value=True),
patch.object(
coresys.docker,
"run_command",
return_value=CommandReturn(0, ["Build successful"]),
) as mock_run_command,
patch.object(
type(coresys.config),
"local_to_extern_path",
return_value=PurePath("/addon/path/on/host"),
),
):
with patch.object(
coresys.docker,
"run_command",
return_value=CommandReturn(0, ["Build successful"]),
) as mock_run_command:
await install_app_ssh.load()
mock_run_command.assert_called_once()
assert mock_run_command.call_args.args[0] == "docker"
assert mock_run_command.call_args.kwargs["tag"] == "1.0.0-cli"
command = mock_run_command.call_args.kwargs["command"]
assert is_in_list(
["--platform", "linux/amd64"],
command,
# Build-required apps must not run a build during load. A repair is
# raised so the resolution autofix loop handles it off the critical path.
mock_run_command.assert_not_called()
issue = Issue(
IssueType.MISSING_IMAGE, ContextType.ADDON, reference=install_app_ssh.slug
)
assert is_in_list(
["--tag", "local/amd64-addon-ssh:9.2.1"],
command,
assert issue in coresys.resolution.issues
suggestions = coresys.resolution.suggestions_for_issue(issue)
assert any(s.type == SuggestionType.EXECUTE_REPAIR for s in suggestions)
@pytest.mark.usefixtures("mock_amd64_arch_supported")
async def test_app_loads_missing_image_pull(coresys: CoreSys, install_app_ssh: App):
"""Test pullable app installs the missing image during load."""
install_app_ssh.data["image"] = "test/amd64-addon-ssh"
coresys.docker.images.inspect.side_effect = aiodocker.DockerError(
HTTPStatus.NOT_FOUND, {"message": "missing"}
)
assert install_app_ssh.image == "local/amd64-addon-ssh"
with patch.object(DockerAPI, "pull_image") as mock_pull_image:
await install_app_ssh.load()
mock_pull_image.assert_called_once()
issue = Issue(
IssueType.MISSING_IMAGE, ContextType.ADDON, reference=install_app_ssh.slug
)
assert issue not in coresys.resolution.issues
@pytest.mark.usefixtures("container", "mock_amd64_arch_supported")
async def test_app_load_succeeds_with_docker_errors(
coresys: CoreSys, install_app_ssh: App, caplog: pytest.LogCaptureFixture
):
"""Docker errors while building/pulling an image during load should not raise and fail setup."""
# Build env invalid failure
"""Docker errors during load should not raise and fail setup."""
issue = Issue(
IssueType.MISSING_IMAGE, ContextType.ADDON, reference=install_app_ssh.slug
)
# Build-required app with missing image: repair issue raised, no exception
coresys.docker.images.inspect.side_effect = aiodocker.DockerError(
HTTPStatus.NOT_FOUND, {"message": "missing"}
)
caplog.clear()
await install_app_ssh.load()
assert "Cannot build app 'local_ssh' because dockerfile is missing" in caplog.text
assert issue in coresys.resolution.issues
# Image build failure
caplog.clear()
with (
patch("pathlib.Path.is_file", return_value=True),
patch.object(
CoreConfig,
"local_to_extern_path",
return_value=PurePath("/addon/path/on/host"),
),
patch.object(
DockerAPI, "run_command", return_value=CommandReturn(1, ["error"])
),
):
await install_app_ssh.load()
assert (
"Docker build failed for local/amd64-addon-ssh:9.2.1 (exit code 1). Build output:\nerror"
in caplog.text
)
# Image pull failure
# Pull-based app where check_image's internal install fails: addon left
# detached, no exception escapes to abort setup. The next load will hit
# DockerNotFound and trigger the proper repair path.
stored = coresys.resolution.get_issue_if_present(issue)
coresys.resolution.dismiss_issue(stored)
install_app_ssh.data["image"] = "test/amd64-addon-ssh"
caplog.clear()
with patch.object(
@@ -1155,7 +1149,11 @@ async def test_app_load_succeeds_with_docker_errors(
side_effect=aiodocker.DockerError(400, {"message": "error"}),
):
await install_app_ssh.load()
assert "Can't install test/amd64-addon-ssh:9.2.1:" in caplog.text
assert "Docker error loading app local_ssh, leaving detached" in caplog.text
assert any(
"Docker error loading app local_ssh" in r.message and r.levelname == "CRITICAL"
for r in caplog.records
)
@pytest.mark.usefixtures("coresys")

View File

@@ -22,7 +22,6 @@ from supervisor.homeassistant.core import HomeAssistantCore
from supervisor.homeassistant.module import HomeAssistant
from supervisor.resolution.const import ContextType, IssueType
from supervisor.resolution.data import Issue
from supervisor.updater import Updater
from tests.common import AsyncIterator, load_json_fixture
@@ -490,7 +489,6 @@ async def test_update_frontend_check_success(
coresys.homeassistant.version = AwesomeVersion("2025.8.0")
with (
patch.object(DockerInterface, "is_running", AsyncMock(return_value=True)),
patch.object(
DockerHomeAssistant,
"version",
@@ -536,7 +534,6 @@ async def test_update_frontend_check_fails_triggers_rollback(
with (
patch.object(DockerInterface, "update", new=mock_update),
patch.object(DockerInterface, "is_running", AsyncMock(return_value=True)),
patch.object(
DockerHomeAssistant,
"version",
@@ -588,7 +585,6 @@ async def test_update_websocket_api_missing_triggers_rollback(
with (
patch.object(DockerInterface, "update", new=mock_update),
patch.object(DockerInterface, "is_running", AsyncMock(return_value=True)),
patch.object(
DockerHomeAssistant,
"version",
@@ -640,7 +636,6 @@ async def test_update_get_config_error_triggers_rollback(
with (
patch.object(DockerInterface, "update", new=mock_update),
patch.object(DockerInterface, "is_running", AsyncMock(return_value=True)),
patch.object(
DockerHomeAssistant,
"version",
@@ -662,63 +657,3 @@ async def test_update_get_config_error_triggers_rollback(
Issue(IssueType.UPDATE_ROLLBACK, ContextType.CORE) in coresys.resolution.issues
)
mock_cleanup.assert_not_called()
async def test_update_skips_health_check_when_core_not_running(
coresys: CoreSys,
caplog: pytest.LogCaptureFixture,
tmp_supervisor_data: Path,
):
"""Test that update skips health check and rollback when Core was stopped on entry.
Reproduces the backup-restore regression: the restore flow stops and
removes Core before calling core.update(); the post-update API check
must not fire because Core hasn't been started yet, otherwise it
triggers a spurious rollback that overwrites the restored image.
"""
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
coresys.homeassistant.version = AwesomeVersion("2026.5.0b0")
coresys.homeassistant.set_image("ghcr.io/home-assistant/qemux86-64-homeassistant")
update_call_count = 0
async def mock_update(*args, **kwargs):
nonlocal update_call_count
update_call_count += 1
coresys.homeassistant.version = AwesomeVersion("2026.4.4")
with (
patch.object(DockerInterface, "update", new=mock_update),
patch.object(DockerInterface, "is_running", AsyncMock(return_value=False)),
patch.object(DockerInterface, "exists", AsyncMock(return_value=False)),
patch.object(
Updater,
"image_homeassistant",
new=PropertyMock(
return_value="ghcr.io/home-assistant/qemux86-64-homeassistant"
),
),
patch.object(
DockerHomeAssistant,
"version",
new=PropertyMock(return_value=AwesomeVersion("2026.4.4")),
),
patch.object(HomeAssistantAPI, "get_config") as mock_get_config,
patch.object(ha_core, "verify_frontend", AsyncMock()) as mock_frontend,
patch.object(DockerInterface, "cleanup") as mock_cleanup,
):
await coresys.homeassistant.core.update(AwesomeVersion("2026.4.4"))
# Only one update call: no rollback fired.
assert update_call_count == 1
assert "HomeAssistant update failed -> rollback!" not in caplog.text
# Health check must not run when Core wasn't running on entry.
mock_get_config.assert_not_called()
mock_frontend.assert_not_called()
# Caller (restore flow) is responsible for cleanup later.
mock_cleanup.assert_not_called()
assert (
Issue(IssueType.UPDATE_ROLLBACK, ContextType.CORE)
not in coresys.resolution.issues
)
assert coresys.homeassistant.version == AwesomeVersion("2026.4.4")

View File

@@ -1,7 +1,7 @@
"""Common test functions."""
import asyncio
from collections.abc import Callable, Sequence
from collections.abc import Sequence
from datetime import datetime
from functools import partial
from importlib import import_module
@@ -32,29 +32,6 @@ async def fire_bus_event(coresys: CoreSys, event: BusEvent, data: Any) -> None:
await asyncio.gather(*coresys.bus.fire_event(event, data))
async def wait_for(
predicate: Callable[[], bool],
*,
timeout: float = 5.0,
interval: float = 0.01,
) -> None:
"""Poll a synchronous predicate until truthy or the deadline elapses.
Useful when a test fires a D-Bus signal (or another out-of-band
trigger) and needs to observe state mutated by the resulting async
chain — e.g. a signal handler that schedules its own follow-up
tasks. Completes the moment the predicate is true, so the wait
costs only what's actually needed; this avoids the choice between a
fixed sleep that's fast on idle and racy under load and a fixed
sleep that's robust under load and wasteful on idle.
"""
deadline = asyncio.get_running_loop().time() + timeout
while not predicate():
if asyncio.get_running_loop().time() >= deadline:
raise AssertionError(f"Predicate did not become true within {timeout}s")
await asyncio.sleep(interval)
def get_fixture_path(filename: str) -> Path:
"""Get path for fixture."""
return Path(Path(__file__).parent.joinpath("fixtures"), filename)

View File

@@ -105,9 +105,6 @@ async def test_get_config_api_error(coresys: CoreSys):
[
("2026.4.0", True, True),
("2026.4.0", False, False),
("2026.5.1", True, True),
("2026.5.1", False, True),
("2026.6.0", False, True),
("2024.1.0", True, False),
(LANDINGPAGE, True, False),
],

View File

@@ -1,5 +1,6 @@
"""Test OS API."""
import asyncio
from dataclasses import replace
from pathlib import PosixPath
from unittest.mock import patch
@@ -15,7 +16,7 @@ from supervisor.os.data_disk import Disk
from supervisor.resolution.const import ContextType, IssueType
from supervisor.resolution.data import Issue
from tests.common import mock_dbus_services, wait_for
from tests.common import mock_dbus_services
from tests.dbus_service_mocks.agent_datadisk import DataDisk as DataDiskService
from tests.dbus_service_mocks.agent_system import System as SystemService
from tests.dbus_service_mocks.base import DBusServiceMock
@@ -356,13 +357,11 @@ async def test_multiple_datadisk_add_remove_signals(
},
)
await udisks2_service.ping()
await wait_for(
lambda: (
Issue(
IssueType.MULTIPLE_DATA_DISKS, ContextType.SYSTEM, reference="/dev/sdb1"
)
in coresys.resolution.issues
)
await asyncio.sleep(0.2)
assert (
Issue(IssueType.MULTIPLE_DATA_DISKS, ContextType.SYSTEM, reference="/dev/sdb1")
in coresys.resolution.issues
)
udisks2_service.InterfacesRemoved(
@@ -370,7 +369,9 @@ async def test_multiple_datadisk_add_remove_signals(
["org.freedesktop.UDisks2.Block", "org.freedesktop.UDisks2.Filesystem"],
)
await udisks2_service.ping()
await wait_for(lambda: coresys.resolution.issues == [])
await asyncio.sleep(0.2)
assert coresys.resolution.issues == []
async def test_disabled_datadisk_add_remove_signals(
@@ -408,13 +409,11 @@ async def test_disabled_datadisk_add_remove_signals(
},
)
await udisks2_service.ping()
await wait_for(
lambda: (
Issue(
IssueType.DISABLED_DATA_DISK, ContextType.SYSTEM, reference="/dev/sdb1"
)
in coresys.resolution.issues
)
await asyncio.sleep(0.2)
assert (
Issue(IssueType.DISABLED_DATA_DISK, ContextType.SYSTEM, reference="/dev/sdb1")
in coresys.resolution.issues
)
udisks2_service.InterfacesRemoved(
@@ -422,4 +421,6 @@ async def test_disabled_datadisk_add_remove_signals(
["org.freedesktop.UDisks2.Block", "org.freedesktop.UDisks2.Filesystem"],
)
await udisks2_service.ping()
await wait_for(lambda: coresys.resolution.issues == [])
await asyncio.sleep(0.2)
assert coresys.resolution.issues == []