mirror of
https://github.com/home-assistant/supervisor.git
synced 2025-04-19 18:57:16 +00:00
Support CGroup v2 on Supervised with manual restarts (#5419)
This commit is contained in:
parent
1f893117cc
commit
829193fe84
@ -148,6 +148,9 @@ class Addon(AddonModel):
|
||||
self._boot_failed_issue = Issue(
|
||||
IssueType.BOOT_FAIL, ContextType.ADDON, reference=self.slug
|
||||
)
|
||||
self._device_access_missing_issue = Issue(
|
||||
IssueType.DEVICE_ACCESS_MISSING, ContextType.ADDON, reference=self.slug
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
"""Return internal representation."""
|
||||
@ -158,6 +161,11 @@ class Addon(AddonModel):
|
||||
"""Get issue used if start on boot failed."""
|
||||
return self._boot_failed_issue
|
||||
|
||||
@property
|
||||
def device_access_missing_issue(self) -> Issue:
|
||||
"""Get issue used if device access is missing and can't be automatically added."""
|
||||
return self._device_access_missing_issue
|
||||
|
||||
@property
|
||||
def state(self) -> AddonState:
|
||||
"""Return state of the add-on."""
|
||||
@ -182,6 +190,13 @@ class Addon(AddonModel):
|
||||
):
|
||||
self.sys_resolution.dismiss_issue(self.boot_failed_issue)
|
||||
|
||||
# Dismiss device access missing issue if present and we stopped
|
||||
if (
|
||||
new_state == AddonState.STOPPED
|
||||
and self.device_access_missing_issue in self.sys_resolution.issues
|
||||
):
|
||||
self.sys_resolution.dismiss_issue(self.device_access_missing_issue)
|
||||
|
||||
self.sys_homeassistant.websocket.send_message(
|
||||
{
|
||||
ATTR_TYPE: WSType.SUPERVISOR_EVENT,
|
||||
|
@ -10,6 +10,7 @@ import os
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from attr import evolve
|
||||
from awesomeversion import AwesomeVersion
|
||||
import docker
|
||||
from docker.types import Mount
|
||||
@ -40,7 +41,7 @@ from ..hardware.const import PolicyGroup
|
||||
from ..hardware.data import Device
|
||||
from ..jobs.const import JobCondition, JobExecutionLimit
|
||||
from ..jobs.decorator import Job
|
||||
from ..resolution.const import ContextType, IssueType, SuggestionType
|
||||
from ..resolution.const import CGROUP_V2_VERSION, ContextType, IssueType, SuggestionType
|
||||
from ..utils.sentry import capture_exception
|
||||
from .const import (
|
||||
ENV_TIME,
|
||||
@ -802,6 +803,13 @@ class DockerAddon(DockerInterface):
|
||||
|
||||
await super().stop(remove_container)
|
||||
|
||||
# If there is a device access issue and the container is removed, clear it
|
||||
if (
|
||||
remove_container
|
||||
and self.addon.device_access_missing_issue in self.sys_resolution.issues
|
||||
):
|
||||
self.sys_resolution.dismiss_issue(self.addon.device_access_missing_issue)
|
||||
|
||||
async def _validate_trust(
|
||||
self, image_id: str, image: str, version: AwesomeVersion
|
||||
) -> None:
|
||||
@ -839,6 +847,16 @@ class DockerAddon(DockerInterface):
|
||||
f"Can't process Hardware Event on {self.name}: {err!s}", _LOGGER.error
|
||||
) from err
|
||||
|
||||
if (
|
||||
self.sys_docker.info.cgroup == CGROUP_V2_VERSION
|
||||
and not self.sys_os.available
|
||||
):
|
||||
self.sys_resolution.add_issue(
|
||||
evolve(self.addon.device_access_missing_issue),
|
||||
suggestions=[SuggestionType.EXECUTE_RESTART],
|
||||
)
|
||||
return
|
||||
|
||||
permission = self.sys_hardware.policy.get_cgroups_rule(device)
|
||||
try:
|
||||
await self.sys_dbus.agent.cgroup.add_devices_allowed(
|
||||
|
@ -15,6 +15,9 @@ MINIMUM_FULL_BACKUPS = 2
|
||||
DNS_CHECK_HOST = "_checkdns.home-assistant.io"
|
||||
DNS_ERROR_NO_DATA = 1
|
||||
|
||||
CGROUP_V1_VERSION = "1"
|
||||
CGROUP_V2_VERSION = "2"
|
||||
|
||||
|
||||
class ContextType(StrEnum):
|
||||
"""Place where somethings was happening."""
|
||||
@ -77,6 +80,7 @@ class IssueType(StrEnum):
|
||||
CORRUPT_FILESYSTEM = "corrupt_filesystem"
|
||||
DETACHED_ADDON_MISSING = "detached_addon_missing"
|
||||
DETACHED_ADDON_REMOVED = "detached_addon_removed"
|
||||
DEVICE_ACCESS_MISSING = "device_access_missing"
|
||||
DISABLED_DATA_DISK = "disabled_data_disk"
|
||||
DNS_LOOP = "dns_loop"
|
||||
DNS_SERVER_FAILED = "dns_server_failed"
|
||||
@ -112,6 +116,7 @@ class SuggestionType(StrEnum):
|
||||
EXECUTE_REMOVE = "execute_remove"
|
||||
EXECUTE_REPAIR = "execute_repair"
|
||||
EXECUTE_RESET = "execute_reset"
|
||||
EXECUTE_RESTART = "execute_restart"
|
||||
EXECUTE_START = "execute_start"
|
||||
EXECUTE_STOP = "execute_stop"
|
||||
EXECUTE_UPDATE = "execute_update"
|
||||
|
@ -2,12 +2,9 @@
|
||||
|
||||
from ...const import CoreState
|
||||
from ...coresys import CoreSys
|
||||
from ..const import UnsupportedReason
|
||||
from ..const import CGROUP_V1_VERSION, CGROUP_V2_VERSION, UnsupportedReason
|
||||
from .base import EvaluateBase
|
||||
|
||||
CGROUP_V1_VERSION = "1"
|
||||
CGROUP_V2_VERSION = "2"
|
||||
|
||||
|
||||
def setup(coresys: CoreSys) -> EvaluateBase:
|
||||
"""Initialize evaluation-setup function."""
|
||||
@ -20,9 +17,7 @@ class EvaluateCGroupVersion(EvaluateBase):
|
||||
@property
|
||||
def expected_versions(self) -> set[str]:
|
||||
"""Return expected cgroup versions."""
|
||||
if self.coresys.os.available:
|
||||
return {CGROUP_V1_VERSION, CGROUP_V2_VERSION}
|
||||
return {CGROUP_V1_VERSION}
|
||||
return {CGROUP_V1_VERSION, CGROUP_V2_VERSION}
|
||||
|
||||
@property
|
||||
def reason(self) -> UnsupportedReason:
|
||||
|
60
supervisor/resolution/fixups/addon_execute_restart.py
Normal file
60
supervisor/resolution/fixups/addon_execute_restart.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""Helpers to fix addon by restarting it."""
|
||||
|
||||
import logging
|
||||
|
||||
from ...coresys import CoreSys
|
||||
from ...exceptions import AddonsError, ResolutionFixupError
|
||||
from ..const import ContextType, IssueType, SuggestionType
|
||||
from .base import FixupBase
|
||||
|
||||
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def setup(coresys: CoreSys) -> FixupBase:
|
||||
"""Check setup function."""
|
||||
return FixupAddonExecuteRestart(coresys)
|
||||
|
||||
|
||||
class FixupAddonExecuteRestart(FixupBase):
|
||||
"""Storage class for fixup."""
|
||||
|
||||
async def process_fixup(self, reference: str | None = None) -> None:
|
||||
"""Initialize the fixup class."""
|
||||
if not (addon := self.sys_addons.get(reference, local_only=True)):
|
||||
_LOGGER.info("Cannot restart addon %s as it does not exist", reference)
|
||||
return
|
||||
|
||||
# Stop addon
|
||||
try:
|
||||
await addon.stop()
|
||||
except AddonsError as err:
|
||||
_LOGGER.error("Could not stop %s due to %s", reference, err)
|
||||
raise ResolutionFixupError() from None
|
||||
|
||||
# Start addon
|
||||
# Removing the container has already fixed the issue and dismissed it
|
||||
# So any errors on startup are just logged. We won't wait on the startup task either
|
||||
try:
|
||||
await addon.start()
|
||||
except AddonsError as err:
|
||||
_LOGGER.error("Could not restart %s due to %s", reference, err)
|
||||
|
||||
@property
|
||||
def suggestion(self) -> SuggestionType:
|
||||
"""Return a SuggestionType enum."""
|
||||
return SuggestionType.EXECUTE_RESTART
|
||||
|
||||
@property
|
||||
def context(self) -> ContextType:
|
||||
"""Return a ContextType enum."""
|
||||
return ContextType.ADDON
|
||||
|
||||
@property
|
||||
def issues(self) -> list[IssueType]:
|
||||
"""Return a IssueType enum list."""
|
||||
return [IssueType.DEVICE_ACCESS_MISSING]
|
||||
|
||||
@property
|
||||
def auto(self) -> bool:
|
||||
"""Return if a fixup can be apply as auto fix."""
|
||||
return False
|
@ -1,6 +1,8 @@
|
||||
"""Test docker addon setup."""
|
||||
|
||||
import asyncio
|
||||
from ipaddress import IPv4Address
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock, Mock, PropertyMock, patch
|
||||
|
||||
@ -12,12 +14,17 @@ from supervisor.addons import validate as vd
|
||||
from supervisor.addons.addon import Addon
|
||||
from supervisor.addons.model import Data
|
||||
from supervisor.addons.options import AddonOptions
|
||||
from supervisor.const import BusEvent
|
||||
from supervisor.coresys import CoreSys
|
||||
from supervisor.dbus.agent.cgroup import CGroup
|
||||
from supervisor.docker.addon import DockerAddon
|
||||
from supervisor.docker.manager import DockerAPI
|
||||
from supervisor.exceptions import CoreDNSError, DockerNotFound
|
||||
from supervisor.hardware.data import Device
|
||||
from supervisor.os.manager import OSManager
|
||||
from supervisor.plugins.dns import PluginDns
|
||||
from supervisor.resolution.const import ContextType, IssueType
|
||||
from supervisor.resolution.data import Issue
|
||||
from supervisor.resolution.const import ContextType, IssueType, SuggestionType
|
||||
from supervisor.resolution.data import Issue, Suggestion
|
||||
|
||||
from ..common import load_json_fixture
|
||||
from . import DEV_MOUNT
|
||||
@ -380,3 +387,113 @@ async def test_addon_stop_delete_host_error(
|
||||
await docker_addon.stop()
|
||||
|
||||
capture_exception.assert_called_once_with(err)
|
||||
|
||||
|
||||
TEST_DEV_PATH = "/dev/ttyAMA0"
|
||||
TEST_SYSFS_PATH = "/sys/devices/platform/soc/ffe09000.usb/ff500000.usb/xhci-hcd.0.auto/usb1/1-1/1-1.1/1-1.1:1.0/tty/ttyACM0"
|
||||
TEST_HW_DEVICE = Device(
|
||||
name="ttyACM0",
|
||||
path=Path("/dev/ttyAMA0"),
|
||||
sysfs=Path(
|
||||
"/sys/devices/platform/soc/ffe09000.usb/ff500000.usb/xhci-hcd.0.auto/usb1/1-1/1-1.1/1-1.1:1.0/tty/ttyACM0"
|
||||
),
|
||||
subsystem="tty",
|
||||
parent=Path(
|
||||
"/sys/devices/platform/soc/ffe09000.usb/ff500000.usb/xhci-hcd.0.auto/usb1/1-1/1-1.1/1-1.1:1.0"
|
||||
),
|
||||
links=[
|
||||
Path(
|
||||
"/dev/serial/by-id/usb-Texas_Instruments_TI_CC2531_USB_CDC___0X0123456789ABCDEF-if00"
|
||||
),
|
||||
Path("/dev/serial/by-path/platform-xhci-hcd.0.auto-usb-0:1.1:1.0"),
|
||||
Path("/dev/serial/by-path/platform-xhci-hcd.0.auto-usbv2-0:1.1:1.0"),
|
||||
],
|
||||
attributes={},
|
||||
children=[],
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("path_extern")
|
||||
@pytest.mark.parametrize(
|
||||
("dev_path", "cgroup", "is_os"),
|
||||
[
|
||||
(TEST_DEV_PATH, "1", True),
|
||||
(TEST_SYSFS_PATH, "1", True),
|
||||
(TEST_DEV_PATH, "1", False),
|
||||
(TEST_SYSFS_PATH, "1", False),
|
||||
(TEST_DEV_PATH, "2", True),
|
||||
(TEST_SYSFS_PATH, "2", True),
|
||||
],
|
||||
)
|
||||
async def test_addon_new_device(
|
||||
coresys: CoreSys,
|
||||
install_addon_ssh: Addon,
|
||||
container: MagicMock,
|
||||
docker: DockerAPI,
|
||||
dev_path: str,
|
||||
cgroup: str,
|
||||
is_os: bool,
|
||||
):
|
||||
"""Test new device that is listed in static devices."""
|
||||
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
|
||||
install_addon_ssh.data["devices"] = [dev_path]
|
||||
container.id = 123
|
||||
docker.info.cgroup = cgroup
|
||||
|
||||
with (
|
||||
patch.object(Addon, "write_options"),
|
||||
patch.object(OSManager, "available", new=PropertyMock(return_value=is_os)),
|
||||
patch.object(CGroup, "add_devices_allowed") as add_devices,
|
||||
):
|
||||
await install_addon_ssh.start()
|
||||
|
||||
coresys.bus.fire_event(
|
||||
BusEvent.HARDWARE_NEW_DEVICE,
|
||||
TEST_HW_DEVICE,
|
||||
)
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
add_devices.assert_called_once_with(123, "c 0:0 rwm")
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("path_extern")
|
||||
@pytest.mark.parametrize("dev_path", [TEST_DEV_PATH, TEST_SYSFS_PATH])
|
||||
async def test_addon_new_device_no_haos(
|
||||
coresys: CoreSys,
|
||||
install_addon_ssh: Addon,
|
||||
docker: DockerAPI,
|
||||
dev_path: str,
|
||||
):
|
||||
"""Test new device that is listed in static devices on non HAOS system with CGroup V2."""
|
||||
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
|
||||
install_addon_ssh.data["devices"] = [dev_path]
|
||||
docker.info.cgroup = "2"
|
||||
|
||||
with (
|
||||
patch.object(Addon, "write_options"),
|
||||
patch.object(OSManager, "available", new=PropertyMock(return_value=False)),
|
||||
patch.object(CGroup, "add_devices_allowed") as add_devices,
|
||||
):
|
||||
await install_addon_ssh.start()
|
||||
|
||||
coresys.bus.fire_event(
|
||||
BusEvent.HARDWARE_NEW_DEVICE,
|
||||
TEST_HW_DEVICE,
|
||||
)
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
add_devices.assert_not_called()
|
||||
|
||||
# Issue added with hardware event since access cannot be added dynamically
|
||||
assert install_addon_ssh.device_access_missing_issue in coresys.resolution.issues
|
||||
assert (
|
||||
Suggestion(
|
||||
SuggestionType.EXECUTE_RESTART, ContextType.ADDON, reference="local_ssh"
|
||||
)
|
||||
in coresys.resolution.suggestions
|
||||
)
|
||||
|
||||
# Stopping and removing the container clears it as access granted on next start
|
||||
await install_addon_ssh.stop()
|
||||
assert coresys.resolution.issues == []
|
||||
assert coresys.resolution.suggestions == []
|
||||
|
@ -26,7 +26,7 @@ async def test_evaluation(coresys: CoreSys):
|
||||
|
||||
coresys.docker.info.cgroup = CGROUP_V2_VERSION
|
||||
await cgroup_version()
|
||||
assert cgroup_version.reason in coresys.resolution.unsupported
|
||||
assert cgroup_version.reason not in coresys.resolution.unsupported
|
||||
coresys.resolution.unsupported.clear()
|
||||
|
||||
coresys.docker.info.cgroup = CGROUP_V1_VERSION
|
||||
|
120
tests/resolution/fixup/test_addon_execute_restart.py
Normal file
120
tests/resolution/fixup/test_addon_execute_restart.py
Normal file
@ -0,0 +1,120 @@
|
||||
"""Test fixup addon execute restart."""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from supervisor.addons.addon import Addon
|
||||
from supervisor.const import AddonState
|
||||
from supervisor.coresys import CoreSys
|
||||
from supervisor.docker.addon import DockerAddon
|
||||
from supervisor.docker.interface import DockerInterface
|
||||
from supervisor.exceptions import DockerError
|
||||
from supervisor.resolution.const import ContextType, IssueType, SuggestionType
|
||||
from supervisor.resolution.data import Issue, Suggestion
|
||||
from supervisor.resolution.fixups.addon_execute_restart import FixupAddonExecuteRestart
|
||||
|
||||
from tests.const import TEST_ADDON_SLUG
|
||||
|
||||
DEVICE_ACCESS_MISSING_ISSUE = Issue(
|
||||
IssueType.DEVICE_ACCESS_MISSING,
|
||||
ContextType.ADDON,
|
||||
reference=TEST_ADDON_SLUG,
|
||||
)
|
||||
EXECUTE_RESTART_SUGGESTION = Suggestion(
|
||||
SuggestionType.EXECUTE_RESTART, ContextType.ADDON, reference="local_ssh"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("path_extern")
|
||||
async def test_fixup(coresys: CoreSys, install_addon_ssh: Addon):
|
||||
"""Test fixup restarts addon."""
|
||||
install_addon_ssh.state = AddonState.STARTED
|
||||
addon_execute_restart = FixupAddonExecuteRestart(coresys)
|
||||
assert addon_execute_restart.auto is False
|
||||
|
||||
async def mock_stop(*args, **kwargs):
|
||||
install_addon_ssh.state = AddonState.STOPPED
|
||||
|
||||
coresys.resolution.add_issue(
|
||||
DEVICE_ACCESS_MISSING_ISSUE,
|
||||
suggestions=[SuggestionType.EXECUTE_RESTART],
|
||||
)
|
||||
with (
|
||||
patch.object(DockerInterface, "stop") as stop,
|
||||
patch.object(DockerAddon, "run") as run,
|
||||
patch.object(Addon, "_wait_for_startup"),
|
||||
patch.object(Addon, "write_options"),
|
||||
):
|
||||
await addon_execute_restart()
|
||||
stop.assert_called_once()
|
||||
run.assert_called_once()
|
||||
|
||||
assert not coresys.resolution.issues
|
||||
assert not coresys.resolution.suggestions
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("path_extern")
|
||||
async def test_fixup_stop_error(
|
||||
coresys: CoreSys, install_addon_ssh: Addon, caplog: pytest.LogCaptureFixture
|
||||
):
|
||||
"""Test fixup fails on stop addon failure."""
|
||||
install_addon_ssh.state = AddonState.STARTED
|
||||
addon_execute_start = FixupAddonExecuteRestart(coresys)
|
||||
|
||||
coresys.resolution.add_issue(
|
||||
DEVICE_ACCESS_MISSING_ISSUE,
|
||||
suggestions=[SuggestionType.EXECUTE_RESTART],
|
||||
)
|
||||
with (
|
||||
patch.object(DockerInterface, "stop", side_effect=DockerError),
|
||||
patch.object(DockerAddon, "run") as run,
|
||||
):
|
||||
await addon_execute_start()
|
||||
run.assert_not_called()
|
||||
|
||||
assert DEVICE_ACCESS_MISSING_ISSUE in coresys.resolution.issues
|
||||
assert EXECUTE_RESTART_SUGGESTION in coresys.resolution.suggestions
|
||||
assert "Could not stop local_ssh" in caplog.text
|
||||
|
||||
|
||||
@pytest.mark.usefixtures("path_extern")
|
||||
async def test_fixup_start_error(
|
||||
coresys: CoreSys, install_addon_ssh: Addon, caplog: pytest.LogCaptureFixture
|
||||
):
|
||||
"""Test fixup logs a start addon failure."""
|
||||
install_addon_ssh.state = AddonState.STARTED
|
||||
addon_execute_start = FixupAddonExecuteRestart(coresys)
|
||||
|
||||
coresys.resolution.add_issue(
|
||||
DEVICE_ACCESS_MISSING_ISSUE,
|
||||
suggestions=[SuggestionType.EXECUTE_RESTART],
|
||||
)
|
||||
with (
|
||||
patch.object(DockerInterface, "stop") as stop,
|
||||
patch.object(DockerAddon, "run", side_effect=DockerError),
|
||||
patch.object(Addon, "write_options"),
|
||||
):
|
||||
await addon_execute_start()
|
||||
stop.assert_called_once()
|
||||
|
||||
assert DEVICE_ACCESS_MISSING_ISSUE not in coresys.resolution.issues
|
||||
assert EXECUTE_RESTART_SUGGESTION not in coresys.resolution.suggestions
|
||||
assert "Could not restart local_ssh" in caplog.text
|
||||
|
||||
|
||||
async def test_fixup_no_addon(coresys: CoreSys, caplog: pytest.LogCaptureFixture):
|
||||
"""Test fixup dismisses if addon is missing."""
|
||||
addon_execute_start = FixupAddonExecuteRestart(coresys)
|
||||
|
||||
coresys.resolution.add_issue(
|
||||
DEVICE_ACCESS_MISSING_ISSUE,
|
||||
suggestions=[SuggestionType.EXECUTE_RESTART],
|
||||
)
|
||||
with patch.object(DockerAddon, "stop") as stop:
|
||||
await addon_execute_start()
|
||||
stop.assert_not_called()
|
||||
|
||||
assert not coresys.resolution.issues
|
||||
assert not coresys.resolution.suggestions
|
||||
assert "Cannot restart addon local_ssh as it does not exist" in caplog.text
|
Loading…
x
Reference in New Issue
Block a user