mirror of
https://github.com/home-assistant/supervisor.git
synced 2025-07-13 04:06:33 +00:00
Watchdog addon on successful but unexpected exit (#3815)
This commit is contained in:
parent
024b813865
commit
f4811a0243
@ -112,6 +112,9 @@ class Addon(AddonModel):
|
|||||||
super().__init__(coresys, slug)
|
super().__init__(coresys, slug)
|
||||||
self.instance: DockerAddon = DockerAddon(coresys, self)
|
self.instance: DockerAddon = DockerAddon(coresys, self)
|
||||||
self._state: AddonState = AddonState.UNKNOWN
|
self._state: AddonState = AddonState.UNKNOWN
|
||||||
|
self._manual_stop: bool = (
|
||||||
|
self.sys_hardware.helper.last_boot == self.sys_config.last_boot
|
||||||
|
)
|
||||||
|
|
||||||
@Job(
|
@Job(
|
||||||
name=f"addon_{slug}_restart_after_problem",
|
name=f"addon_{slug}_restart_after_problem",
|
||||||
@ -682,6 +685,7 @@ class Addon(AddonModel):
|
|||||||
|
|
||||||
async def stop(self) -> None:
|
async def stop(self) -> None:
|
||||||
"""Stop add-on."""
|
"""Stop add-on."""
|
||||||
|
self._manual_stop = True
|
||||||
try:
|
try:
|
||||||
await self.instance.stop()
|
await self.instance.stop()
|
||||||
except DockerError as err:
|
except DockerError as err:
|
||||||
@ -950,6 +954,7 @@ class Addon(AddonModel):
|
|||||||
ContainerState.HEALTHY,
|
ContainerState.HEALTHY,
|
||||||
ContainerState.UNHEALTHY,
|
ContainerState.UNHEALTHY,
|
||||||
]:
|
]:
|
||||||
|
self._manual_stop = False
|
||||||
self.state = AddonState.STARTED
|
self.state = AddonState.STARTED
|
||||||
elif event.state == ContainerState.STOPPED:
|
elif event.state == ContainerState.STOPPED:
|
||||||
self.state = AddonState.STOPPED
|
self.state = AddonState.STOPPED
|
||||||
@ -958,8 +963,15 @@ class Addon(AddonModel):
|
|||||||
|
|
||||||
async def watchdog_container(self, event: DockerContainerStateEvent) -> None:
|
async def watchdog_container(self, event: DockerContainerStateEvent) -> None:
|
||||||
"""Process state changes in addon container and restart if necessary."""
|
"""Process state changes in addon container and restart if necessary."""
|
||||||
if not (event.name == self.instance.name and self.watchdog):
|
if (
|
||||||
|
not (event.name == self.instance.name and self.watchdog)
|
||||||
|
or self._manual_stop
|
||||||
|
):
|
||||||
return
|
return
|
||||||
|
|
||||||
if event.state in [ContainerState.FAILED, ContainerState.UNHEALTHY]:
|
if event.state in [
|
||||||
|
ContainerState.FAILED,
|
||||||
|
ContainerState.STOPPED,
|
||||||
|
ContainerState.UNHEALTHY,
|
||||||
|
]:
|
||||||
await self._restart_after_problem(self, event.state)
|
await self._restart_after_problem(self, event.state)
|
||||||
|
@ -41,7 +41,7 @@ class HwHelper(CoreSysAttributes):
|
|||||||
return bool(self.sys_hardware.filter_devices(subsystem=UdevSubsystem.USB))
|
return bool(self.sys_hardware.filter_devices(subsystem=UdevSubsystem.USB))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def last_boot(self) -> str | None:
|
def last_boot(self) -> datetime | None:
|
||||||
"""Return last boot time."""
|
"""Return last boot time."""
|
||||||
try:
|
try:
|
||||||
stats: str = _PROC_STAT.read_text(encoding="utf-8")
|
stats: str = _PROC_STAT.read_text(encoding="utf-8")
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
"""Test Home Assistant Add-ons."""
|
"""Test Home Assistant Add-ons."""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from datetime import timedelta
|
||||||
from unittest.mock import MagicMock, PropertyMock, patch
|
from unittest.mock import MagicMock, PropertyMock, patch
|
||||||
|
|
||||||
from docker.errors import DockerException
|
from docker.errors import DockerException
|
||||||
@ -9,13 +10,38 @@ import pytest
|
|||||||
from supervisor.addons.addon import Addon
|
from supervisor.addons.addon import Addon
|
||||||
from supervisor.const import AddonState, BusEvent
|
from supervisor.const import AddonState, BusEvent
|
||||||
from supervisor.coresys import CoreSys
|
from supervisor.coresys import CoreSys
|
||||||
|
from supervisor.docker.addon import DockerAddon
|
||||||
from supervisor.docker.const import ContainerState
|
from supervisor.docker.const import ContainerState
|
||||||
from supervisor.docker.monitor import DockerContainerStateEvent
|
from supervisor.docker.monitor import DockerContainerStateEvent
|
||||||
from supervisor.exceptions import AddonsJobError, AudioUpdateError
|
from supervisor.exceptions import AddonsJobError, AudioUpdateError
|
||||||
|
from supervisor.store.repository import Repository
|
||||||
|
from supervisor.utils.dt import utcnow
|
||||||
|
|
||||||
from ..const import TEST_ADDON_SLUG
|
from ..const import TEST_ADDON_SLUG
|
||||||
|
|
||||||
|
|
||||||
|
def _fire_test_event(coresys: CoreSys, name: str, state: ContainerState):
|
||||||
|
"""Fire a test event."""
|
||||||
|
coresys.bus.fire_event(
|
||||||
|
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
||||||
|
DockerContainerStateEvent(
|
||||||
|
name=name,
|
||||||
|
state=state,
|
||||||
|
id="abc123",
|
||||||
|
time=1,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def mock_current_state(state: ContainerState) -> ContainerState:
|
||||||
|
"""Mock for current state method."""
|
||||||
|
return state
|
||||||
|
|
||||||
|
|
||||||
|
async def mock_stop() -> None:
|
||||||
|
"""Mock for stop method."""
|
||||||
|
|
||||||
|
|
||||||
def test_options_merge(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
def test_options_merge(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
||||||
"""Test options merge."""
|
"""Test options merge."""
|
||||||
addon = coresys.addons.get(TEST_ADDON_SLUG)
|
addon = coresys.addons.get(TEST_ADDON_SLUG)
|
||||||
@ -71,174 +97,107 @@ def test_options_merge(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
|||||||
|
|
||||||
async def test_addon_state_listener(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
async def test_addon_state_listener(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
||||||
"""Test addon is setting state from docker events."""
|
"""Test addon is setting state from docker events."""
|
||||||
with patch.object(type(install_addon_ssh.instance), "attach"):
|
with patch.object(DockerAddon, "attach"):
|
||||||
await install_addon_ssh.load()
|
await install_addon_ssh.load()
|
||||||
|
|
||||||
assert install_addon_ssh.state == AddonState.UNKNOWN
|
assert install_addon_ssh.state == AddonState.UNKNOWN
|
||||||
|
|
||||||
with patch.object(type(install_addon_ssh), "watchdog_container"):
|
with patch.object(Addon, "watchdog_container"):
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.RUNNING,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
assert install_addon_ssh.state == AddonState.STARTED
|
assert install_addon_ssh.state == AddonState.STARTED
|
||||||
|
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.STOPPED,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
assert install_addon_ssh.state == AddonState.STOPPED
|
assert install_addon_ssh.state == AddonState.STOPPED
|
||||||
|
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.HEALTHY)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.HEALTHY,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
assert install_addon_ssh.state == AddonState.STARTED
|
assert install_addon_ssh.state == AddonState.STARTED
|
||||||
|
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.FAILED,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
assert install_addon_ssh.state == AddonState.ERROR
|
assert install_addon_ssh.state == AddonState.ERROR
|
||||||
|
|
||||||
# Test other addons are ignored
|
# Test other addons are ignored
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, "addon_local_non_installed", ContainerState.RUNNING)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name="addon_local_non_installed",
|
|
||||||
state=ContainerState.RUNNING,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
assert install_addon_ssh.state == AddonState.ERROR
|
assert install_addon_ssh.state == AddonState.ERROR
|
||||||
|
|
||||||
|
|
||||||
async def mock_current_state(state: ContainerState) -> ContainerState:
|
|
||||||
"""Mock for current state method."""
|
|
||||||
return state
|
|
||||||
|
|
||||||
|
|
||||||
async def mock_stop() -> None:
|
|
||||||
"""Mock for stop method."""
|
|
||||||
|
|
||||||
|
|
||||||
async def test_addon_watchdog(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
async def test_addon_watchdog(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
||||||
"""Test addon watchdog works correctly."""
|
"""Test addon watchdog works correctly."""
|
||||||
with patch.object(type(install_addon_ssh.instance), "attach"):
|
with patch.object(DockerAddon, "attach"):
|
||||||
await install_addon_ssh.load()
|
await install_addon_ssh.load()
|
||||||
|
|
||||||
install_addon_ssh.watchdog = True
|
install_addon_ssh.watchdog = True
|
||||||
|
|
||||||
with patch.object(Addon, "restart") as restart, patch.object(
|
with patch.object(Addon, "restart") as restart, patch.object(
|
||||||
Addon, "start"
|
Addon, "start"
|
||||||
) as start, patch.object(
|
) as start, patch.object(DockerAddon, "current_state") as current_state:
|
||||||
type(install_addon_ssh.instance), "current_state"
|
# Restart if it becomes unhealthy
|
||||||
) as current_state:
|
|
||||||
current_state.return_value = mock_current_state(ContainerState.UNHEALTHY)
|
current_state.return_value = mock_current_state(ContainerState.UNHEALTHY)
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.UNHEALTHY)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.UNHEALTHY,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
restart.assert_called_once()
|
restart.assert_called_once()
|
||||||
start.assert_not_called()
|
start.assert_not_called()
|
||||||
|
|
||||||
restart.reset_mock()
|
restart.reset_mock()
|
||||||
current_state.return_value = mock_current_state(ContainerState.FAILED)
|
|
||||||
|
|
||||||
with patch.object(
|
# Rebuild if it failed
|
||||||
type(install_addon_ssh.instance), "stop", return_value=mock_stop()
|
current_state.return_value = mock_current_state(ContainerState.FAILED)
|
||||||
) as stop:
|
with patch.object(DockerAddon, "stop", return_value=mock_stop()) as stop:
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.FAILED,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
stop.assert_called_once_with(remove_container=True)
|
stop.assert_called_once_with(remove_container=True)
|
||||||
restart.assert_not_called()
|
restart.assert_not_called()
|
||||||
start.assert_called_once()
|
start.assert_called_once()
|
||||||
|
|
||||||
start.reset_mock()
|
start.reset_mock()
|
||||||
|
|
||||||
# Do not process event if container state has changed since fired
|
# Do not process event if container state has changed since fired
|
||||||
current_state.return_value = mock_current_state(ContainerState.HEALTHY)
|
current_state.return_value = mock_current_state(ContainerState.HEALTHY)
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.FAILED)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.FAILED,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
|
||||||
restart.assert_not_called()
|
|
||||||
start.assert_not_called()
|
|
||||||
|
|
||||||
# Do not restart when addon stopped normally
|
|
||||||
coresys.bus.fire_event(
|
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.STOPPED,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
restart.assert_not_called()
|
restart.assert_not_called()
|
||||||
start.assert_not_called()
|
start.assert_not_called()
|
||||||
|
|
||||||
# Other addons ignored
|
# Other addons ignored
|
||||||
coresys.bus.fire_event(
|
current_state.return_value = mock_current_state(ContainerState.UNHEALTHY)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
_fire_test_event(coresys, "addon_local_non_installed", ContainerState.UNHEALTHY)
|
||||||
DockerContainerStateEvent(
|
|
||||||
name="addon_local_non_installed",
|
|
||||||
state=ContainerState.UNHEALTHY,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
restart.assert_not_called()
|
restart.assert_not_called()
|
||||||
start.assert_not_called()
|
start.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_watchdog_on_stop(coresys: CoreSys, install_addon_ssh: Addon) -> None:
|
||||||
|
"""Test addon watchdog restarts addon on stop if not manual."""
|
||||||
|
with patch.object(DockerAddon, "attach"):
|
||||||
|
await install_addon_ssh.load()
|
||||||
|
|
||||||
|
install_addon_ssh.watchdog = True
|
||||||
|
|
||||||
|
with patch.object(Addon, "restart") as restart, patch.object(
|
||||||
|
DockerAddon,
|
||||||
|
"current_state",
|
||||||
|
return_value=mock_current_state(ContainerState.STOPPED),
|
||||||
|
), patch.object(DockerAddon, "stop", return_value=mock_stop()):
|
||||||
|
# Do not restart when addon stopped by user
|
||||||
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
await install_addon_ssh.stop()
|
||||||
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
restart.assert_not_called()
|
||||||
|
|
||||||
|
# Do restart addon if it stops and user didn't do it
|
||||||
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
restart.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
async def test_listener_attached_on_install(coresys: CoreSys, repository):
|
async def test_listener_attached_on_install(coresys: CoreSys, repository):
|
||||||
"""Test events listener attached on addon install."""
|
"""Test events listener attached on addon install."""
|
||||||
container_collection = MagicMock()
|
container_collection = MagicMock()
|
||||||
@ -258,19 +217,45 @@ async def test_listener_attached_on_install(coresys: CoreSys, repository):
|
|||||||
):
|
):
|
||||||
await coresys.addons.install.__wrapped__(coresys.addons, TEST_ADDON_SLUG)
|
await coresys.addons.install.__wrapped__(coresys.addons, TEST_ADDON_SLUG)
|
||||||
|
|
||||||
coresys.bus.fire_event(
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.RUNNING)
|
||||||
BusEvent.DOCKER_CONTAINER_STATE_CHANGE,
|
|
||||||
DockerContainerStateEvent(
|
|
||||||
name=f"addon_{TEST_ADDON_SLUG}",
|
|
||||||
state=ContainerState.RUNNING,
|
|
||||||
id="abc123",
|
|
||||||
time=1,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
await asyncio.sleep(0)
|
await asyncio.sleep(0)
|
||||||
assert coresys.addons.get(TEST_ADDON_SLUG).state == AddonState.STARTED
|
assert coresys.addons.get(TEST_ADDON_SLUG).state == AddonState.STARTED
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"boot_timedelta,restart_count", [(timedelta(), 0), (timedelta(days=1), 1)]
|
||||||
|
)
|
||||||
|
async def test_watchdog_during_attach(
|
||||||
|
coresys: CoreSys,
|
||||||
|
repository: Repository,
|
||||||
|
boot_timedelta: timedelta,
|
||||||
|
restart_count: int,
|
||||||
|
):
|
||||||
|
"""Test host reboot treated as manual stop but not supervisor restart."""
|
||||||
|
store = coresys.addons.store[TEST_ADDON_SLUG]
|
||||||
|
coresys.addons.data.install(store)
|
||||||
|
|
||||||
|
with patch.object(Addon, "restart") as restart, patch.object(
|
||||||
|
type(coresys.hardware.helper),
|
||||||
|
"last_boot",
|
||||||
|
new=PropertyMock(return_value=utcnow()),
|
||||||
|
), patch.object(DockerAddon, "attach"), patch.object(
|
||||||
|
DockerAddon,
|
||||||
|
"current_state",
|
||||||
|
return_value=mock_current_state(ContainerState.STOPPED),
|
||||||
|
):
|
||||||
|
coresys.config.last_boot = coresys.hardware.helper.last_boot + boot_timedelta
|
||||||
|
addon = Addon(coresys, store.slug)
|
||||||
|
coresys.addons.local[addon.slug] = addon
|
||||||
|
addon.watchdog = True
|
||||||
|
|
||||||
|
await addon.load()
|
||||||
|
_fire_test_event(coresys, f"addon_{TEST_ADDON_SLUG}", ContainerState.STOPPED)
|
||||||
|
await asyncio.sleep(0)
|
||||||
|
|
||||||
|
assert restart.call_count == restart_count
|
||||||
|
|
||||||
|
|
||||||
async def test_install_update_fails_if_out_of_date(
|
async def test_install_update_fails_if_out_of_date(
|
||||||
coresys: CoreSys, install_addon_ssh: Addon
|
coresys: CoreSys, install_addon_ssh: Addon
|
||||||
):
|
):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user