mirror of
https://github.com/home-assistant/supervisor.git
synced 2025-07-15 13:16:29 +00:00
Set max reanimation attempts on HA watchdog (#4784)
This commit is contained in:
parent
32f21d208f
commit
b7ddfba71d
@ -15,6 +15,8 @@ from ..utils.sentry import capture_exception
|
|||||||
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
HASS_WATCHDOG_API = "HASS_WATCHDOG_API"
|
HASS_WATCHDOG_API = "HASS_WATCHDOG_API"
|
||||||
|
HASS_WATCHDOG_REANIMATE_FAILURES = "HASS_WATCHDOG_REANIMATE_FAILURES"
|
||||||
|
HASS_WATCHDOG_MAX_REANIMATE_ATTEMPTS = 5
|
||||||
|
|
||||||
RUN_UPDATE_SUPERVISOR = 29100
|
RUN_UPDATE_SUPERVISOR = 29100
|
||||||
RUN_UPDATE_ADDONS = 57600
|
RUN_UPDATE_ADDONS = 57600
|
||||||
@ -154,6 +156,18 @@ class Tasks(CoreSysAttributes):
|
|||||||
return
|
return
|
||||||
if await self.sys_homeassistant.api.check_api_state():
|
if await self.sys_homeassistant.api.check_api_state():
|
||||||
# Home Assistant is running properly
|
# Home Assistant is running properly
|
||||||
|
self._cache[HASS_WATCHDOG_REANIMATE_FAILURES] = 0
|
||||||
|
return
|
||||||
|
|
||||||
|
# Give up after 5 reanimation failures in a row. Supervisor cannot fix this issue.
|
||||||
|
reanimate_fails = self._cache.get(HASS_WATCHDOG_REANIMATE_FAILURES, 0)
|
||||||
|
if reanimate_fails >= HASS_WATCHDOG_MAX_REANIMATE_ATTEMPTS:
|
||||||
|
if reanimate_fails == HASS_WATCHDOG_MAX_REANIMATE_ATTEMPTS:
|
||||||
|
_LOGGER.critical(
|
||||||
|
"Watchdog cannot reanimate Home Assistant, failed all %s attempts.",
|
||||||
|
reanimate_fails,
|
||||||
|
)
|
||||||
|
self._cache[HASS_WATCHDOG_REANIMATE_FAILURES] += 1
|
||||||
return
|
return
|
||||||
|
|
||||||
# Init cache data
|
# Init cache data
|
||||||
@ -171,7 +185,11 @@ class Tasks(CoreSysAttributes):
|
|||||||
await self.sys_homeassistant.core.restart()
|
await self.sys_homeassistant.core.restart()
|
||||||
except HomeAssistantError as err:
|
except HomeAssistantError as err:
|
||||||
_LOGGER.error("Home Assistant watchdog reanimation failed!")
|
_LOGGER.error("Home Assistant watchdog reanimation failed!")
|
||||||
|
if reanimate_fails == 0:
|
||||||
capture_exception(err)
|
capture_exception(err)
|
||||||
|
self._cache[HASS_WATCHDOG_REANIMATE_FAILURES] = reanimate_fails + 1
|
||||||
|
else:
|
||||||
|
self._cache[HASS_WATCHDOG_REANIMATE_FAILURES] = 0
|
||||||
finally:
|
finally:
|
||||||
self._cache[HASS_WATCHDOG_API] = 0
|
self._cache[HASS_WATCHDOG_API] = 0
|
||||||
|
|
||||||
|
133
tests/misc/test_tasks.py
Normal file
133
tests/misc/test_tasks.py
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
"""Test scheduled tasks."""
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock, Mock, patch
|
||||||
|
|
||||||
|
from awesomeversion import AwesomeVersion
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from supervisor.coresys import CoreSys
|
||||||
|
from supervisor.exceptions import HomeAssistantError
|
||||||
|
from supervisor.homeassistant.api import HomeAssistantAPI
|
||||||
|
from supervisor.homeassistant.const import LANDINGPAGE
|
||||||
|
from supervisor.homeassistant.core import HomeAssistantCore
|
||||||
|
from supervisor.misc.tasks import Tasks
|
||||||
|
|
||||||
|
# pylint: disable=protected-access
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(name="tasks")
async def fixture_tasks(coresys: CoreSys, container: MagicMock) -> Tasks:
    """Provide a task manager for a watched, running Home Assistant.

    The watchdog flag is switched on, a regular core version is set and the
    mocked container is marked as running, so each test only has to change
    the single precondition it wants to exercise.
    """
    homeassistant = coresys.homeassistant
    homeassistant.watchdog = True
    homeassistant.version = AwesomeVersion("2023.12.0")
    container.status = "running"

    task_manager = Tasks(coresys)
    yield task_manager
|
||||||
|
|
||||||
|
|
||||||
|
async def test_watchdog_homeassistant_api(
    tasks: Tasks, caplog: pytest.LogCaptureFixture
):
    """Check the API watchdog restarts Home Assistant on the second miss only.

    The first failed API check must just be logged as a missed response; the
    next consecutive failure must be logged as a problem and trigger a restart.
    """
    miss_msg = "Watchdog miss API response from Home Assistant"
    problem_msg = "Watchdog found a problem with Home Assistant API!"
    api_down = patch.object(HomeAssistantAPI, "check_api_state", return_value=False)
    core_restart = patch.object(HomeAssistantCore, "restart")

    with api_down, core_restart as restart:
        # First failure: noted in the log, but no restart yet.
        await tasks._watchdog_homeassistant_api()
        restart.assert_not_called()
        assert miss_msg in caplog.text
        assert problem_msg not in caplog.text

        # Second failure in a row: the core gets restarted.
        caplog.clear()
        await tasks._watchdog_homeassistant_api()
        restart.assert_called_once()
        assert miss_msg not in caplog.text
        assert problem_msg in caplog.text
|
||||||
|
|
||||||
|
|
||||||
|
async def test_watchdog_homeassistant_api_off(tasks: Tasks, coresys: CoreSys):
    """Check a disabled watchdog never restarts Home Assistant."""
    coresys.homeassistant.watchdog = False
    api_down = patch.object(HomeAssistantAPI, "check_api_state", return_value=False)
    core_restart = patch.object(HomeAssistantCore, "restart")

    with api_down, core_restart as restart:
        # Run the watchdog twice with a failing API check.
        for _ in range(2):
            await tasks._watchdog_homeassistant_api()
        restart.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_watchdog_homeassistant_api_error_state(tasks: Tasks, coresys: CoreSys):
    """Check the watchdog leaves a core flagged with an error state alone."""
    coresys.homeassistant.core._error_state = True
    api_down = patch.object(HomeAssistantAPI, "check_api_state", return_value=False)
    core_restart = patch.object(HomeAssistantCore, "restart")

    with api_down, core_restart as restart:
        # Run the watchdog twice with a failing API check.
        for _ in range(2):
            await tasks._watchdog_homeassistant_api()
        restart.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_watchdog_homeassistant_api_landing_page(tasks: Tasks, coresys: CoreSys):
    """Check the watchdog does not restart the core while on the landing page."""
    coresys.homeassistant.version = LANDINGPAGE
    api_down = patch.object(HomeAssistantAPI, "check_api_state", return_value=False)
    core_restart = patch.object(HomeAssistantCore, "restart")

    with api_down, core_restart as restart:
        # Run the watchdog twice with a failing API check.
        for _ in range(2):
            await tasks._watchdog_homeassistant_api()
        restart.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_watchdog_homeassistant_api_not_running(
    tasks: Tasks, container: MagicMock
):
    """Check the watchdog does not restart a core whose container is stopped."""
    container.status = "stopped"
    api_down = patch.object(HomeAssistantAPI, "check_api_state", return_value=False)
    core_restart = patch.object(HomeAssistantCore, "restart")

    with api_down, core_restart as restart:
        # Run the watchdog twice with a failing API check.
        for _ in range(2):
            await tasks._watchdog_homeassistant_api()
        restart.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
async def test_watchdog_homeassistant_api_reanimation_limit(
    tasks: Tasks, caplog: pytest.LogCaptureFixture, capture_exception: Mock
):
    """Check the watchdog gives up after five failed restarts in a row.

    Every restart raises HomeAssistantError. After five such rounds the
    watchdog must log its give-up message once and then stay silent, without
    attempting any further restart.
    """
    err = HomeAssistantError()
    api_down = patch.object(HomeAssistantAPI, "check_api_state", return_value=False)
    failing_restart = patch.object(HomeAssistantCore, "restart", side_effect=err)

    with api_down, failing_restart as restart:
        # One round = a missed API response followed by a failing restart.
        for _ in range(5):
            await tasks._watchdog_homeassistant_api()
            restart.assert_not_called()

            await tasks._watchdog_homeassistant_api()
            restart.assert_called_once()
            assert "Home Assistant watchdog reanimation failed!" in caplog.text

            restart.reset_mock()

        # Despite five failures the error must have been captured exactly once.
        capture_exception.assert_called_once_with(err)

        # Limit reached: only the give-up message is logged, no restart.
        caplog.clear()
        await tasks._watchdog_homeassistant_api()
        restart.assert_not_called()
        give_up_log = caplog.text
        assert "Watchdog miss API response from Home Assistant" not in give_up_log
        assert "Watchdog found a problem with Home Assistant API!" not in give_up_log
        assert (
            "Watchdog cannot reanimate Home Assistant, failed all 5 attempts."
            in give_up_log
        )

        # Any later run stays completely quiet.
        caplog.clear()
        await tasks._watchdog_homeassistant_api()
        restart.assert_not_called()
        assert not caplog.text
|
Loading…
x
Reference in New Issue
Block a user