Write core state to /run/supervisor & monitor (#2027)

* Write core state to /run/supervisor & monitor

* Add watchdog

* Add log when they are getting started

* fix stale comment

* Fix script

* avoid output

* fix tests

* fix lint

* Update rootfs/etc/services.d/watchdog/run

Co-authored-by: Franck Nijhof <git@frenck.dev>

Co-authored-by: Franck Nijhof <git@frenck.dev>
This commit is contained in:
Pascal Vizeli 2020-09-08 10:09:41 +02:00 committed by GitHub
parent 14b1ea4eb0
commit 40aa5c9caf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 111 additions and 5 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/with-contenv bashio
# ==============================================================================
# Start Service service
# Start Supervisor service
# ==============================================================================
# Preload jemalloc: exporting LD_PRELOAD makes the dynamic linker load this
# library into processes started later with this environment.
export LD_PRELOAD="/usr/local/lib/libjemalloc.so.2"

View File

@ -0,0 +1,8 @@
#!/usr/bin/execlineb -S1
# ==============================================================================
# Take down the S6 supervision tree when Watchdog fails
#
# ${1} is the first positional argument (substituted because of -S1).
# NOTE(review): presumably the exit code s6 passes to the finish script, with
# 256 meaning "killed by a signal" - confirm against the s6-supervise docs.
# ==============================================================================
# Do nothing when ${1} is 0.
if { s6-test ${1} -ne 0 }
# Do nothing when ${1} is 256.
if { s6-test ${1} -ne 256 }
# Any other value: send the terminate command to the scan directory.
s6-svscanctl -t /var/run/s6/services

View File

@ -0,0 +1,34 @@
#!/usr/bin/with-contenv bashio
# ==============================================================================
# Start Watchdog service
#
# Every 300 seconds the state written to /run/supervisor is inspected:
#   - "running": the Supervisor API is pinged; a failed ping counts as a
#     failure, a successful ping resets the counter.
#   - a state contained in "close stopping": counts as a failure, because the
#     Supervisor is still shutting down after a full interval.
#   - anything else: resets the counter.
# After two failures without a reset in between, the loop ends and the script
# calls bashio::exit.nok.
# ==============================================================================
declare failed_count=0
declare supervisor_state

bashio::log.info "Start local supervisor watchdog..."

while [[ failed_count -lt 2 ]]; do
    sleep 300
    supervisor_state="$(cat /run/supervisor)"

    if [[ "${supervisor_state}" = "running" ]]; then

        # Check API
        if bashio::supervisor.ping; then
            failed_count=0
        else
            bashio::log.warning "Maybe found an issue on API healthy"
            # Plain arithmetic assignment: "((failed_count++))" returns a
            # non-zero status when the old value is 0, which would abort the
            # script on the first failure if errexit is enabled.
            failed_count=$((failed_count + 1))
        fi

    # Substring match: true when the state is contained in "close stopping"
    # (this includes an empty state file).
    elif [[ "close stopping" = *"${supervisor_state}"* ]]; then
        bashio::log.warning "Maybe found an issue on shutdown"
        failed_count=$((failed_count + 1))
    else
        failed_count=0
    fi

done

bashio::exit.nok "Watchdog detect issue with Supervisor - take container down!"

View File

@ -32,6 +32,7 @@ FILE_HASSIO_UPDATER = Path(SUPERVISOR_DATA, "updater.json")
MACHINE_ID = Path("/etc/machine-id")
SOCKET_DBUS = Path("/run/dbus/system_bus_socket")
SOCKET_DOCKER = Path("/run/docker.sock")
# File the current core state value is written to whenever the state changes
RUN_SUPERVISOR_STATE = Path("/run/supervisor")
DOCKER_NETWORK = "hassio"
DOCKER_NETWORK_MASK = ip_network("172.30.32.0/23")

View File

@ -2,10 +2,17 @@
import asyncio
from contextlib import suppress
import logging
from typing import Optional
import async_timeout
from .const import SOCKET_DBUS, SUPERVISED_SUPPORTED_OS, AddonStartup, CoreState
from .const import (
RUN_SUPERVISOR_STATE,
SOCKET_DBUS,
SUPERVISED_SUPPORTED_OS,
AddonStartup,
CoreState,
)
from .coresys import CoreSys, CoreSysAttributes
from .exceptions import (
DockerAPIError,
@ -23,12 +30,31 @@ class Core(CoreSysAttributes):
def __init__(self, coresys: CoreSys):
    """Initialize Supervisor object.

    No state is assigned here: ``_state`` starts as ``None`` and the first
    real state is set by ``connect()``. Assigning ``self.state`` during
    construction would go through the property setter and write the state
    file before the object is fully set up.
    """
    self.coresys: CoreSys = coresys
    self.healthy: bool = True
    self.supported: bool = True
    # Backing field of the ``state`` property.
    self._state: Optional[CoreState] = None
@property
def state(self) -> Optional[CoreState]:
    """Return the current state of the core.

    The value is ``None`` until a state has been assigned, because the
    backing field ``_state`` is initialised to ``None``.
    """
    return self._state
@state.setter
def state(self, new_state: CoreState) -> None:
    """Switch the core to ``new_state`` and publish it to the state file.

    The state's value is written to ``RUN_SUPERVISOR_STATE``. A failing
    write (``OSError``) is only logged as a warning; the in-memory state is
    updated in every case.
    """
    try:
        RUN_SUPERVISOR_STATE.write_text(new_state.value)
    except OSError as error:
        # Best effort only: a missing or read-only state file must not
        # prevent the state change itself.
        _LOGGER.warning("Can't update supervisor state %s: %s", new_state, error)
    finally:
        # Executed after a successful write as well as after a failure.
        self._state = new_state
async def connect(self):
"""Connect Supervisor container."""
self.state = CoreState.INITIALIZE
# Load information from container
await self.sys_supervisor.load()
# If host docker is supported?

View File

@ -90,11 +90,15 @@ async def coresys(loop, docker, dbus, network_manager, aiohttp_client) -> CoreSy
):
coresys_obj = await initialize_coresys()
# Mock save json
coresys_obj.ingress.save_data = MagicMock()
coresys_obj.arch._default_arch = "amd64"
# Mock test client
coresys_obj.arch._default_arch = "amd64"
coresys_obj._machine = "qemux86-64"
coresys_obj._machine_id = uuid4()
# Mock host communication
coresys_obj._dbus = dbus
coresys_obj._dbus.network = network_manager

33
tests/test_core_state.py Normal file
View File

@ -0,0 +1,33 @@
"""Testing handling with CoreState."""
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch
import pytest
from supervisor.const import CoreState
# pylint: disable=redefined-outer-name
@pytest.fixture
def run_dir():
    """Redirect core state writes into a temporary file and yield its path.

    ``supervisor.core.RUN_SUPERVISOR_STATE`` is patched for the lifetime of
    the fixture and its ``write_text`` is pointed at a file named
    ``supervisor`` inside a temporary directory.
    """
    with TemporaryDirectory() as tmp_run:
        tmp_state = Path(tmp_run) / "supervisor"
        with patch("supervisor.core.RUN_SUPERVISOR_STATE") as mock_run:
            # Writes issued through the patched constant land in the temp file.
            mock_run.write_text = tmp_state.write_text
            yield tmp_state
def test_write_state(run_dir, coresys):
    """Check that assigning a core state writes its value to the state file."""
    # Same two states, in the same order, as separate assignments would use.
    for expected_state in (CoreState.RUNNING, CoreState.SHUTDOWN):
        coresys.core.state = expected_state

        assert run_dir.read_text() == expected_state.value