From 2706b4d8665933ad50dc8455befd8f9aefe3d889 Mon Sep 17 00:00:00 2001 From: Franck Nijhof Date: Fri, 5 Jun 2020 23:28:05 +0200 Subject: [PATCH] Improve self healing capabilities of Supervisor service (#726) * Improve self healing capabilities of Supervisor service * Fixes shellcheck linter warnings --- .../rootfs-overlay/usr/sbin/hassos-supervisor | 71 +++++++++++++++++-- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor b/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor index fc468294b..cf7027949 100755 --- a/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor +++ b/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor @@ -9,16 +9,72 @@ set -e . /etc/os-release # Init supervisor +SUPERVISOR_MIN_RUNTIME=30 SUPERVISOR_IMAGE="homeassistant/${SUPERVISOR_ARCH}-hassio-supervisor" SUPERVISOR_DATA=/mnt/data/supervisor -SUPERVISOR_IMAGE_ID=$(docker inspect --format='{{.Id}}' "${SUPERVISOR_IMAGE}") +SUPERVISOR_IMAGE_ID=$(docker inspect --format='{{.Id}}' "${SUPERVISOR_IMAGE}" || echo "") SUPERVISOR_CONTAINER_ID=$(docker inspect --format='{{.Image}}' hassio_supervisor || echo "") -runSupervisor() { - docker container rm --force hassio_supervisor || true +# Check if previous runtime of the container meets the minimal runtime +# If not, we might be in trouble. Image or container corruption, bad release? +# Delete the container, delete the image, pull a fresh one +if [ -n "${SUPERVISOR_CONTAINER_ID}" ]; then + SUPERVISOR_START=$(docker inspect --format='{{.State.StartedAt}}' hassio_supervisor | sed -re "s/([-0-9]+)T([0-9\:]+).*/\1 \2/g") + SUPERVISOR_STOP=$(docker inspect --format='{{.State.FinishedAt}}' hassio_supervisor | sed -re "s/([-0-9]+)T([0-9\:]+).*/\1 \2/g") + if [ -n "${SUPERVISOR_START}" ] && [ -n "${SUPERVISOR_STOP}" ]; then + START=$(date --date="${SUPERVISOR_START}" +%s) + STOP=$(date --date="${SUPERVISOR_STOP}" +%s) + SUPERVISOR_RUNTIME=$((STOP-START)) + # Minimial runtime not met, remove container and image + if [ "${SUPERVISOR_RUNTIME}" -lt "${SUPERVISOR_MIN_RUNTIME}" ]; then + echo "[WARNING] Supervisor container exited too quickly, forcing a fresh image and container..." + docker container rm --force hassio_supervisor || true + SUPERVISOR_CONTAINER_ID="" + docker rmi --force "${SUPERVISOR_IMAGE_ID}" || true + SUPERVISOR_IMAGE_ID="" + fi + fi +fi + +# If Supervisor image is missing, pull it +if [ -z "${SUPERVISOR_IMAGE_ID}" ]; then + # Get the latest from update information + # Using updater information instead of config. If the config version is + # broken, this creates a way (e.g., bad release). + SUPERVISOR_VERSION_UPDATER=$(jq -r '.version // empty' "${SUPERVISOR_DATA}/updater.json" || echo "") + SUPERVISOR_VERSION="${SUPERVISOR_VERSION_UPDATER:-latest}" + + echo "[WARNING] Supervisor image missing, downloading a fresh one: ${SUPERVISOR_VERSION}" + + # Pull in the Supervisor + if docker pull "${SUPERVISOR_IMAGE}:${SUPERVISOR_VERSION}"; then + # Tag as latest if versioned + if [ "${SUPERVISOR_VERSION}" != "latest" ]; then + docker tag "${SUPERVISOR_IMAGE}:${SUPERVISOR_VERSION}" "${SUPERVISOR_IMAGE}:latest" + fi + else + # Pull failed, updater info might be corrupted, re-trying with latest + echo "[WARNING] Supervisor downloading failed trying: latest" + docker pull "${SUPERVISOR_IMAGE}:latest" + fi + + SUPERVISOR_IMAGE_ID=$(docker inspect --format='{{.Id}}' "${SUPERVISOR_IMAGE}" || echo "") +fi + +# Image changed, remove previous container +if [ -n "${SUPERVISOR_CONTAINER_ID}" ] && [ "${SUPERVISOR_IMAGE_ID}" != "${SUPERVISOR_CONTAINER_ID}" ]; then + echo "[INFO] Supervisor image has been updated, destroying previous container..." + docker container rm --force hassio_supervisor || true + SUPERVISOR_CONTAINER_ID="" +fi + +# If Supervisor container is missing, create it +if [ -z "${SUPERVISOR_CONTAINER_ID}" ]; then + echo "[INFO] Creating a new Supervisor container..." # shellcheck disable=SC2086 - docker container run --name hassio_supervisor \ + docker container create \ + --name hassio_supervisor \ --privileged --security-opt apparmor="hassio-supervisor" \ -v /var/run/docker.sock:/var/run/docker.sock \ -v /var/run/dbus:/var/run/dbus \ @@ -27,9 +83,10 @@ runSupervisor() { -e SUPERVISOR_SHARE=${SUPERVISOR_DATA} \ -e SUPERVISOR_NAME=hassio_supervisor \ -e SUPERVISOR_MACHINE=${SUPERVISOR_MACHINE} \ - "${SUPERVISOR_IMAGE}" -} + "${SUPERVISOR_IMAGE}:latest" +fi # Run supervisor mkdir -p ${SUPERVISOR_DATA} -([ "${SUPERVISOR_IMAGE_ID}" = "${SUPERVISOR_CONTAINER_ID}" ] && docker container start --attach hassio_supervisor) || runSupervisor +echo "[INFO] Starting the Supervisor..." +exec docker container start --attach hassio_supervisor