From 4f28a284be5a5965e050b3953f718b7d3d6c8140 Mon Sep 17 00:00:00 2001
From: Stefan Agner
Date: Mon, 9 Nov 2020 13:05:54 +0100
Subject: [PATCH] Make self healing capabilities more robust (#960)

In case a container image is corrupted `docker inspect` might fail:
  # docker inspect --format='{{.Id}}' "${SUPERVISOR_IMAGE}"
  Error response from daemon: readlink /mnt/data/docker/overlay2: invalid argument

In that same state the `docker images` command still shows the images.

Since `docker inspect` returns an error SUPERVISOR_IMAGE_ID will be
empty and a simple `docker pull` will be attempted. That does not
suffice to recover from a corrupted container image.

Use `docker images` to get the image ids and make sure to delete all
image ids found by that command. The ids are handed to `docker image rm`
one argument per id, so more than one matching image is handled as well.

Also don't use RuntimeDirectory since it deletes the runtime directory
between the service start attempts which defeats the purpose.
---
 .../usr/lib/systemd/system/hassos-supervisor.service     | 1 -
 .../rootfs-overlay/usr/sbin/hassos-supervisor            | 9 ++++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/buildroot-external/rootfs-overlay/usr/lib/systemd/system/hassos-supervisor.service b/buildroot-external/rootfs-overlay/usr/lib/systemd/system/hassos-supervisor.service
index 68f39ad89..9d7be631c 100644
--- a/buildroot-external/rootfs-overlay/usr/lib/systemd/system/hassos-supervisor.service
+++ b/buildroot-external/rootfs-overlay/usr/lib/systemd/system/hassos-supervisor.service
@@ -13,7 +13,6 @@ ConditionPathExists=/run/docker.sock
 Type=simple
 Restart=always
 RestartSec=5s
-RuntimeDirectory=supervisor
 ExecStartPre=-/usr/bin/docker stop hassio_supervisor
 ExecStart=/usr/sbin/hassos-supervisor
 ExecStop=-/usr/bin/docker stop hassio_supervisor
diff --git a/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor b/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor
index ce3a4311a..4f367c327 100755
--- a/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor
+++ b/buildroot-external/rootfs-overlay/usr/sbin/hassos-supervisor
@@ -12,7 +12,7 @@ set -e
 SUPERVISOR_STARTUP_MARKER="/run/supervisor/startup-marker"
 SUPERVISOR_IMAGE="homeassistant/${SUPERVISOR_ARCH}-hassio-supervisor"
 SUPERVISOR_DATA=/mnt/data/supervisor
-SUPERVISOR_IMAGE_ID=$(docker inspect --format='{{.Id}}' "${SUPERVISOR_IMAGE}" || echo "")
+SUPERVISOR_IMAGE_ID=$(docker images --no-trunc --filter "reference=${SUPERVISOR_IMAGE}:latest" --format "{{.ID}}" || echo "")
 SUPERVISOR_CONTAINER_ID=$(docker inspect --format='{{.Image}}' hassio_supervisor || echo "")
 
 # Check if previous run left the startup-marker in place. If so, we assume the
@@ -22,11 +22,15 @@ if [ -f "${SUPERVISOR_STARTUP_MARKER}" ]; then
     echo "[WARNING] Supervisor container did not remove the startup marker file. Assuming container image or container corruption."
     docker container rm --force hassio_supervisor || true
     SUPERVISOR_CONTAINER_ID=""
-    docker rmi --force "${SUPERVISOR_IMAGE_ID}" || true
+    # Make sure we delete all supervisor images, one argument per image ID
+    SUPERVISOR_IMAGE_IDS=$(docker images --no-trunc --filter "reference=${SUPERVISOR_IMAGE}" --format "{{.ID}}" | sort -u || echo "")
+    printf '%s\n' "${SUPERVISOR_IMAGE_IDS}" | xargs -r docker image rm --force || true
     SUPERVISOR_IMAGE_ID=""
 fi
 
 # If Supervisor image is missing, pull it
+mkdir -p "$(dirname "${SUPERVISOR_STARTUP_MARKER}")"
+touch "${SUPERVISOR_STARTUP_MARKER}"
 if [ -z "${SUPERVISOR_IMAGE_ID}" ]; then
     # Get the latest from update information
     # Using updater information instead of config. If the config version is
@@ -80,6 +84,5 @@ fi
 
 # Run supervisor
 mkdir -p ${SUPERVISOR_DATA}
-touch ${SUPERVISOR_STARTUP_MARKER}
 echo "[INFO] Starting the Supervisor..."
 exec docker container start --attach hassio_supervisor