New fix for stale container on network (#2189)

* New fix for stale container on network

* fix core repair

* Cleanup before attach to network

* Fix reload

* Fix host network

* Fix prune network

* Remove refresh

* Fix unpack

* better name

* clean order

* Add comment why

* more comments

* Update supervisor/misc/tasks.py

Co-authored-by: Joakim Sørensen <joasoe@gmail.com>

Co-authored-by: Joakim Sørensen <joasoe@gmail.com>
This commit is contained in:
Pascal Vizeli 2020-10-30 10:03:05 +01:00 committed by GitHub
parent fe15bb6a30
commit f71549e3df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 78 additions and 17 deletions

View File

@ -349,10 +349,6 @@ class AddonManager(CoreSysAttributes):
for addon in needs_repair:
_LOGGER.info("Repairing for add-on: %s", addon.slug)
await self.sys_run_in_executor(
self.sys_docker.network.stale_cleanup, addon.instance.name
)
with suppress(DockerError, KeyError):
# Need to pull the image again
if not addon.need_build:

View File

@ -14,6 +14,7 @@ from ..const import (
ATTR_REGISTRIES,
DNS_SUFFIX,
DOCKER_IMAGE_DENYLIST,
DOCKER_NETWORK,
FILE_HASSIO_DOCKER,
SOCKET_DOCKER,
)
@ -25,6 +26,7 @@ from .network import DockerNetwork
_LOGGER: logging.Logger = logging.getLogger(__name__)
MIN_SUPPORTED_DOCKER = "19.03.0"
DOCKER_NETWORK_HOST = "host"
@attr.s(frozen=True)
@ -171,6 +173,19 @@ class DockerAPI:
else:
with suppress(DockerError):
self.network.detach_default_bridge(container)
else:
host_network: docker.models.networks.Network = self.docker.networks.get(
DOCKER_NETWORK_HOST
)
# Check if the container is still registered on the host network
# https://github.com/moby/moby/issues/23302
if name in (
val.get("Name")
for val in host_network.attrs.get("Containers", {}).values()
):
with suppress(docker.errors.NotFound):
host_network.disconnect(name, force=True)
# Run container
try:
@ -203,6 +218,7 @@ class DockerAPI:
stderr = kwargs.get("stderr", True)
_LOGGER.info("Runing command '%s' on %s", command, image)
container = None
try:
container = self.docker.containers.run(
f"{image}:{version}",
@ -222,8 +238,9 @@ class DockerAPI:
finally:
# cleanup container
with suppress(docker.errors.DockerException, requests.RequestException):
container.remove(force=True)
if container:
with suppress(docker.errors.DockerException, requests.RequestException):
container.remove(force=True)
return CommandReturn(result.get("StatusCode"), output)
@ -265,6 +282,42 @@ class DockerAPI:
except docker.errors.APIError as err:
_LOGGER.warning("Error for networks prune: %s", err)
_LOGGER.info("Fix stale container on hassio network")
try:
self.prune_networks(DOCKER_NETWORK)
except docker.errors.APIError as err:
_LOGGER.warning("Error for networks hassio prune: %s", err)
_LOGGER.info("Fix stale container on host network")
try:
self.prune_networks(DOCKER_NETWORK_HOST)
except docker.errors.APIError as err:
_LOGGER.warning("Error for networks host prune: %s", err)
def prune_networks(self, network_name: str) -> None:
    """Disconnect containers that a network lists but Docker cannot find.

    Looks up the network called ``network_name`` and walks the entries of
    its ``Containers`` attribute. An entry whose container ID resolves is
    left alone. An entry whose lookup raises ``NotFound`` is treated as
    stale and force-disconnected from the network, using the recorded
    ``Name`` (falling back to the container ID). Any other Docker/request
    error during the lookup is logged and the entry is skipped.

    Fix: https://github.com/moby/moby/issues/23302
    """
    network: docker.models.networks.Network = self.docker.networks.get(network_name)
    attached = network.attrs.get("Containers", {})

    for container_id, endpoint in attached.items():
        try:
            self.docker.containers.get(container_id)
        except docker.errors.NotFound:
            # The network references a container Docker no longer knows:
            # fall through to the forced disconnect below.
            _LOGGER.debug(
                "Docker network %s is corrupt on container: %s",
                network_name,
                container_id,
            )
        except (docker.errors.DockerException, requests.RequestException):
            # Lookup failed for another reason; do not touch this entry.
            _LOGGER.warning(
                "Docker fatal error on container %s on %s",
                container_id,
                network_name,
            )
            continue
        else:
            # Container exists, so the network entry is valid.
            continue

        # Best effort: errors while disconnecting are deliberately ignored.
        with suppress(docker.errors.DockerException, requests.RequestException):
            network.disconnect(endpoint.get("Name", container_id), force=True)
def check_denylist_images(self) -> bool:
"""Return a boolean if the host has images in the denylist."""
denied_images = set()

View File

@ -33,12 +33,11 @@ class DockerNetwork:
def containers(self) -> List[docker.models.containers.Container]:
"""Return of connected containers from network."""
containers: List[docker.models.containers.Container] = []
for cid, data in self.network.attrs.get("Containers", {}).items():
for cid, _ in self.network.attrs.get("Containers", {}).items():
try:
containers.append(self.docker.containers.get(cid))
except docker.errors.NotFound:
_LOGGER.warning("Docker network is corrupt! %s - running autofix", cid)
self.stale_cleanup(data.get("Name", cid))
_LOGGER.warning("Docker network is corrupt! %s", cid)
except (docker.errors.DockerException, requests.RequestException) as err:
_LOGGER.error("Unknown error with container lookup %s", err)
@ -109,14 +108,23 @@ class DockerNetwork:
"""
ipv4_address = str(ipv4) if ipv4 else None
# Reload Network information
with suppress(docker.errors.DockerException, requests.RequestException):
self.network.reload()
# Check stale Network
if container.name in (
val.get("Name") for val in self.network.attrs.get("Containers", {}).values()
):
self.stale_cleanup(container.name)
# Attach Network
try:
self.network.connect(container, aliases=alias, ipv4_address=ipv4_address)
except docker.errors.APIError as err:
_LOGGER.error("Can't link container to hassio-net: %s", err)
raise DockerError() from err
self.network.reload()
def detach_default_bridge(
self, container: docker.models.containers.Container
) -> None:
@ -140,5 +148,9 @@ class DockerNetwork:
Fix: https://github.com/moby/moby/issues/23302
"""
with suppress(docker.errors.DockerException, requests.RequestException):
try:
self.network.disconnect(container_name, force=True)
except docker.errors.NotFound:
pass
except (docker.errors.DockerException, requests.RequestException) as err:
raise DockerError() from err

View File

@ -415,11 +415,6 @@ class HomeAssistantCore(CoreSysAttributes):
return
_LOGGER.info("Repair Home Assistant %s", self.sys_homeassistant.version)
await self.sys_run_in_executor(
self.sys_docker.network.stale_cleanup, self.instance.name
)
# Pull image
try:
await self.instance.install(self.sys_homeassistant.version)
except DockerError:

View File

@ -190,6 +190,11 @@ class Tasks(CoreSysAttributes):
except HomeAssistantError as err:
_LOGGER.error("Home Assistant watchdog reanimation failed!")
self.sys_capture_exception(err)
else:
return
_LOGGER.info("Rebuilding the Home Assistant Container")
await self.sys_homeassistant.core.rebuild()
async def _watchdog_homeassistant_api(self):
"""Create scheduler task for monitoring running state of API.