Improve error handling in backup restore (#4791)

This commit is contained in:
Mike Degatano 2023-12-29 05:45:50 -05:00 committed by GitHub
parent 37b6e09475
commit 6c66a7ba17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 132 additions and 82 deletions

View File

@ -398,10 +398,12 @@ class Backup(CoreSysAttributes):
return start_tasks
async def restore_addons(self, addon_list: list[str]) -> list[asyncio.Task]:
async def restore_addons(
self, addon_list: list[str]
) -> tuple[bool, list[asyncio.Task]]:
"""Restore a list add-on from backup."""
async def _addon_restore(addon_slug: str) -> asyncio.Task | None:
async def _addon_restore(addon_slug: str) -> tuple[bool, asyncio.Task | None]:
"""Task to restore an add-on into backup."""
tar_name = f"{addon_slug}.tar{'.gz' if self.compressed else ''}"
addon_file = SecureTarFile(
@ -415,25 +417,31 @@ class Backup(CoreSysAttributes):
# If exists inside backup
if not addon_file.path.exists():
_LOGGER.error("Can't find backup %s", addon_slug)
return
return (False, None)
# Perform a restore
try:
return await self.sys_addons.restore(addon_slug, addon_file)
return (True, await self.sys_addons.restore(addon_slug, addon_file))
except AddonsError:
_LOGGER.error("Can't restore backup %s", addon_slug)
return (False, None)
# Save Add-ons sequential
# avoid issue on slow IO
start_tasks: list[asyncio.Task] = []
success = True
for slug in addon_list:
try:
if start_task := await _addon_restore(slug):
start_tasks.append(start_task)
addon_success, start_task = await _addon_restore(slug)
except Exception as err: # pylint: disable=broad-except
_LOGGER.warning("Can't restore Add-on %s: %s", slug, err)
success = False
else:
success = success and addon_success
if start_task:
start_tasks.append(start_task)
return start_tasks
return (success, start_tasks)
async def store_folders(self, folder_list: list[str]):
"""Backup Supervisor data into backup."""
@ -483,10 +491,11 @@ class Backup(CoreSysAttributes):
f"Can't backup folder {folder}: {str(err)}", _LOGGER.error
) from err
async def restore_folders(self, folder_list: list[str]):
async def restore_folders(self, folder_list: list[str]) -> bool:
"""Backup Supervisor data into backup."""
success = True
async def _folder_restore(name: str) -> None:
async def _folder_restore(name: str) -> bool:
"""Intenal function to restore a folder."""
slug_name = name.replace("/", "_")
tar_name = Path(
@ -497,7 +506,7 @@ class Backup(CoreSysAttributes):
# Check if exists inside backup
if not tar_name.exists():
_LOGGER.warning("Can't find restore folder %s", name)
return
return False
# Unmount any mounts within folder
bind_mounts = [
@ -516,7 +525,7 @@ class Backup(CoreSysAttributes):
await remove_folder(origin_dir, content_only=True)
# Perform a restore
def _restore() -> None:
def _restore() -> bool:
try:
_LOGGER.info("Restore folder %s", name)
with SecureTarFile(
@ -530,9 +539,11 @@ class Backup(CoreSysAttributes):
_LOGGER.info("Restore folder %s done", name)
except (tarfile.TarError, OSError) as err:
_LOGGER.warning("Can't restore folder %s: %s", name, err)
return False
return True
try:
await self.sys_run_in_executor(_restore)
return await self.sys_run_in_executor(_restore)
finally:
if bind_mounts:
await asyncio.gather(
@ -543,9 +554,11 @@ class Backup(CoreSysAttributes):
# avoid issue on slow IO
for folder in folder_list:
try:
await _folder_restore(folder)
success = success and await _folder_restore(folder)
except Exception as err: # pylint: disable=broad-except
_LOGGER.warning("Can't restore folder %s: %s", folder, err)
success = False
return success
async def store_homeassistant(self, exclude_database: bool = False):
"""Backup Home Assistant Core configuration folder."""
@ -604,12 +617,12 @@ class Backup(CoreSysAttributes):
"""Store repository list into backup."""
self.repositories = self.sys_store.repository_urls
async def restore_repositories(self, replace: bool = False):
def restore_repositories(self, replace: bool = False) -> Awaitable[None]:
"""Restore repositories from backup.
Return a coroutine.
"""
await self.sys_store.update_repositories(
return self.sys_store.update_repositories(
self.repositories, add_with_errors=True, replace=replace
)

View File

@ -15,7 +15,7 @@ from ..const import (
CoreState,
)
from ..dbus.const import UnitActiveState
from ..exceptions import AddonsError, BackupError, BackupJobError
from ..exceptions import AddonsError, BackupError, BackupInvalidError, BackupJobError
from ..jobs.const import JOB_GROUP_BACKUP_MANAGER, JobCondition, JobExecutionLimit
from ..jobs.decorator import Job
from ..jobs.job_group import JobGroup
@ -388,6 +388,7 @@ class BackupManager(FileConfiguration, JobGroup):
Must be called from an existing restore job.
"""
addon_start_tasks: list[Awaitable[None]] | None = None
success = True
try:
task_hass: asyncio.Task | None = None
@ -399,7 +400,7 @@ class BackupManager(FileConfiguration, JobGroup):
# Process folders
if folder_list:
self._change_stage(RestoreJobStage.FOLDERS, backup)
await backup.restore_folders(folder_list)
success = await backup.restore_folders(folder_list)
# Process Home-Assistant
if homeassistant:
@ -419,13 +420,17 @@ class BackupManager(FileConfiguration, JobGroup):
await self.sys_addons.uninstall(addon.slug)
except AddonsError:
_LOGGER.warning("Can't uninstall Add-on %s", addon.slug)
success = False
if addon_list:
self._change_stage(RestoreJobStage.ADDON_REPOSITORIES, backup)
await backup.restore_repositories(replace)
self._change_stage(RestoreJobStage.ADDONS, backup)
addon_start_tasks = await backup.restore_addons(addon_list)
restore_success, addon_start_tasks = await backup.restore_addons(
addon_list
)
success = success and restore_success
# Wait for Home Assistant Core update/downgrade
if task_hass:
@ -433,18 +438,24 @@ class BackupManager(FileConfiguration, JobGroup):
RestoreJobStage.AWAIT_HOME_ASSISTANT_RESTART, backup
)
await task_hass
except BackupError:
raise
except Exception as err: # pylint: disable=broad-except
_LOGGER.exception("Restore %s error", backup.slug)
capture_exception(err)
return False
raise BackupError(
f"Restore {backup.slug} error, check logs for details"
) from err
else:
if addon_start_tasks:
self._change_stage(RestoreJobStage.AWAIT_ADDON_RESTARTS, backup)
# Ignore exceptions from waiting for addon startup, addon errors handled elsewhere
await asyncio.gather(*addon_start_tasks, return_exceptions=True)
# Failure to resume addons post restore is still a restore failure
if any(
await asyncio.gather(*addon_start_tasks, return_exceptions=True)
):
return False
return True
return success
finally:
# Leave Home Assistant alone if it wasn't part of the restore
if homeassistant:
@ -479,32 +490,34 @@ class BackupManager(FileConfiguration, JobGroup):
self.sys_jobs.current.reference = backup.slug
if backup.sys_type != BackupType.FULL:
_LOGGER.error("%s is only a partial backup!", backup.slug)
return False
raise BackupInvalidError(
f"{backup.slug} is only a partial backup!", _LOGGER.error
)
if backup.protected and not backup.set_password(password):
_LOGGER.error("Invalid password for backup %s", backup.slug)
return False
raise BackupInvalidError(
f"Invalid password for backup {backup.slug}", _LOGGER.error
)
if backup.supervisor_version > self.sys_supervisor.version:
_LOGGER.error(
"Backup was made on supervisor version %s, can't restore on %s. Must update supervisor first.",
backup.supervisor_version,
self.sys_supervisor.version,
raise BackupInvalidError(
f"Backup was made on supervisor version {backup.supervisor_version}, "
f"can't restore on {self.sys_supervisor.version}. Must update supervisor first.",
_LOGGER.error,
)
return False
_LOGGER.info("Full-Restore %s start", backup.slug)
self.sys_core.state = CoreState.FREEZE
# Stop Home-Assistant / Add-ons
await self.sys_core.shutdown()
try:
# Stop Home-Assistant / Add-ons
await self.sys_core.shutdown()
success = await self._do_restore(
backup, backup.addon_list, backup.folders, True, True
)
self.sys_core.state = CoreState.RUNNING
success = await self._do_restore(
backup, backup.addon_list, backup.folders, True, True
)
finally:
self.sys_core.state = CoreState.RUNNING
if success:
_LOGGER.info("Full-Restore %s done", backup.slug)
@ -543,29 +556,31 @@ class BackupManager(FileConfiguration, JobGroup):
homeassistant = True
if backup.protected and not backup.set_password(password):
_LOGGER.error("Invalid password for backup %s", backup.slug)
return False
raise BackupInvalidError(
f"Invalid password for backup {backup.slug}", _LOGGER.error
)
if backup.homeassistant is None and homeassistant:
_LOGGER.error("No Home Assistant Core data inside the backup")
return False
raise BackupInvalidError(
"No Home Assistant Core data inside the backup", _LOGGER.error
)
if backup.supervisor_version > self.sys_supervisor.version:
_LOGGER.error(
"Backup was made on supervisor version %s, can't restore on %s. Must update supervisor first.",
backup.supervisor_version,
self.sys_supervisor.version,
raise BackupInvalidError(
f"Backup was made on supervisor version {backup.supervisor_version}, "
f"can't restore on {self.sys_supervisor.version}. Must update supervisor first.",
_LOGGER.error,
)
return False
_LOGGER.info("Partial-Restore %s start", backup.slug)
self.sys_core.state = CoreState.FREEZE
success = await self._do_restore(
backup, addon_list, folder_list, homeassistant, False
)
self.sys_core.state = CoreState.RUNNING
try:
success = await self._do_restore(
backup, addon_list, folder_list, homeassistant, False
)
finally:
self.sys_core.state = CoreState.RUNNING
if success:
_LOGGER.info("Partial-Restore %s done", backup.slug)

View File

@ -593,6 +593,10 @@ class HomeAssistantBackupError(BackupError, HomeAssistantError):
"""Raise if an error during Home Assistant Core backup is happening."""
class BackupInvalidError(BackupError):
"""Raise if backup or password provided is invalid."""
class BackupJobError(BackupError, JobException):
"""Raise on Backup job error."""

View File

@ -481,7 +481,8 @@ class HomeAssistant(FileConfiguration, CoreSysAttributes):
ATTR_REFRESH_TOKEN,
ATTR_WATCHDOG,
):
self._data[attr] = data[attr]
if attr in data:
self._data[attr] = data[attr]
@Job(
name="home_assistant_get_users",

View File

@ -21,9 +21,9 @@ def fixture_backup_mock():
backup_instance.store_folders = AsyncMock(return_value=None)
backup_instance.store_homeassistant = AsyncMock(return_value=None)
backup_instance.store_addons = AsyncMock(return_value=None)
backup_instance.restore_folders = AsyncMock(return_value=None)
backup_instance.restore_folders = AsyncMock(return_value=True)
backup_instance.restore_homeassistant = AsyncMock(return_value=None)
backup_instance.restore_addons = AsyncMock(return_value=None)
backup_instance.restore_addons = AsyncMock(return_value=(True, []))
backup_instance.restore_repositories = AsyncMock(return_value=None)
yield backup_mock

View File

@ -22,7 +22,13 @@ from supervisor.docker.addon import DockerAddon
from supervisor.docker.const import ContainerState
from supervisor.docker.homeassistant import DockerHomeAssistant
from supervisor.docker.monitor import DockerContainerStateEvent
from supervisor.exceptions import AddonsError, BackupError, BackupJobError, DockerError
from supervisor.exceptions import (
AddonsError,
BackupError,
BackupInvalidError,
BackupJobError,
DockerError,
)
from supervisor.homeassistant.api import HomeAssistantAPI
from supervisor.homeassistant.core import HomeAssistantCore
from supervisor.homeassistant.module import HomeAssistant
@ -200,7 +206,7 @@ async def test_do_restore_full(coresys: CoreSys, full_backup_mock, install_addon
manager = BackupManager(coresys)
backup_instance = full_backup_mock.return_value
await manager.do_restore_full(backup_instance)
assert await manager.do_restore_full(backup_instance)
backup_instance.restore_homeassistant.assert_called_once()
backup_instance.restore_repositories.assert_called_once()
@ -229,7 +235,7 @@ async def test_do_restore_full_different_addon(
backup_instance = full_backup_mock.return_value
backup_instance.addon_list = ["differentslug"]
await manager.do_restore_full(backup_instance)
assert await manager.do_restore_full(backup_instance)
backup_instance.restore_homeassistant.assert_called_once()
backup_instance.restore_repositories.assert_called_once()
@ -256,7 +262,7 @@ async def test_do_restore_partial_minimal(
manager = BackupManager(coresys)
backup_instance = partial_backup_mock.return_value
await manager.do_restore_partial(backup_instance, homeassistant=False)
assert await manager.do_restore_partial(backup_instance, homeassistant=False)
backup_instance.restore_homeassistant.assert_not_called()
backup_instance.restore_repositories.assert_not_called()
@ -280,7 +286,7 @@ async def test_do_restore_partial_maximal(coresys: CoreSys, partial_backup_mock)
manager = BackupManager(coresys)
backup_instance = partial_backup_mock.return_value
await manager.do_restore_partial(
assert await manager.do_restore_partial(
backup_instance,
addons=[TEST_ADDON_SLUG],
folders=[FOLDER_SHARE, FOLDER_HOMEASSISTANT],
@ -299,25 +305,31 @@ async def test_do_restore_partial_maximal(coresys: CoreSys, partial_backup_mock)
assert coresys.core.state == CoreState.RUNNING
async def test_fail_invalid_full_backup(coresys: CoreSys, full_backup_mock: MagicMock):
async def test_fail_invalid_full_backup(
coresys: CoreSys, full_backup_mock: MagicMock, partial_backup_mock: MagicMock
):
"""Test restore fails with invalid backup."""
coresys.core.state = CoreState.RUNNING
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
manager = BackupManager(coresys)
with pytest.raises(BackupInvalidError):
await manager.do_restore_full(partial_backup_mock.return_value)
backup_instance = full_backup_mock.return_value
backup_instance.protected = True
backup_instance.set_password.return_value = False
assert await manager.do_restore_full(backup_instance) is False
with pytest.raises(BackupInvalidError):
await manager.do_restore_full(backup_instance)
backup_instance.protected = False
backup_instance.supervisor_version = "2022.08.4"
with patch.object(
type(coresys.supervisor), "version", new=PropertyMock(return_value="2022.08.3")
):
assert await manager.do_restore_full(backup_instance) is False
), pytest.raises(BackupInvalidError):
await manager.do_restore_full(backup_instance)
async def test_fail_invalid_partial_backup(
@ -333,20 +345,20 @@ async def test_fail_invalid_partial_backup(
backup_instance.protected = True
backup_instance.set_password.return_value = False
assert await manager.do_restore_partial(backup_instance) is False
with pytest.raises(BackupInvalidError):
await manager.do_restore_partial(backup_instance)
backup_instance.protected = False
backup_instance.homeassistant = None
assert (
await manager.do_restore_partial(backup_instance, homeassistant=True) is False
)
with pytest.raises(BackupInvalidError):
await manager.do_restore_partial(backup_instance, homeassistant=True)
backup_instance.supervisor_version = "2022.08.4"
with patch.object(
type(coresys.supervisor), "version", new=PropertyMock(return_value="2022.08.3")
):
assert await manager.do_restore_partial(backup_instance) is False
), pytest.raises(BackupInvalidError):
await manager.do_restore_partial(backup_instance)
async def test_backup_error(
@ -368,15 +380,20 @@ async def test_backup_error(
async def test_restore_error(
coresys: CoreSys, full_backup_mock: MagicMock, capture_exception: Mock
):
"""Test restoring full Backup."""
"""Test restoring full Backup with errors."""
coresys.core.state = CoreState.RUNNING
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
coresys.homeassistant.core.start = AsyncMock(return_value=None)
backup_instance = full_backup_mock.return_value
backup_instance.restore_dockerconfig.side_effect = (err := DockerError())
await coresys.backups.do_restore_full(backup_instance)
backup_instance.restore_dockerconfig.side_effect = BackupError()
with pytest.raises(BackupError):
await coresys.backups.do_restore_full(backup_instance)
capture_exception.assert_not_called()
backup_instance.restore_dockerconfig.side_effect = (err := DockerError())
with pytest.raises(BackupError):
await coresys.backups.do_restore_full(backup_instance)
capture_exception.assert_called_once_with(err)
@ -641,17 +658,17 @@ async def test_partial_backup_to_mount(
"test", homeassistant=True, location=mount
)
assert (mount_dir / f"{backup.slug}.tar").exists()
assert (mount_dir / f"{backup.slug}.tar").exists()
# Reload and check that backups in mounts are listed
await coresys.backups.reload()
assert coresys.backups.get(backup.slug)
# Reload and check that backups in mounts are listed
await coresys.backups.reload()
assert coresys.backups.get(backup.slug)
# Remove marker file and restore. Confirm it comes back
marker.unlink()
# Remove marker file and restore. Confirm it comes back
marker.unlink()
with patch.object(DockerHomeAssistant, "is_running", return_value=True):
await coresys.backups.do_restore_partial(backup, homeassistant=True)
with patch.object(DockerHomeAssistant, "is_running", return_value=True):
await coresys.backups.do_restore_partial(backup, homeassistant=True)
assert marker.exists()