Improve error handling in backup restore (#4791)

This commit is contained in:
Mike Degatano 2023-12-29 05:45:50 -05:00 committed by GitHub
parent 37b6e09475
commit 6c66a7ba17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 132 additions and 82 deletions

View File

@ -398,10 +398,12 @@ class Backup(CoreSysAttributes):
return start_tasks return start_tasks
async def restore_addons(self, addon_list: list[str]) -> list[asyncio.Task]: async def restore_addons(
self, addon_list: list[str]
) -> tuple[bool, list[asyncio.Task]]:
"""Restore a list add-on from backup.""" """Restore a list add-on from backup."""
async def _addon_restore(addon_slug: str) -> asyncio.Task | None: async def _addon_restore(addon_slug: str) -> tuple[bool, asyncio.Task | None]:
"""Task to restore an add-on into backup.""" """Task to restore an add-on into backup."""
tar_name = f"{addon_slug}.tar{'.gz' if self.compressed else ''}" tar_name = f"{addon_slug}.tar{'.gz' if self.compressed else ''}"
addon_file = SecureTarFile( addon_file = SecureTarFile(
@ -415,25 +417,31 @@ class Backup(CoreSysAttributes):
# If exists inside backup # If exists inside backup
if not addon_file.path.exists(): if not addon_file.path.exists():
_LOGGER.error("Can't find backup %s", addon_slug) _LOGGER.error("Can't find backup %s", addon_slug)
return return (False, None)
# Perform a restore # Perform a restore
try: try:
return await self.sys_addons.restore(addon_slug, addon_file) return (True, await self.sys_addons.restore(addon_slug, addon_file))
except AddonsError: except AddonsError:
_LOGGER.error("Can't restore backup %s", addon_slug) _LOGGER.error("Can't restore backup %s", addon_slug)
return (False, None)
# Save Add-ons sequential # Save Add-ons sequential
# avoid issue on slow IO # avoid issue on slow IO
start_tasks: list[asyncio.Task] = [] start_tasks: list[asyncio.Task] = []
success = True
for slug in addon_list: for slug in addon_list:
try: try:
if start_task := await _addon_restore(slug): addon_success, start_task = await _addon_restore(slug)
start_tasks.append(start_task)
except Exception as err: # pylint: disable=broad-except except Exception as err: # pylint: disable=broad-except
_LOGGER.warning("Can't restore Add-on %s: %s", slug, err) _LOGGER.warning("Can't restore Add-on %s: %s", slug, err)
success = False
else:
success = success and addon_success
if start_task:
start_tasks.append(start_task)
return start_tasks return (success, start_tasks)
async def store_folders(self, folder_list: list[str]): async def store_folders(self, folder_list: list[str]):
"""Backup Supervisor data into backup.""" """Backup Supervisor data into backup."""
@ -483,10 +491,11 @@ class Backup(CoreSysAttributes):
f"Can't backup folder {folder}: {str(err)}", _LOGGER.error f"Can't backup folder {folder}: {str(err)}", _LOGGER.error
) from err ) from err
async def restore_folders(self, folder_list: list[str]): async def restore_folders(self, folder_list: list[str]) -> bool:
"""Backup Supervisor data into backup.""" """Backup Supervisor data into backup."""
success = True
async def _folder_restore(name: str) -> None: async def _folder_restore(name: str) -> bool:
"""Intenal function to restore a folder.""" """Intenal function to restore a folder."""
slug_name = name.replace("/", "_") slug_name = name.replace("/", "_")
tar_name = Path( tar_name = Path(
@ -497,7 +506,7 @@ class Backup(CoreSysAttributes):
# Check if exists inside backup # Check if exists inside backup
if not tar_name.exists(): if not tar_name.exists():
_LOGGER.warning("Can't find restore folder %s", name) _LOGGER.warning("Can't find restore folder %s", name)
return return False
# Unmount any mounts within folder # Unmount any mounts within folder
bind_mounts = [ bind_mounts = [
@ -516,7 +525,7 @@ class Backup(CoreSysAttributes):
await remove_folder(origin_dir, content_only=True) await remove_folder(origin_dir, content_only=True)
# Perform a restore # Perform a restore
def _restore() -> None: def _restore() -> bool:
try: try:
_LOGGER.info("Restore folder %s", name) _LOGGER.info("Restore folder %s", name)
with SecureTarFile( with SecureTarFile(
@ -530,9 +539,11 @@ class Backup(CoreSysAttributes):
_LOGGER.info("Restore folder %s done", name) _LOGGER.info("Restore folder %s done", name)
except (tarfile.TarError, OSError) as err: except (tarfile.TarError, OSError) as err:
_LOGGER.warning("Can't restore folder %s: %s", name, err) _LOGGER.warning("Can't restore folder %s: %s", name, err)
return False
return True
try: try:
await self.sys_run_in_executor(_restore) return await self.sys_run_in_executor(_restore)
finally: finally:
if bind_mounts: if bind_mounts:
await asyncio.gather( await asyncio.gather(
@ -543,9 +554,11 @@ class Backup(CoreSysAttributes):
# avoid issue on slow IO # avoid issue on slow IO
for folder in folder_list: for folder in folder_list:
try: try:
await _folder_restore(folder) success = success and await _folder_restore(folder)
except Exception as err: # pylint: disable=broad-except except Exception as err: # pylint: disable=broad-except
_LOGGER.warning("Can't restore folder %s: %s", folder, err) _LOGGER.warning("Can't restore folder %s: %s", folder, err)
success = False
return success
async def store_homeassistant(self, exclude_database: bool = False): async def store_homeassistant(self, exclude_database: bool = False):
"""Backup Home Assistant Core configuration folder.""" """Backup Home Assistant Core configuration folder."""
@ -604,12 +617,12 @@ class Backup(CoreSysAttributes):
"""Store repository list into backup.""" """Store repository list into backup."""
self.repositories = self.sys_store.repository_urls self.repositories = self.sys_store.repository_urls
async def restore_repositories(self, replace: bool = False): def restore_repositories(self, replace: bool = False) -> Awaitable[None]:
"""Restore repositories from backup. """Restore repositories from backup.
Return a coroutine. Return a coroutine.
""" """
await self.sys_store.update_repositories( return self.sys_store.update_repositories(
self.repositories, add_with_errors=True, replace=replace self.repositories, add_with_errors=True, replace=replace
) )

View File

@ -15,7 +15,7 @@ from ..const import (
CoreState, CoreState,
) )
from ..dbus.const import UnitActiveState from ..dbus.const import UnitActiveState
from ..exceptions import AddonsError, BackupError, BackupJobError from ..exceptions import AddonsError, BackupError, BackupInvalidError, BackupJobError
from ..jobs.const import JOB_GROUP_BACKUP_MANAGER, JobCondition, JobExecutionLimit from ..jobs.const import JOB_GROUP_BACKUP_MANAGER, JobCondition, JobExecutionLimit
from ..jobs.decorator import Job from ..jobs.decorator import Job
from ..jobs.job_group import JobGroup from ..jobs.job_group import JobGroup
@ -388,6 +388,7 @@ class BackupManager(FileConfiguration, JobGroup):
Must be called from an existing restore job. Must be called from an existing restore job.
""" """
addon_start_tasks: list[Awaitable[None]] | None = None addon_start_tasks: list[Awaitable[None]] | None = None
success = True
try: try:
task_hass: asyncio.Task | None = None task_hass: asyncio.Task | None = None
@ -399,7 +400,7 @@ class BackupManager(FileConfiguration, JobGroup):
# Process folders # Process folders
if folder_list: if folder_list:
self._change_stage(RestoreJobStage.FOLDERS, backup) self._change_stage(RestoreJobStage.FOLDERS, backup)
await backup.restore_folders(folder_list) success = await backup.restore_folders(folder_list)
# Process Home-Assistant # Process Home-Assistant
if homeassistant: if homeassistant:
@ -419,13 +420,17 @@ class BackupManager(FileConfiguration, JobGroup):
await self.sys_addons.uninstall(addon.slug) await self.sys_addons.uninstall(addon.slug)
except AddonsError: except AddonsError:
_LOGGER.warning("Can't uninstall Add-on %s", addon.slug) _LOGGER.warning("Can't uninstall Add-on %s", addon.slug)
success = False
if addon_list: if addon_list:
self._change_stage(RestoreJobStage.ADDON_REPOSITORIES, backup) self._change_stage(RestoreJobStage.ADDON_REPOSITORIES, backup)
await backup.restore_repositories(replace) await backup.restore_repositories(replace)
self._change_stage(RestoreJobStage.ADDONS, backup) self._change_stage(RestoreJobStage.ADDONS, backup)
addon_start_tasks = await backup.restore_addons(addon_list) restore_success, addon_start_tasks = await backup.restore_addons(
addon_list
)
success = success and restore_success
# Wait for Home Assistant Core update/downgrade # Wait for Home Assistant Core update/downgrade
if task_hass: if task_hass:
@ -433,18 +438,24 @@ class BackupManager(FileConfiguration, JobGroup):
RestoreJobStage.AWAIT_HOME_ASSISTANT_RESTART, backup RestoreJobStage.AWAIT_HOME_ASSISTANT_RESTART, backup
) )
await task_hass await task_hass
except BackupError:
raise
except Exception as err: # pylint: disable=broad-except except Exception as err: # pylint: disable=broad-except
_LOGGER.exception("Restore %s error", backup.slug) _LOGGER.exception("Restore %s error", backup.slug)
capture_exception(err) capture_exception(err)
return False raise BackupError(
f"Restore {backup.slug} error, check logs for details"
) from err
else: else:
if addon_start_tasks: if addon_start_tasks:
self._change_stage(RestoreJobStage.AWAIT_ADDON_RESTARTS, backup) self._change_stage(RestoreJobStage.AWAIT_ADDON_RESTARTS, backup)
# Ignore exceptions from waiting for addon startup, addon errors handled elsewhere # Failure to resume addons post restore is still a restore failure
await asyncio.gather(*addon_start_tasks, return_exceptions=True) if any(
await asyncio.gather(*addon_start_tasks, return_exceptions=True)
):
return False
return True return success
finally: finally:
# Leave Home Assistant alone if it wasn't part of the restore # Leave Home Assistant alone if it wasn't part of the restore
if homeassistant: if homeassistant:
@ -479,32 +490,34 @@ class BackupManager(FileConfiguration, JobGroup):
self.sys_jobs.current.reference = backup.slug self.sys_jobs.current.reference = backup.slug
if backup.sys_type != BackupType.FULL: if backup.sys_type != BackupType.FULL:
_LOGGER.error("%s is only a partial backup!", backup.slug) raise BackupInvalidError(
return False f"{backup.slug} is only a partial backup!", _LOGGER.error
)
if backup.protected and not backup.set_password(password): if backup.protected and not backup.set_password(password):
_LOGGER.error("Invalid password for backup %s", backup.slug) raise BackupInvalidError(
return False f"Invalid password for backup {backup.slug}", _LOGGER.error
)
if backup.supervisor_version > self.sys_supervisor.version: if backup.supervisor_version > self.sys_supervisor.version:
_LOGGER.error( raise BackupInvalidError(
"Backup was made on supervisor version %s, can't restore on %s. Must update supervisor first.", f"Backup was made on supervisor version {backup.supervisor_version}, "
backup.supervisor_version, f"can't restore on {self.sys_supervisor.version}. Must update supervisor first.",
self.sys_supervisor.version, _LOGGER.error,
) )
return False
_LOGGER.info("Full-Restore %s start", backup.slug) _LOGGER.info("Full-Restore %s start", backup.slug)
self.sys_core.state = CoreState.FREEZE self.sys_core.state = CoreState.FREEZE
# Stop Home-Assistant / Add-ons try:
await self.sys_core.shutdown() # Stop Home-Assistant / Add-ons
await self.sys_core.shutdown()
success = await self._do_restore( success = await self._do_restore(
backup, backup.addon_list, backup.folders, True, True backup, backup.addon_list, backup.folders, True, True
) )
finally:
self.sys_core.state = CoreState.RUNNING self.sys_core.state = CoreState.RUNNING
if success: if success:
_LOGGER.info("Full-Restore %s done", backup.slug) _LOGGER.info("Full-Restore %s done", backup.slug)
@ -543,29 +556,31 @@ class BackupManager(FileConfiguration, JobGroup):
homeassistant = True homeassistant = True
if backup.protected and not backup.set_password(password): if backup.protected and not backup.set_password(password):
_LOGGER.error("Invalid password for backup %s", backup.slug) raise BackupInvalidError(
return False f"Invalid password for backup {backup.slug}", _LOGGER.error
)
if backup.homeassistant is None and homeassistant: if backup.homeassistant is None and homeassistant:
_LOGGER.error("No Home Assistant Core data inside the backup") raise BackupInvalidError(
return False "No Home Assistant Core data inside the backup", _LOGGER.error
)
if backup.supervisor_version > self.sys_supervisor.version: if backup.supervisor_version > self.sys_supervisor.version:
_LOGGER.error( raise BackupInvalidError(
"Backup was made on supervisor version %s, can't restore on %s. Must update supervisor first.", f"Backup was made on supervisor version {backup.supervisor_version}, "
backup.supervisor_version, f"can't restore on {self.sys_supervisor.version}. Must update supervisor first.",
self.sys_supervisor.version, _LOGGER.error,
) )
return False
_LOGGER.info("Partial-Restore %s start", backup.slug) _LOGGER.info("Partial-Restore %s start", backup.slug)
self.sys_core.state = CoreState.FREEZE self.sys_core.state = CoreState.FREEZE
success = await self._do_restore( try:
backup, addon_list, folder_list, homeassistant, False success = await self._do_restore(
) backup, addon_list, folder_list, homeassistant, False
)
self.sys_core.state = CoreState.RUNNING finally:
self.sys_core.state = CoreState.RUNNING
if success: if success:
_LOGGER.info("Partial-Restore %s done", backup.slug) _LOGGER.info("Partial-Restore %s done", backup.slug)

View File

@ -593,6 +593,10 @@ class HomeAssistantBackupError(BackupError, HomeAssistantError):
"""Raise if an error during Home Assistant Core backup is happening.""" """Raise if an error during Home Assistant Core backup is happening."""
class BackupInvalidError(BackupError):
"""Raise if backup or password provided is invalid."""
class BackupJobError(BackupError, JobException): class BackupJobError(BackupError, JobException):
"""Raise on Backup job error.""" """Raise on Backup job error."""

View File

@ -481,7 +481,8 @@ class HomeAssistant(FileConfiguration, CoreSysAttributes):
ATTR_REFRESH_TOKEN, ATTR_REFRESH_TOKEN,
ATTR_WATCHDOG, ATTR_WATCHDOG,
): ):
self._data[attr] = data[attr] if attr in data:
self._data[attr] = data[attr]
@Job( @Job(
name="home_assistant_get_users", name="home_assistant_get_users",

View File

@ -21,9 +21,9 @@ def fixture_backup_mock():
backup_instance.store_folders = AsyncMock(return_value=None) backup_instance.store_folders = AsyncMock(return_value=None)
backup_instance.store_homeassistant = AsyncMock(return_value=None) backup_instance.store_homeassistant = AsyncMock(return_value=None)
backup_instance.store_addons = AsyncMock(return_value=None) backup_instance.store_addons = AsyncMock(return_value=None)
backup_instance.restore_folders = AsyncMock(return_value=None) backup_instance.restore_folders = AsyncMock(return_value=True)
backup_instance.restore_homeassistant = AsyncMock(return_value=None) backup_instance.restore_homeassistant = AsyncMock(return_value=None)
backup_instance.restore_addons = AsyncMock(return_value=None) backup_instance.restore_addons = AsyncMock(return_value=(True, []))
backup_instance.restore_repositories = AsyncMock(return_value=None) backup_instance.restore_repositories = AsyncMock(return_value=None)
yield backup_mock yield backup_mock

View File

@ -22,7 +22,13 @@ from supervisor.docker.addon import DockerAddon
from supervisor.docker.const import ContainerState from supervisor.docker.const import ContainerState
from supervisor.docker.homeassistant import DockerHomeAssistant from supervisor.docker.homeassistant import DockerHomeAssistant
from supervisor.docker.monitor import DockerContainerStateEvent from supervisor.docker.monitor import DockerContainerStateEvent
from supervisor.exceptions import AddonsError, BackupError, BackupJobError, DockerError from supervisor.exceptions import (
AddonsError,
BackupError,
BackupInvalidError,
BackupJobError,
DockerError,
)
from supervisor.homeassistant.api import HomeAssistantAPI from supervisor.homeassistant.api import HomeAssistantAPI
from supervisor.homeassistant.core import HomeAssistantCore from supervisor.homeassistant.core import HomeAssistantCore
from supervisor.homeassistant.module import HomeAssistant from supervisor.homeassistant.module import HomeAssistant
@ -200,7 +206,7 @@ async def test_do_restore_full(coresys: CoreSys, full_backup_mock, install_addon
manager = BackupManager(coresys) manager = BackupManager(coresys)
backup_instance = full_backup_mock.return_value backup_instance = full_backup_mock.return_value
await manager.do_restore_full(backup_instance) assert await manager.do_restore_full(backup_instance)
backup_instance.restore_homeassistant.assert_called_once() backup_instance.restore_homeassistant.assert_called_once()
backup_instance.restore_repositories.assert_called_once() backup_instance.restore_repositories.assert_called_once()
@ -229,7 +235,7 @@ async def test_do_restore_full_different_addon(
backup_instance = full_backup_mock.return_value backup_instance = full_backup_mock.return_value
backup_instance.addon_list = ["differentslug"] backup_instance.addon_list = ["differentslug"]
await manager.do_restore_full(backup_instance) assert await manager.do_restore_full(backup_instance)
backup_instance.restore_homeassistant.assert_called_once() backup_instance.restore_homeassistant.assert_called_once()
backup_instance.restore_repositories.assert_called_once() backup_instance.restore_repositories.assert_called_once()
@ -256,7 +262,7 @@ async def test_do_restore_partial_minimal(
manager = BackupManager(coresys) manager = BackupManager(coresys)
backup_instance = partial_backup_mock.return_value backup_instance = partial_backup_mock.return_value
await manager.do_restore_partial(backup_instance, homeassistant=False) assert await manager.do_restore_partial(backup_instance, homeassistant=False)
backup_instance.restore_homeassistant.assert_not_called() backup_instance.restore_homeassistant.assert_not_called()
backup_instance.restore_repositories.assert_not_called() backup_instance.restore_repositories.assert_not_called()
@ -280,7 +286,7 @@ async def test_do_restore_partial_maximal(coresys: CoreSys, partial_backup_mock)
manager = BackupManager(coresys) manager = BackupManager(coresys)
backup_instance = partial_backup_mock.return_value backup_instance = partial_backup_mock.return_value
await manager.do_restore_partial( assert await manager.do_restore_partial(
backup_instance, backup_instance,
addons=[TEST_ADDON_SLUG], addons=[TEST_ADDON_SLUG],
folders=[FOLDER_SHARE, FOLDER_HOMEASSISTANT], folders=[FOLDER_SHARE, FOLDER_HOMEASSISTANT],
@ -299,25 +305,31 @@ async def test_do_restore_partial_maximal(coresys: CoreSys, partial_backup_mock)
assert coresys.core.state == CoreState.RUNNING assert coresys.core.state == CoreState.RUNNING
async def test_fail_invalid_full_backup(coresys: CoreSys, full_backup_mock: MagicMock): async def test_fail_invalid_full_backup(
coresys: CoreSys, full_backup_mock: MagicMock, partial_backup_mock: MagicMock
):
"""Test restore fails with invalid backup.""" """Test restore fails with invalid backup."""
coresys.core.state = CoreState.RUNNING coresys.core.state = CoreState.RUNNING
coresys.hardware.disk.get_disk_free_space = lambda x: 5000 coresys.hardware.disk.get_disk_free_space = lambda x: 5000
manager = BackupManager(coresys) manager = BackupManager(coresys)
with pytest.raises(BackupInvalidError):
await manager.do_restore_full(partial_backup_mock.return_value)
backup_instance = full_backup_mock.return_value backup_instance = full_backup_mock.return_value
backup_instance.protected = True backup_instance.protected = True
backup_instance.set_password.return_value = False backup_instance.set_password.return_value = False
assert await manager.do_restore_full(backup_instance) is False with pytest.raises(BackupInvalidError):
await manager.do_restore_full(backup_instance)
backup_instance.protected = False backup_instance.protected = False
backup_instance.supervisor_version = "2022.08.4" backup_instance.supervisor_version = "2022.08.4"
with patch.object( with patch.object(
type(coresys.supervisor), "version", new=PropertyMock(return_value="2022.08.3") type(coresys.supervisor), "version", new=PropertyMock(return_value="2022.08.3")
): ), pytest.raises(BackupInvalidError):
assert await manager.do_restore_full(backup_instance) is False await manager.do_restore_full(backup_instance)
async def test_fail_invalid_partial_backup( async def test_fail_invalid_partial_backup(
@ -333,20 +345,20 @@ async def test_fail_invalid_partial_backup(
backup_instance.protected = True backup_instance.protected = True
backup_instance.set_password.return_value = False backup_instance.set_password.return_value = False
assert await manager.do_restore_partial(backup_instance) is False with pytest.raises(BackupInvalidError):
await manager.do_restore_partial(backup_instance)
backup_instance.protected = False backup_instance.protected = False
backup_instance.homeassistant = None backup_instance.homeassistant = None
assert ( with pytest.raises(BackupInvalidError):
await manager.do_restore_partial(backup_instance, homeassistant=True) is False await manager.do_restore_partial(backup_instance, homeassistant=True)
)
backup_instance.supervisor_version = "2022.08.4" backup_instance.supervisor_version = "2022.08.4"
with patch.object( with patch.object(
type(coresys.supervisor), "version", new=PropertyMock(return_value="2022.08.3") type(coresys.supervisor), "version", new=PropertyMock(return_value="2022.08.3")
): ), pytest.raises(BackupInvalidError):
assert await manager.do_restore_partial(backup_instance) is False await manager.do_restore_partial(backup_instance)
async def test_backup_error( async def test_backup_error(
@ -368,15 +380,20 @@ async def test_backup_error(
async def test_restore_error( async def test_restore_error(
coresys: CoreSys, full_backup_mock: MagicMock, capture_exception: Mock coresys: CoreSys, full_backup_mock: MagicMock, capture_exception: Mock
): ):
"""Test restoring full Backup.""" """Test restoring full Backup with errors."""
coresys.core.state = CoreState.RUNNING coresys.core.state = CoreState.RUNNING
coresys.hardware.disk.get_disk_free_space = lambda x: 5000 coresys.hardware.disk.get_disk_free_space = lambda x: 5000
coresys.homeassistant.core.start = AsyncMock(return_value=None) coresys.homeassistant.core.start = AsyncMock(return_value=None)
backup_instance = full_backup_mock.return_value backup_instance = full_backup_mock.return_value
backup_instance.restore_dockerconfig.side_effect = (err := DockerError()) backup_instance.restore_dockerconfig.side_effect = BackupError()
await coresys.backups.do_restore_full(backup_instance) with pytest.raises(BackupError):
await coresys.backups.do_restore_full(backup_instance)
capture_exception.assert_not_called()
backup_instance.restore_dockerconfig.side_effect = (err := DockerError())
with pytest.raises(BackupError):
await coresys.backups.do_restore_full(backup_instance)
capture_exception.assert_called_once_with(err) capture_exception.assert_called_once_with(err)
@ -641,17 +658,17 @@ async def test_partial_backup_to_mount(
"test", homeassistant=True, location=mount "test", homeassistant=True, location=mount
) )
assert (mount_dir / f"{backup.slug}.tar").exists() assert (mount_dir / f"{backup.slug}.tar").exists()
# Reload and check that backups in mounts are listed # Reload and check that backups in mounts are listed
await coresys.backups.reload() await coresys.backups.reload()
assert coresys.backups.get(backup.slug) assert coresys.backups.get(backup.slug)
# Remove marker file and restore. Confirm it comes back # Remove marker file and restore. Confirm it comes back
marker.unlink() marker.unlink()
with patch.object(DockerHomeAssistant, "is_running", return_value=True): with patch.object(DockerHomeAssistant, "is_running", return_value=True):
await coresys.backups.do_restore_partial(backup, homeassistant=True) await coresys.backups.do_restore_partial(backup, homeassistant=True)
assert marker.exists() assert marker.exists()