Report stage with error in jobs (#5784)

* Report stage with error in jobs

* Copy doesn't lose track of the successful copies

* Add stage to errors in api output test

* revert unnecessary change to import

* Add tests for a bit more coverage of copy_additional_locations
This commit is contained in:
Mike Degatano 2025-03-27 10:07:06 -04:00 committed by GitHub
parent 92cadb4c55
commit 9222a3c9c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 214 additions and 30 deletions

View File

@@ -368,13 +368,15 @@ class BackupManager(FileConfiguration, JobGroup):
): ):
"""Copy a backup file to additional locations.""" """Copy a backup file to additional locations."""
all_new_locations: dict[str | None, Path] = {}
def copy_to_additional_locations() -> dict[str | None, Path]: def copy_to_additional_locations() -> dict[str | None, Path]:
"""Copy backup file to additional locations.""" """Copy backup file to additional locations."""
all_locations: dict[str | None, Path] = {} nonlocal all_new_locations
for location in locations: for location in locations:
try: try:
if location == LOCATION_CLOUD_BACKUP: if location == LOCATION_CLOUD_BACKUP:
all_locations[LOCATION_CLOUD_BACKUP] = Path( all_new_locations[LOCATION_CLOUD_BACKUP] = Path(
copy(backup.tarfile, self.sys_config.path_core_backup) copy(backup.tarfile, self.sys_config.path_core_backup)
) )
elif location: elif location:
@@ -384,11 +386,11 @@ class BackupManager(FileConfiguration, JobGroup):
f"{location_mount.name} is down, cannot copy to it", f"{location_mount.name} is down, cannot copy to it",
_LOGGER.error, _LOGGER.error,
) )
all_locations[location_mount.name] = Path( all_new_locations[location_mount.name] = Path(
copy(backup.tarfile, location_mount.local_where) copy(backup.tarfile, location_mount.local_where)
) )
else: else:
all_locations[None] = Path( all_new_locations[None] = Path(
copy(backup.tarfile, self.sys_config.path_backup) copy(backup.tarfile, self.sys_config.path_backup)
) )
except OSError as err: except OSError as err:
@@ -401,18 +403,14 @@ class BackupManager(FileConfiguration, JobGroup):
raise BackupDataDiskBadMessageError(msg, _LOGGER.error) from err raise BackupDataDiskBadMessageError(msg, _LOGGER.error) from err
raise BackupError(msg, _LOGGER.error) from err raise BackupError(msg, _LOGGER.error) from err
return all_locations
try: try:
all_new_locations = await self.sys_run_in_executor( await self.sys_run_in_executor(copy_to_additional_locations)
copy_to_additional_locations
)
except BackupDataDiskBadMessageError: except BackupDataDiskBadMessageError:
self.sys_resolution.add_unhealthy_reason( self.sys_resolution.add_unhealthy_reason(
UnhealthyReason.OSERROR_BAD_MESSAGE UnhealthyReason.OSERROR_BAD_MESSAGE
) )
raise raise
finally:
backup.all_locations.update( backup.all_locations.update(
{ {
loc: { loc: {

View File

@@ -73,10 +73,15 @@ class SupervisorJobError:
type_: type[HassioError] = HassioError type_: type[HassioError] = HassioError
message: str = "Unknown error, see supervisor logs" message: str = "Unknown error, see supervisor logs"
stage: str | None = None
def as_dict(self) -> dict[str, str]: def as_dict(self) -> dict[str, str]:
"""Return dictionary representation.""" """Return dictionary representation."""
return {"type": self.type_.__name__, "message": self.message} return {
"type": self.type_.__name__,
"message": self.message,
"stage": self.stage,
}
@define(order=True) @define(order=True)
@@ -126,9 +131,9 @@ class SupervisorJob:
def capture_error(self, err: HassioError | None = None) -> None: def capture_error(self, err: HassioError | None = None) -> None:
"""Capture an error or record that an unknown error has occurred.""" """Capture an error or record that an unknown error has occurred."""
if err: if err:
new_error = SupervisorJobError(type(err), str(err)) new_error = SupervisorJobError(type(err), str(err), self.stage)
else: else:
new_error = SupervisorJobError() new_error = SupervisorJobError(stage=self.stage)
self.errors += [new_error] self.errors += [new_error]
@contextmanager @contextmanager

View File

@@ -394,7 +394,11 @@ async def test_api_backup_errors(
assert job["child_jobs"][1]["child_jobs"][0]["name"] == "backup_addon_save" assert job["child_jobs"][1]["child_jobs"][0]["name"] == "backup_addon_save"
assert job["child_jobs"][1]["child_jobs"][0]["reference"] == "local_ssh" assert job["child_jobs"][1]["child_jobs"][0]["reference"] == "local_ssh"
assert job["child_jobs"][1]["child_jobs"][0]["errors"] == [ assert job["child_jobs"][1]["child_jobs"][0]["errors"] == [
{"type": "BackupError", "message": "Can't create backup for local_ssh"} {
"type": "BackupError",
"message": "Can't create backup for local_ssh",
"stage": None,
}
] ]
assert job["child_jobs"][2]["name"] == "backup_store_folders" assert job["child_jobs"][2]["name"] == "backup_store_folders"
assert job["child_jobs"][2]["reference"] == slug assert job["child_jobs"][2]["reference"] == slug
@@ -425,11 +429,21 @@ async def test_api_backup_errors(
assert job["name"] == f"backup_manager_{backup_type}_backup" assert job["name"] == f"backup_manager_{backup_type}_backup"
assert job["done"] is True assert job["done"] is True
assert job["errors"] == ( assert job["errors"] == [
err := [{"type": "HomeAssistantBackupError", "message": "Backup error"}] {
) "type": "HomeAssistantBackupError",
"message": "Backup error",
"stage": "home_assistant",
}
]
assert job["child_jobs"][0]["name"] == "backup_store_homeassistant" assert job["child_jobs"][0]["name"] == "backup_store_homeassistant"
assert job["child_jobs"][0]["errors"] == err assert job["child_jobs"][0]["errors"] == [
{
"type": "HomeAssistantBackupError",
"message": "Backup error",
"stage": None,
}
]
assert len(job["child_jobs"]) == 1 assert len(job["child_jobs"]) == 1
@@ -625,13 +639,20 @@ async def test_upload_download(
@pytest.mark.usefixtures("path_extern", "tmp_supervisor_data") @pytest.mark.usefixtures("path_extern", "tmp_supervisor_data")
@pytest.mark.parametrize( @pytest.mark.parametrize(
("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})] ("backup_type", "inputs", "locations"),
[
("full", {}, [None, ".cloud_backup"]),
("full", {}, [".cloud_backup", None]),
("partial", {"folders": ["ssl"]}, [None, ".cloud_backup"]),
("partial", {"folders": ["ssl"]}, [".cloud_backup", None]),
],
) )
async def test_backup_to_multiple_locations( async def test_backup_to_multiple_locations(
api_client: TestClient, api_client: TestClient,
coresys: CoreSys, coresys: CoreSys,
backup_type: str, backup_type: str,
inputs: dict[str, Any], inputs: dict[str, Any],
locations: list[str | None],
): ):
"""Test making a backup to multiple locations.""" """Test making a backup to multiple locations."""
await coresys.core.set_state(CoreState.RUNNING) await coresys.core.set_state(CoreState.RUNNING)
@@ -639,8 +660,7 @@ async def test_backup_to_multiple_locations(
resp = await api_client.post( resp = await api_client.post(
f"/backups/new/{backup_type}", f"/backups/new/{backup_type}",
json={"name": "Multiple locations test", "location": [None, ".cloud_backup"]} json={"name": "Multiple locations test", "location": locations} | inputs,
| inputs,
) )
assert resp.status == 200 assert resp.status == 200
result = await resp.json() result = await resp.json()
@@ -658,6 +678,56 @@ async def test_backup_to_multiple_locations(
assert coresys.backups.get(slug).location is None assert coresys.backups.get(slug).location is None
@pytest.mark.usefixtures("path_extern", "tmp_supervisor_data")
@pytest.mark.parametrize(
("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})]
)
async def test_backup_to_multiple_locations_error_on_copy(
api_client: TestClient,
coresys: CoreSys,
backup_type: str,
inputs: dict[str, Any],
):
"""Test making a backup to multiple locations that fails during copy stage."""
await coresys.core.set_state(CoreState.RUNNING)
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
with patch("supervisor.backups.manager.copy", side_effect=OSError):
resp = await api_client.post(
f"/backups/new/{backup_type}",
json={
"name": "Multiple locations test",
"location": [None, ".cloud_backup"],
}
| inputs,
)
assert resp.status == 200
result = await resp.json()
assert result["result"] == "ok"
slug = result["data"]["slug"]
orig_backup = coresys.config.path_backup / f"{slug}.tar"
assert await coresys.run_in_executor(orig_backup.exists)
assert coresys.backups.get(slug).all_locations == {
None: {"path": orig_backup, "protected": False, "size_bytes": 10240},
}
assert coresys.backups.get(slug).location is None
resp = await api_client.get("/jobs/info")
assert resp.status == 200
result = await resp.json()
assert result["data"]["jobs"][0]["name"] == f"backup_manager_{backup_type}_backup"
assert result["data"]["jobs"][0]["reference"] == slug
assert result["data"]["jobs"][0]["done"] is True
assert result["data"]["jobs"][0]["errors"] == [
{
"type": "BackupError",
"message": "Could not copy backup to .cloud_backup due to: ",
"stage": "copy_additional_locations",
}
]
@pytest.mark.parametrize( @pytest.mark.parametrize(
("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})] ("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})]
) )

View File

@@ -7,6 +7,7 @@ from aiohttp.test_utils import TestClient
import pytest import pytest
from supervisor.coresys import CoreSys from supervisor.coresys import CoreSys
from supervisor.exceptions import SupervisorError
from supervisor.jobs.const import ATTR_IGNORE_CONDITIONS, JobCondition from supervisor.jobs.const import ATTR_IGNORE_CONDITIONS, JobCondition
from supervisor.jobs.decorator import Job from supervisor.jobs.decorator import Job
@@ -317,3 +318,72 @@ async def test_jobs_sorted(api_client: TestClient, coresys: CoreSys):
], ],
}, },
] ]
async def test_job_with_error(
api_client: TestClient,
coresys: CoreSys,
):
"""Test job output with an error."""
class TestClass:
"""Test class."""
def __init__(self, coresys: CoreSys):
"""Initialize the test class."""
self.coresys = coresys
@Job(name="test_jobs_api_error_outer", cleanup=False)
async def test_jobs_api_error_outer(self):
"""Error test outer method."""
coresys.jobs.current.stage = "test"
await self.test_jobs_api_error_inner()
@Job(name="test_jobs_api_error_inner", cleanup=False)
async def test_jobs_api_error_inner(self):
"""Error test inner method."""
raise SupervisorError("bad")
test = TestClass(coresys)
with pytest.raises(SupervisorError):
await test.test_jobs_api_error_outer()
resp = await api_client.get("/jobs/info")
result = await resp.json()
assert result["data"]["jobs"] == [
{
"created": ANY,
"name": "test_jobs_api_error_outer",
"reference": None,
"uuid": ANY,
"progress": 0,
"stage": "test",
"done": True,
"errors": [
{
"type": "SupervisorError",
"message": "bad",
"stage": "test",
}
],
"child_jobs": [
{
"created": ANY,
"name": "test_jobs_api_error_inner",
"reference": None,
"uuid": ANY,
"progress": 0,
"stage": None,
"done": True,
"errors": [
{
"type": "SupervisorError",
"message": "bad",
"stage": None,
}
],
"child_jobs": [],
},
],
},
]

View File

@@ -37,6 +37,7 @@ from supervisor.homeassistant.core import HomeAssistantCore
from supervisor.homeassistant.module import HomeAssistant from supervisor.homeassistant.module import HomeAssistant
from supervisor.jobs.const import JobCondition from supervisor.jobs.const import JobCondition
from supervisor.mounts.mount import Mount from supervisor.mounts.mount import Mount
from supervisor.resolution.const import UnhealthyReason
from supervisor.utils.json import read_json_file, write_json_file from supervisor.utils.json import read_json_file, write_json_file
from tests.common import get_fixture_path from tests.common import get_fixture_path
@@ -2079,3 +2080,41 @@ async def test_remove_non_existing_backup_raises(
with pytest.raises(BackupFileNotFoundError): with pytest.raises(BackupFileNotFoundError):
await coresys.backups.remove(backup) await coresys.backups.remove(backup)
@pytest.mark.usefixtures("tmp_supervisor_data", "path_extern")
@pytest.mark.parametrize(
("error_num", "unhealthy", "default_location", "additional_location"),
[
(errno.EBUSY, False, None, ".cloud_backup"),
(errno.EBUSY, False, ".cloud_backup", None),
(errno.EBADMSG, True, None, ".cloud_backup"),
(errno.EBADMSG, True, ".cloud_backup", None),
],
)
async def test_backup_multiple_locations_oserror(
coresys: CoreSys,
error_num: int,
unhealthy: bool,
default_location: str | None,
additional_location: str | None,
):
"""Test backup to multiple locations raises oserror."""
await coresys.core.set_state(CoreState.RUNNING)
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
with patch("supervisor.backups.manager.copy", side_effect=(err := OSError())):
err.errno = error_num
backup: Backup | None = await coresys.backups.do_backup_full(
name="test",
location=default_location,
additional_locations=[additional_location],
)
assert backup
assert coresys.backups.get(backup.slug) == backup
assert backup.location == default_location
assert additional_location not in backup.all_locations
assert (
UnhealthyReason.OSERROR_BAD_MESSAGE in coresys.resolution.unhealthy
) is unhealthy

View File

@@ -195,6 +195,7 @@ async def test_notify_on_change(coresys: CoreSys, ha_ws_client: AsyncMock):
{ {
"type": "HassioError", "type": "HassioError",
"message": "Unknown error, see supervisor logs", "message": "Unknown error, see supervisor logs",
"stage": "test",
} }
], ],
"created": ANY, "created": ANY,
@@ -221,6 +222,7 @@ async def test_notify_on_change(coresys: CoreSys, ha_ws_client: AsyncMock):
{ {
"type": "HassioError", "type": "HassioError",
"message": "Unknown error, see supervisor logs", "message": "Unknown error, see supervisor logs",
"stage": "test",
} }
], ],
"created": ANY, "created": ANY,