Report stage with error in jobs (#5784)

* Report stage with error in jobs

* Copy doesn't lose track of the successful copies

* Add stage to errors in api output test

* revert unnecessary change to import

* Add tests for a bit more coverage of copy_additional_locations
This commit is contained in:
Mike Degatano 2025-03-27 10:07:06 -04:00 committed by GitHub
parent 92cadb4c55
commit 9222a3c9c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 214 additions and 30 deletions

View File

@@ -368,13 +368,15 @@ class BackupManager(FileConfiguration, JobGroup):
): ):
"""Copy a backup file to additional locations.""" """Copy a backup file to additional locations."""
all_new_locations: dict[str | None, Path] = {}
def copy_to_additional_locations() -> dict[str | None, Path]: def copy_to_additional_locations() -> dict[str | None, Path]:
"""Copy backup file to additional locations.""" """Copy backup file to additional locations."""
all_locations: dict[str | None, Path] = {} nonlocal all_new_locations
for location in locations: for location in locations:
try: try:
if location == LOCATION_CLOUD_BACKUP: if location == LOCATION_CLOUD_BACKUP:
all_locations[LOCATION_CLOUD_BACKUP] = Path( all_new_locations[LOCATION_CLOUD_BACKUP] = Path(
copy(backup.tarfile, self.sys_config.path_core_backup) copy(backup.tarfile, self.sys_config.path_core_backup)
) )
elif location: elif location:
@@ -384,11 +386,11 @@ class BackupManager(FileConfiguration, JobGroup):
f"{location_mount.name} is down, cannot copy to it", f"{location_mount.name} is down, cannot copy to it",
_LOGGER.error, _LOGGER.error,
) )
all_locations[location_mount.name] = Path( all_new_locations[location_mount.name] = Path(
copy(backup.tarfile, location_mount.local_where) copy(backup.tarfile, location_mount.local_where)
) )
else: else:
all_locations[None] = Path( all_new_locations[None] = Path(
copy(backup.tarfile, self.sys_config.path_backup) copy(backup.tarfile, self.sys_config.path_backup)
) )
except OSError as err: except OSError as err:
@@ -401,18 +403,14 @@ class BackupManager(FileConfiguration, JobGroup):
raise BackupDataDiskBadMessageError(msg, _LOGGER.error) from err raise BackupDataDiskBadMessageError(msg, _LOGGER.error) from err
raise BackupError(msg, _LOGGER.error) from err raise BackupError(msg, _LOGGER.error) from err
return all_locations
try: try:
all_new_locations = await self.sys_run_in_executor( await self.sys_run_in_executor(copy_to_additional_locations)
copy_to_additional_locations
)
except BackupDataDiskBadMessageError: except BackupDataDiskBadMessageError:
self.sys_resolution.add_unhealthy_reason( self.sys_resolution.add_unhealthy_reason(
UnhealthyReason.OSERROR_BAD_MESSAGE UnhealthyReason.OSERROR_BAD_MESSAGE
) )
raise raise
finally:
backup.all_locations.update( backup.all_locations.update(
{ {
loc: { loc: {

View File

@@ -73,10 +73,15 @@ class SupervisorJobError:
type_: type[HassioError] = HassioError type_: type[HassioError] = HassioError
message: str = "Unknown error, see supervisor logs" message: str = "Unknown error, see supervisor logs"
stage: str | None = None
def as_dict(self) -> dict[str, str]: def as_dict(self) -> dict[str, str]:
"""Return dictionary representation.""" """Return dictionary representation."""
return {"type": self.type_.__name__, "message": self.message} return {
"type": self.type_.__name__,
"message": self.message,
"stage": self.stage,
}
@define(order=True) @define(order=True)
@@ -126,9 +131,9 @@ class SupervisorJob:
def capture_error(self, err: HassioError | None = None) -> None: def capture_error(self, err: HassioError | None = None) -> None:
"""Capture an error or record that an unknown error has occurred.""" """Capture an error or record that an unknown error has occurred."""
if err: if err:
new_error = SupervisorJobError(type(err), str(err)) new_error = SupervisorJobError(type(err), str(err), self.stage)
else: else:
new_error = SupervisorJobError() new_error = SupervisorJobError(stage=self.stage)
self.errors += [new_error] self.errors += [new_error]
@contextmanager @contextmanager

View File

@@ -394,7 +394,11 @@ async def test_api_backup_errors(
assert job["child_jobs"][1]["child_jobs"][0]["name"] == "backup_addon_save" assert job["child_jobs"][1]["child_jobs"][0]["name"] == "backup_addon_save"
assert job["child_jobs"][1]["child_jobs"][0]["reference"] == "local_ssh" assert job["child_jobs"][1]["child_jobs"][0]["reference"] == "local_ssh"
assert job["child_jobs"][1]["child_jobs"][0]["errors"] == [ assert job["child_jobs"][1]["child_jobs"][0]["errors"] == [
{"type": "BackupError", "message": "Can't create backup for local_ssh"} {
"type": "BackupError",
"message": "Can't create backup for local_ssh",
"stage": None,
}
] ]
assert job["child_jobs"][2]["name"] == "backup_store_folders" assert job["child_jobs"][2]["name"] == "backup_store_folders"
assert job["child_jobs"][2]["reference"] == slug assert job["child_jobs"][2]["reference"] == slug
@@ -425,11 +429,21 @@ async def test_api_backup_errors(
assert job["name"] == f"backup_manager_{backup_type}_backup" assert job["name"] == f"backup_manager_{backup_type}_backup"
assert job["done"] is True assert job["done"] is True
assert job["errors"] == ( assert job["errors"] == [
err := [{"type": "HomeAssistantBackupError", "message": "Backup error"}] {
) "type": "HomeAssistantBackupError",
"message": "Backup error",
"stage": "home_assistant",
}
]
assert job["child_jobs"][0]["name"] == "backup_store_homeassistant" assert job["child_jobs"][0]["name"] == "backup_store_homeassistant"
assert job["child_jobs"][0]["errors"] == err assert job["child_jobs"][0]["errors"] == [
{
"type": "HomeAssistantBackupError",
"message": "Backup error",
"stage": None,
}
]
assert len(job["child_jobs"]) == 1 assert len(job["child_jobs"]) == 1
@@ -625,13 +639,20 @@ async def test_upload_download(
@pytest.mark.usefixtures("path_extern", "tmp_supervisor_data") @pytest.mark.usefixtures("path_extern", "tmp_supervisor_data")
@pytest.mark.parametrize( @pytest.mark.parametrize(
("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})] ("backup_type", "inputs", "locations"),
[
("full", {}, [None, ".cloud_backup"]),
("full", {}, [".cloud_backup", None]),
("partial", {"folders": ["ssl"]}, [None, ".cloud_backup"]),
("partial", {"folders": ["ssl"]}, [".cloud_backup", None]),
],
) )
async def test_backup_to_multiple_locations( async def test_backup_to_multiple_locations(
api_client: TestClient, api_client: TestClient,
coresys: CoreSys, coresys: CoreSys,
backup_type: str, backup_type: str,
inputs: dict[str, Any], inputs: dict[str, Any],
locations: list[str | None],
): ):
"""Test making a backup to multiple locations.""" """Test making a backup to multiple locations."""
await coresys.core.set_state(CoreState.RUNNING) await coresys.core.set_state(CoreState.RUNNING)
@@ -639,8 +660,7 @@ async def test_backup_to_multiple_locations(
resp = await api_client.post( resp = await api_client.post(
f"/backups/new/{backup_type}", f"/backups/new/{backup_type}",
json={"name": "Multiple locations test", "location": [None, ".cloud_backup"]} json={"name": "Multiple locations test", "location": locations} | inputs,
| inputs,
) )
assert resp.status == 200 assert resp.status == 200
result = await resp.json() result = await resp.json()
@@ -658,6 +678,56 @@ async def test_backup_to_multiple_locations(
assert coresys.backups.get(slug).location is None assert coresys.backups.get(slug).location is None
@pytest.mark.usefixtures("path_extern", "tmp_supervisor_data")
@pytest.mark.parametrize(
("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})]
)
async def test_backup_to_multiple_locations_error_on_copy(
api_client: TestClient,
coresys: CoreSys,
backup_type: str,
inputs: dict[str, Any],
):
"""Test making a backup to multiple locations that fails during copy stage."""
await coresys.core.set_state(CoreState.RUNNING)
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
with patch("supervisor.backups.manager.copy", side_effect=OSError):
resp = await api_client.post(
f"/backups/new/{backup_type}",
json={
"name": "Multiple locations test",
"location": [None, ".cloud_backup"],
}
| inputs,
)
assert resp.status == 200
result = await resp.json()
assert result["result"] == "ok"
slug = result["data"]["slug"]
orig_backup = coresys.config.path_backup / f"{slug}.tar"
assert await coresys.run_in_executor(orig_backup.exists)
assert coresys.backups.get(slug).all_locations == {
None: {"path": orig_backup, "protected": False, "size_bytes": 10240},
}
assert coresys.backups.get(slug).location is None
resp = await api_client.get("/jobs/info")
assert resp.status == 200
result = await resp.json()
assert result["data"]["jobs"][0]["name"] == f"backup_manager_{backup_type}_backup"
assert result["data"]["jobs"][0]["reference"] == slug
assert result["data"]["jobs"][0]["done"] is True
assert result["data"]["jobs"][0]["errors"] == [
{
"type": "BackupError",
"message": "Could not copy backup to .cloud_backup due to: ",
"stage": "copy_additional_locations",
}
]
@pytest.mark.parametrize( @pytest.mark.parametrize(
("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})] ("backup_type", "inputs"), [("full", {}), ("partial", {"folders": ["ssl"]})]
) )

View File

@@ -7,6 +7,7 @@ from aiohttp.test_utils import TestClient
import pytest import pytest
from supervisor.coresys import CoreSys from supervisor.coresys import CoreSys
from supervisor.exceptions import SupervisorError
from supervisor.jobs.const import ATTR_IGNORE_CONDITIONS, JobCondition from supervisor.jobs.const import ATTR_IGNORE_CONDITIONS, JobCondition
from supervisor.jobs.decorator import Job from supervisor.jobs.decorator import Job
@@ -317,3 +318,72 @@ async def test_jobs_sorted(api_client: TestClient, coresys: CoreSys):
], ],
}, },
] ]
async def test_job_with_error(
api_client: TestClient,
coresys: CoreSys,
):
"""Test job output with an error."""
class TestClass:
"""Test class."""
def __init__(self, coresys: CoreSys):
"""Initialize the test class."""
self.coresys = coresys
@Job(name="test_jobs_api_error_outer", cleanup=False)
async def test_jobs_api_error_outer(self):
"""Error test outer method."""
coresys.jobs.current.stage = "test"
await self.test_jobs_api_error_inner()
@Job(name="test_jobs_api_error_inner", cleanup=False)
async def test_jobs_api_error_inner(self):
"""Error test inner method."""
raise SupervisorError("bad")
test = TestClass(coresys)
with pytest.raises(SupervisorError):
await test.test_jobs_api_error_outer()
resp = await api_client.get("/jobs/info")
result = await resp.json()
assert result["data"]["jobs"] == [
{
"created": ANY,
"name": "test_jobs_api_error_outer",
"reference": None,
"uuid": ANY,
"progress": 0,
"stage": "test",
"done": True,
"errors": [
{
"type": "SupervisorError",
"message": "bad",
"stage": "test",
}
],
"child_jobs": [
{
"created": ANY,
"name": "test_jobs_api_error_inner",
"reference": None,
"uuid": ANY,
"progress": 0,
"stage": None,
"done": True,
"errors": [
{
"type": "SupervisorError",
"message": "bad",
"stage": None,
}
],
"child_jobs": [],
},
],
},
]

View File

@@ -37,6 +37,7 @@ from supervisor.homeassistant.core import HomeAssistantCore
from supervisor.homeassistant.module import HomeAssistant from supervisor.homeassistant.module import HomeAssistant
from supervisor.jobs.const import JobCondition from supervisor.jobs.const import JobCondition
from supervisor.mounts.mount import Mount from supervisor.mounts.mount import Mount
from supervisor.resolution.const import UnhealthyReason
from supervisor.utils.json import read_json_file, write_json_file from supervisor.utils.json import read_json_file, write_json_file
from tests.common import get_fixture_path from tests.common import get_fixture_path
@@ -2079,3 +2080,41 @@ async def test_remove_non_existing_backup_raises(
with pytest.raises(BackupFileNotFoundError): with pytest.raises(BackupFileNotFoundError):
await coresys.backups.remove(backup) await coresys.backups.remove(backup)
@pytest.mark.usefixtures("tmp_supervisor_data", "path_extern")
@pytest.mark.parametrize(
("error_num", "unhealthy", "default_location", "additional_location"),
[
(errno.EBUSY, False, None, ".cloud_backup"),
(errno.EBUSY, False, ".cloud_backup", None),
(errno.EBADMSG, True, None, ".cloud_backup"),
(errno.EBADMSG, True, ".cloud_backup", None),
],
)
async def test_backup_multiple_locations_oserror(
coresys: CoreSys,
error_num: int,
unhealthy: bool,
default_location: str | None,
additional_location: str | None,
):
"""Test backup to multiple locations raises oserror."""
await coresys.core.set_state(CoreState.RUNNING)
coresys.hardware.disk.get_disk_free_space = lambda x: 5000
with patch("supervisor.backups.manager.copy", side_effect=(err := OSError())):
err.errno = error_num
backup: Backup | None = await coresys.backups.do_backup_full(
name="test",
location=default_location,
additional_locations=[additional_location],
)
assert backup
assert coresys.backups.get(backup.slug) == backup
assert backup.location == default_location
assert additional_location not in backup.all_locations
assert (
UnhealthyReason.OSERROR_BAD_MESSAGE in coresys.resolution.unhealthy
) is unhealthy

View File

@@ -195,6 +195,7 @@ async def test_notify_on_change(coresys: CoreSys, ha_ws_client: AsyncMock):
{ {
"type": "HassioError", "type": "HassioError",
"message": "Unknown error, see supervisor logs", "message": "Unknown error, see supervisor logs",
"stage": "test",
} }
], ],
"created": ANY, "created": ANY,
@@ -221,6 +222,7 @@ async def test_notify_on_change(coresys: CoreSys, ha_ws_client: AsyncMock):
{ {
"type": "HassioError", "type": "HassioError",
"message": "Unknown error, see supervisor logs", "message": "Unknown error, see supervisor logs",
"stage": "test",
} }
], ],
"created": ANY, "created": ANY,