Implement upload retry logic in CloudBackupAgent (#135062)

* Implement upload retry logic in CloudBackupAgent

* Update backup.py

Co-authored-by: Erik Montnemery <erik@montnemery.com>

* nit

---------

Co-authored-by: Erik Montnemery <erik@montnemery.com>
This commit is contained in:
Joakim Sørensen 2025-01-08 08:16:18 +01:00 committed by GitHub
parent a1d43b9387
commit 20db7fdc96
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 77 additions and 27 deletions

View File

@ -2,10 +2,12 @@
from __future__ import annotations from __future__ import annotations
import asyncio
import base64 import base64
from collections.abc import AsyncIterator, Callable, Coroutine, Mapping from collections.abc import AsyncIterator, Callable, Coroutine, Mapping
import hashlib import hashlib
import logging import logging
import random
from typing import Any from typing import Any
from aiohttp import ClientError, ClientTimeout from aiohttp import ClientError, ClientTimeout
@ -27,6 +29,9 @@ from .const import DATA_CLOUD, DOMAIN, EVENT_CLOUD_EVENT
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
_STORAGE_BACKUP = "backup" _STORAGE_BACKUP = "backup"
# Upload retry policy: attempts are capped at _RETRY_LIMIT, and each failed
# attempt waits a random delay between _RETRY_SECONDS_MIN and
# _RETRY_SECONDS_MAX seconds (randomized to spread retries from many
# installations) before the next try.
_RETRY_LIMIT = 5
_RETRY_SECONDS_MIN = 60
_RETRY_SECONDS_MAX = 600
async def _b64md5(stream: AsyncIterator[bytes]) -> str: async def _b64md5(stream: AsyncIterator[bytes]) -> str:
@ -125,6 +130,44 @@ class CloudBackupAgent(BackupAgent):
return ChunkAsyncStreamIterator(resp.content) return ChunkAsyncStreamIterator(resp.content)
async def _async_do_upload_backup(
    self,
    *,
    open_stream: Callable[[], Coroutine[Any, Any, AsyncIterator[bytes]]],
    filename: str,
    base64md5hash: str,
    metadata: dict[str, Any],
    size: int,
) -> None:
    """Perform a single attempt to upload a backup to cloud storage.

    Args:
        open_stream: Factory returning a fresh async byte stream of the
            backup contents; called once per attempt so the stream is not
            re-consumed on retries.
        filename: Destination file name in cloud storage.
        base64md5hash: Base64-encoded MD5 digest of the backup, used by the
            upload-details request (presumably for server-side integrity
            verification — confirm against the cloud API).
        metadata: Backup metadata dict sent with the upload-details request.
        size: Backup size in bytes; also sent as the "content-length" header.

    Raises:
        BackupAgentError: If fetching the upload details fails, or if the
            PUT to the returned URL times out, errors, or returns a
            4xx/5xx status.
    """
    # First, ask the cloud service where (URL) and how (headers) to upload.
    try:
        details = await async_files_upload_details(
            self._cloud,
            storage_type=_STORAGE_BACKUP,
            filename=filename,
            metadata=metadata,
            size=size,
            base64md5hash=base64md5hash,
        )
    except (ClientError, CloudError) as err:
        raise BackupAgentError("Failed to get upload details") from err

    # Then PUT the backup stream to the provided URL.
    try:
        upload_status = await self._cloud.websession.put(
            details["url"],
            data=await open_stream(),
            headers=details["headers"] | {"content-length": str(size)},
            # Generous total timeout: backups can be large and slow to send.
            timeout=ClientTimeout(connect=10.0, total=43200.0),  # 43200s == 12h
        )
        # Log success at DEBUG, HTTP errors (>= 400) at WARNING.
        _LOGGER.log(
            logging.DEBUG if upload_status.status < 400 else logging.WARNING,
            "Backup upload status: %s",
            upload_status.status,
        )
        # Convert HTTP error statuses into ClientResponseError (a ClientError
        # subclass) so they are mapped to BackupAgentError below.
        upload_status.raise_for_status()
    except (TimeoutError, ClientError) as err:
        raise BackupAgentError("Failed to upload backup") from err
async def async_upload_backup( async def async_upload_backup(
self, self,
*, *,
@ -141,34 +184,34 @@ class CloudBackupAgent(BackupAgent):
raise BackupAgentError("Cloud backups must be protected") raise BackupAgentError("Cloud backups must be protected")
base64md5hash = await _b64md5(await open_stream()) base64md5hash = await _b64md5(await open_stream())
filename = self._get_backup_filename()
metadata = backup.as_dict()
size = backup.size
try: tries = 1
details = await async_files_upload_details( while tries <= _RETRY_LIMIT:
self._cloud, try:
storage_type=_STORAGE_BACKUP, await self._async_do_upload_backup(
filename=self._get_backup_filename(), open_stream=open_stream,
metadata=backup.as_dict(), filename=filename,
size=backup.size, base64md5hash=base64md5hash,
base64md5hash=base64md5hash, metadata=metadata,
) size=size,
except (ClientError, CloudError) as err: )
raise BackupAgentError("Failed to get upload details") from err break
except BackupAgentError as err:
try: if tries == _RETRY_LIMIT:
upload_status = await self._cloud.websession.put( raise
details["url"], tries += 1
data=await open_stream(), retry_timer = random.randint(_RETRY_SECONDS_MIN, _RETRY_SECONDS_MAX)
headers=details["headers"] | {"content-length": str(backup.size)}, _LOGGER.info(
timeout=ClientTimeout(connect=10.0, total=43200.0), # 43200s == 12h "Failed to upload backup, retrying (%s/%s) in %ss: %s",
) tries,
_LOGGER.log( _RETRY_LIMIT,
logging.DEBUG if upload_status.status < 400 else logging.WARNING, retry_timer,
"Backup upload status: %s", err,
upload_status.status, )
) await asyncio.sleep(retry_timer)
upload_status.raise_for_status()
except (TimeoutError, ClientError) as err:
raise BackupAgentError("Failed to upload backup") from err
async def async_delete_backup( async def async_delete_backup(
self, self,

View File

@ -389,6 +389,7 @@ async def test_agents_upload_fail_put(
aioclient_mock: AiohttpClientMocker, aioclient_mock: AiohttpClientMocker,
mock_get_upload_details: Mock, mock_get_upload_details: Mock,
put_mock_kwargs: dict[str, Any], put_mock_kwargs: dict[str, Any],
caplog: pytest.LogCaptureFixture,
) -> None: ) -> None:
"""Test agent upload backup fails.""" """Test agent upload backup fails."""
client = await hass_client() client = await hass_client()
@ -417,6 +418,9 @@ async def test_agents_upload_fail_put(
return_value=test_backup, return_value=test_backup,
), ),
patch("pathlib.Path.open") as mocked_open, patch("pathlib.Path.open") as mocked_open,
patch("homeassistant.components.cloud.backup.asyncio.sleep"),
patch("homeassistant.components.cloud.backup.random.randint", return_value=60),
patch("homeassistant.components.cloud.backup._RETRY_LIMIT", 2),
): ):
mocked_open.return_value.read = Mock(side_effect=[b"test", b""]) mocked_open.return_value.read = Mock(side_effect=[b"test", b""])
fetch_backup.return_value = test_backup fetch_backup.return_value = test_backup
@ -426,6 +430,8 @@ async def test_agents_upload_fail_put(
) )
await hass.async_block_till_done() await hass.async_block_till_done()
assert len(aioclient_mock.mock_calls) == 2
assert "Failed to upload backup, retrying (2/2) in 60s" in caplog.text
assert resp.status == 201 assert resp.status == 201
store_backups = hass_storage[BACKUP_DOMAIN]["data"]["backups"] store_backups = hass_storage[BACKUP_DOMAIN]["data"]["backups"]
assert len(store_backups) == 1 assert len(store_backups) == 1
@ -469,6 +475,7 @@ async def test_agents_upload_fail_cloud(
return_value=test_backup, return_value=test_backup,
), ),
patch("pathlib.Path.open") as mocked_open, patch("pathlib.Path.open") as mocked_open,
patch("homeassistant.components.cloud.backup.asyncio.sleep"),
): ):
mocked_open.return_value.read = Mock(side_effect=[b"test", b""]) mocked_open.return_value.read = Mock(side_effect=[b"test", b""])
fetch_backup.return_value = test_backup fetch_backup.return_value = test_backup