"""Backup platform for the Backblaze B2 integration."""
import asyncio
from collections.abc import AsyncIterator, Callable, Coroutine
import functools
import json
import logging
import mimetypes
from time import time
from typing import Any
from b2sdk.v2 import FileVersion
from b2sdk.v2.exception import B2Error
from homeassistant.components.backup import (
AgentBackup,
BackupAgent,
BackupAgentError,
BackupNotFound,
suggested_filename,
)
from homeassistant.core import HomeAssistant, callback
from homeassistant.util.async_iterator import AsyncIteratorReader
from . import BackblazeConfigEntry
from .const import (
CONF_PREFIX,
DATA_BACKUP_AGENT_LISTENERS,
DOMAIN,
METADATA_FILE_SUFFIX,
METADATA_VERSION,
)
_LOGGER = logging.getLogger(__name__)
# Cache TTL for backup list (in seconds)
CACHE_TTL = 300


def suggested_filenames(backup: AgentBackup) -> tuple[str, str]:
    """Return the suggested filenames for the backup and metadata files."""
    base_name = suggested_filename(backup).rsplit(".", 1)[0]
    return f"{base_name}.tar", f"{base_name}.metadata.json"


def _parse_metadata(raw_content: str) -> dict[str, Any]:
    """Parse metadata content from JSON."""
    try:
        data = json.loads(raw_content)
    except json.JSONDecodeError as err:
        raise ValueError(f"Invalid JSON format: {err}") from err
    else:
        if not isinstance(data, dict):
            # Raise ValueError so callers that skip invalid metadata files
            # (they only catch ValueError) also handle non-dict JSON documents.
            raise ValueError("JSON content is not a dictionary")
        return data
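
# Illustrative only: a metadata sidecar document is expected to look roughly like
# this (field values here are hypothetical; the structure mirrors what
# async_upload_backup serializes below):
#
#   {
#       "metadata_version": METADATA_VERSION,
#       "backup_id": "abc123",
#       "backup_metadata": {"backup_id": "abc123", "name": "Core backup", ...}
#   }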


def _find_backup_file_for_metadata(
    metadata_filename: str, all_files: dict[str, FileVersion], prefix: str
) -> FileVersion | None:
    """Find corresponding backup file for metadata file."""
    base_name = metadata_filename[len(prefix) :].removesuffix(METADATA_FILE_SUFFIX)
    return next(
        (
            file
            for name, file in all_files.items()
            if name.startswith(prefix + base_name)
            and name.endswith(".tar")
            and name != metadata_filename
        ),
        None,
    )


def _create_backup_from_metadata(
    metadata_content: dict[str, Any], backup_file: FileVersion
) -> AgentBackup:
    """Construct an AgentBackup from parsed metadata content and the associated backup file."""
    metadata = metadata_content["backup_metadata"]
    metadata["size"] = backup_file.size
    return AgentBackup.from_dict(metadata)


def handle_b2_errors[T](
    func: Callable[..., Coroutine[Any, Any, T]],
) -> Callable[..., Coroutine[Any, Any, T]]:
    """Handle B2Errors by converting them to BackupAgentError."""

    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> T:
        """Catch B2Error and raise BackupAgentError."""
        try:
            return await func(*args, **kwargs)
        except B2Error as err:
            error_msg = f"Failed during {func.__name__}"
            raise BackupAgentError(error_msg) from err

    return wrapper
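
# Illustrative only: any coroutine that talks to B2 can be wrapped so SDK errors
# surface as BackupAgentError. A hypothetical agent method (not part of this
# module) would use it like:
#
#   @handle_b2_errors
#   async def async_count_objects(self) -> int:
#       files = await self._hass.async_add_executor_job(self._fetch_all_files_in_prefix)
#       return len(files)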


async def async_get_backup_agents(
    hass: HomeAssistant,
) -> list[BackupAgent]:
    """Return a list of backup agents for all configured Backblaze B2 entries."""
    entries: list[BackblazeConfigEntry] = hass.config_entries.async_loaded_entries(
        DOMAIN
    )
    return [BackblazeBackupAgent(hass, entry) for entry in entries]


@callback
def async_register_backup_agents_listener(
    hass: HomeAssistant,
    *,
    listener: Callable[[], None],
    **kwargs: Any,
) -> Callable[[], None]:
    """Register a listener to be called when backup agents are added or removed.

    :return: A function to unregister the listener.
    """
    hass.data.setdefault(DATA_BACKUP_AGENT_LISTENERS, []).append(listener)

    @callback
    def remove_listener() -> None:
        """Remove the listener."""
        hass.data[DATA_BACKUP_AGENT_LISTENERS].remove(listener)
        if not hass.data[DATA_BACKUP_AGENT_LISTENERS]:
            hass.data.pop(DATA_BACKUP_AGENT_LISTENERS, None)

    return remove_listener
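
# Illustrative only: the integration's setup/unload code is expected to notify the
# registered listeners whenever an entry is added or removed, roughly like this
# hypothetical sketch (the actual call site lives outside this module):
#
#   for notify in hass.data.get(DATA_BACKUP_AGENT_LISTENERS, []):
#       notify()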


class BackblazeBackupAgent(BackupAgent):
    """Backup agent for Backblaze B2 cloud storage."""

    domain = DOMAIN

    def __init__(self, hass: HomeAssistant, entry: BackblazeConfigEntry) -> None:
        """Initialize the Backblaze B2 agent."""
        super().__init__()
        self._hass = hass
        self._bucket = entry.runtime_data
        self._prefix = entry.data[CONF_PREFIX]
        self.name = entry.title
        self.unique_id = entry.entry_id
        self._all_files_cache: dict[str, FileVersion] = {}
        self._all_files_cache_expiration: float = 0.0
        self._backup_list_cache: dict[str, AgentBackup] = {}
        self._backup_list_cache_expiration: float = 0.0
        self._all_files_cache_lock = asyncio.Lock()
        self._backup_list_cache_lock = asyncio.Lock()

    def _is_cache_valid(self, expiration_time: float) -> bool:
        """Check if cache is still valid based on expiration time."""
        return time() <= expiration_time

    async def _cleanup_failed_upload(self, filename: str) -> None:
        """Clean up a partially uploaded file after upload failure."""
        _LOGGER.warning(
            "Attempting to delete partially uploaded main backup file %s "
            "due to metadata upload failure",
            filename,
        )
        try:
            uploaded_main_file_info = await self._hass.async_add_executor_job(
                self._bucket.get_file_info_by_name, filename
            )
            await self._hass.async_add_executor_job(uploaded_main_file_info.delete)
        except B2Error:
            _LOGGER.debug(
                "Failed to clean up partially uploaded main backup file %s. "
                "Manual intervention may be required to delete it from Backblaze B2",
                filename,
                exc_info=True,
            )
        else:
            _LOGGER.debug(
                "Successfully deleted partially uploaded main backup file %s", filename
            )

    async def _get_file_for_download(self, backup_id: str) -> FileVersion:
        """Get backup file for download, raising if not found."""
        file, _ = await self._find_file_and_metadata_version_by_id(backup_id)
        if not file:
            raise BackupNotFound(f"Backup {backup_id} not found")
        return file

    @handle_b2_errors
    async def async_download_backup(
        self, backup_id: str, **kwargs: Any
    ) -> AsyncIterator[bytes]:
        """Download a backup from Backblaze B2."""
        file = await self._get_file_for_download(backup_id)
        _LOGGER.debug("Downloading %s", file.file_name)

        downloaded_file = await self._hass.async_add_executor_job(file.download)
        response = downloaded_file.response

        async def stream_response() -> AsyncIterator[bytes]:
            """Stream the response into an AsyncIterator."""
            try:
                iterator = response.iter_content(chunk_size=1024 * 1024)
                while True:
                    chunk = await self._hass.async_add_executor_job(
                        next, iterator, None
                    )
                    if chunk is None:
                        break
                    yield chunk
            finally:
                _LOGGER.debug("Finished streaming download for %s", file.file_name)

        return stream_response()
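
    # Illustrative only: a caller is expected to consume the returned iterator
    # roughly like this hypothetical sketch:
    #
    #   stream = await agent.async_download_backup(backup_id)
    #   async for chunk in stream:
    #       target.write(chunk)
    #
    # Each 1 MiB chunk is pulled with next() inside an executor job so the blocking
    # HTTP iterator never runs on the event loop.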

    @handle_b2_errors
    async def async_upload_backup(
        self,
        *,
        open_stream: Callable[[], Coroutine[Any, Any, AsyncIterator[bytes]]],
        backup: AgentBackup,
        **kwargs: Any,
    ) -> None:
        """Upload a backup to Backblaze B2.

        This involves uploading the main backup archive and a separate metadata JSON file.
        """
        tar_filename, metadata_filename = suggested_filenames(backup)
        prefixed_tar_filename = self._prefix + tar_filename
        prefixed_metadata_filename = self._prefix + metadata_filename

        metadata_content_bytes = json.dumps(
            {
                "metadata_version": METADATA_VERSION,
                "backup_id": backup.backup_id,
                "backup_metadata": backup.as_dict(),
            }
        ).encode("utf-8")

        _LOGGER.debug(
            "Uploading backup: %s, and metadata: %s",
            prefixed_tar_filename,
            prefixed_metadata_filename,
        )

        upload_successful = False
        try:
            await self._upload_backup_file(prefixed_tar_filename, open_stream, {})
            _LOGGER.debug(
                "Main backup file upload finished for %s", prefixed_tar_filename
            )

            _LOGGER.debug("Uploading metadata file: %s", prefixed_metadata_filename)
            await self._upload_metadata_file(
                metadata_content_bytes, prefixed_metadata_filename
            )
            _LOGGER.debug(
                "Metadata file upload finished for %s", prefixed_metadata_filename
            )
            upload_successful = True
        finally:
            if upload_successful:
                _LOGGER.debug("Backup upload complete: %s", prefixed_tar_filename)
                self._invalidate_caches(
                    backup.backup_id, prefixed_tar_filename, prefixed_metadata_filename
                )
            else:
                await self._cleanup_failed_upload(prefixed_tar_filename)
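
    # Illustrative only: after a successful upload with a prefix of "backups/", the
    # bucket is expected to contain a pair of objects per backup (names here are
    # hypothetical, derived from suggested_filenames()):
    #
    #   backups/Core_2025.1.0_2025-01-01_03.45_123456.tar            <- archive
    #   backups/Core_2025.1.0_2025-01-01_03.45_123456.metadata.json  <- sidecar metadata
    #
    # If the metadata upload fails, the archive is removed again by
    # _cleanup_failed_upload so no orphaned tar file is left behind.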

    def _upload_metadata_file_sync(
        self, metadata_content: bytes, filename: str
    ) -> None:
        """Synchronously upload metadata file to B2."""
        self._bucket.upload_bytes(
            metadata_content,
            filename,
            content_type="application/json",
            file_info={"metadata_only": "true"},
        )

    async def _upload_metadata_file(
        self, metadata_content: bytes, filename: str
    ) -> None:
        """Upload metadata file to B2."""
        await self._hass.async_add_executor_job(
            self._upload_metadata_file_sync,
            metadata_content,
            filename,
        )

    def _upload_unbound_stream_sync(
        self,
        reader: AsyncIteratorReader,
        filename: str,
        content_type: str,
        file_info: dict[str, Any],
    ) -> FileVersion:
        """Synchronously upload unbound stream to B2."""
        return self._bucket.upload_unbound_stream(
            reader,
            filename,
            content_type=content_type,
            file_info=file_info,
        )

    def _download_and_parse_metadata_sync(
        self, metadata_file_version: FileVersion
    ) -> dict[str, Any]:
        """Synchronously download and parse metadata file."""
        return _parse_metadata(
            metadata_file_version.download().response.content.decode("utf-8")
        )

    async def _upload_backup_file(
        self,
        filename: str,
        open_stream: Callable[[], Coroutine[Any, Any, AsyncIterator[bytes]]],
        file_info: dict[str, Any],
    ) -> None:
        """Upload backup file to B2 using streaming."""
        _LOGGER.debug("Starting streaming upload for %s", filename)

        stream = await open_stream()
        reader = AsyncIteratorReader(self._hass.loop, stream)

        _LOGGER.debug("Uploading backup file %s with streaming", filename)
        try:
            content_type, _ = mimetypes.guess_type(filename)
            file_version = await self._hass.async_add_executor_job(
                self._upload_unbound_stream_sync,
                reader,
                filename,
                content_type or "application/x-tar",
                file_info,
            )
        finally:
            reader.close()

        _LOGGER.debug("Successfully uploaded %s (ID: %s)", filename, file_version.id_)

    @handle_b2_errors
    async def async_delete_backup(self, backup_id: str, **kwargs: Any) -> None:
        """Delete a backup and its associated metadata file from Backblaze B2."""
        file, metadata_file = await self._find_file_and_metadata_version_by_id(
            backup_id
        )
        if not file:
            raise BackupNotFound(f"Backup {backup_id} not found")

        # Invariant: when file is not None, metadata_file is also not None
        assert metadata_file is not None

        _LOGGER.debug(
            "Deleting backup file: %s and metadata file: %s",
            file.file_name,
            metadata_file.file_name,
        )
        await self._hass.async_add_executor_job(file.delete)
        await self._hass.async_add_executor_job(metadata_file.delete)

        self._invalidate_caches(
            backup_id,
            file.file_name,
            metadata_file.file_name,
            remove_files=True,
        )

    @handle_b2_errors
    async def async_list_backups(self, **kwargs: Any) -> list[AgentBackup]:
        """List all backups by finding their associated metadata files in Backblaze B2."""
        async with self._backup_list_cache_lock:
            if self._backup_list_cache and self._is_cache_valid(
                self._backup_list_cache_expiration
            ):
                _LOGGER.debug("Returning backups from cache")
                return list(self._backup_list_cache.values())

            _LOGGER.debug(
                "Cache expired or empty, fetching all files from B2 to build backup list"
            )
            all_files_in_prefix = await self._get_all_files_in_prefix()
            _LOGGER.debug(
                "Files found in prefix '%s': %s",
                self._prefix,
                list(all_files_in_prefix.keys()),
            )

            # Process metadata files sequentially to avoid exhausting executor pool
            backups = {}
            for file_name, file_version in all_files_in_prefix.items():
                if file_name.endswith(METADATA_FILE_SUFFIX):
                    backup = await self._hass.async_add_executor_job(
                        self._process_metadata_file_sync,
                        file_name,
                        file_version,
                        all_files_in_prefix,
                    )
                    if backup:
                        backups[backup.backup_id] = backup

            self._backup_list_cache = backups
            self._backup_list_cache_expiration = time() + CACHE_TTL
            return list(backups.values())

    @handle_b2_errors
    async def async_get_backup(self, backup_id: str, **kwargs: Any) -> AgentBackup:
        """Get a specific backup by its ID from Backblaze B2."""
        if self._backup_list_cache and self._is_cache_valid(
            self._backup_list_cache_expiration
        ):
            if backup := self._backup_list_cache.get(backup_id):
                _LOGGER.debug("Returning backup %s from cache", backup_id)
                return backup

        file, metadata_file_version = await self._find_file_and_metadata_version_by_id(
            backup_id
        )
        if not file or not metadata_file_version:
            raise BackupNotFound(f"Backup {backup_id} not found")

        metadata_content = await self._hass.async_add_executor_job(
            self._download_and_parse_metadata_sync,
            metadata_file_version,
        )
        _LOGGER.debug(
            "Successfully retrieved metadata for backup ID %s from file %s",
            backup_id,
            metadata_file_version.file_name,
        )
        backup = _create_backup_from_metadata(metadata_content, file)

        if self._is_cache_valid(self._backup_list_cache_expiration):
            self._backup_list_cache[backup.backup_id] = backup

        return backup

    async def _find_file_and_metadata_version_by_id(
        self, backup_id: str
    ) -> tuple[FileVersion | None, FileVersion | None]:
        """Find the main backup file and its associated metadata file version by backup ID."""
        all_files_in_prefix = await self._get_all_files_in_prefix()

        # Process metadata files sequentially to avoid exhausting executor pool
        for file_name, file_version in all_files_in_prefix.items():
            if file_name.endswith(METADATA_FILE_SUFFIX):
                (
                    result_backup_file,
                    result_metadata_file_version,
                ) = await self._hass.async_add_executor_job(
                    self._process_metadata_file_for_id_sync,
                    file_name,
                    file_version,
                    backup_id,
                    all_files_in_prefix,
                )
                if result_backup_file and result_metadata_file_version:
                    return result_backup_file, result_metadata_file_version

        _LOGGER.debug("Backup %s not found", backup_id)
        return None, None

    def _process_metadata_file_for_id_sync(
        self,
        file_name: str,
        file_version: FileVersion,
        target_backup_id: str,
        all_files_in_prefix: dict[str, FileVersion],
    ) -> tuple[FileVersion | None, FileVersion | None]:
        """Synchronously process a single metadata file for a specific backup ID.

        Called within a thread pool executor.
        """
        try:
            download_response = file_version.download().response
        except B2Error as err:
            _LOGGER.warning(
                "Failed to download metadata file %s during ID search: %s",
                file_name,
                err,
            )
            return None, None

        try:
            metadata_content = _parse_metadata(
                download_response.content.decode("utf-8")
            )
        except ValueError:
            return None, None

        if metadata_content["backup_id"] != target_backup_id:
            _LOGGER.debug(
                "Metadata file %s does not match target backup ID %s",
                file_name,
                target_backup_id,
            )
            return None, None

        found_backup_file = _find_backup_file_for_metadata(
            file_name, all_files_in_prefix, self._prefix
        )
        if not found_backup_file:
            _LOGGER.warning(
                "Found metadata file %s for backup ID %s, but no corresponding backup file",
                file_name,
                target_backup_id,
            )
            return None, None

        _LOGGER.debug(
            "Found backup file %s and metadata file %s for ID %s",
            found_backup_file.file_name,
            file_name,
            target_backup_id,
        )
        return found_backup_file, file_version

    async def _get_all_files_in_prefix(self) -> dict[str, FileVersion]:
        """Get all file versions in the configured prefix from Backblaze B2.

        Uses a cache to minimize API calls.

        This fetches a flat list of all files, including main backups and metadata files.
        """
        async with self._all_files_cache_lock:
            if self._is_cache_valid(self._all_files_cache_expiration):
                _LOGGER.debug("Returning all files from cache")
                return self._all_files_cache

            _LOGGER.debug("Cache for all files expired or empty, fetching from B2")
            all_files_in_prefix = await self._hass.async_add_executor_job(
                self._fetch_all_files_in_prefix
            )
            self._all_files_cache = all_files_in_prefix
            self._all_files_cache_expiration = time() + CACHE_TTL
            return all_files_in_prefix

    def _fetch_all_files_in_prefix(self) -> dict[str, FileVersion]:
        """Fetch all files in the configured prefix from B2."""
        all_files: dict[str, FileVersion] = {}
        for file, _ in self._bucket.ls(self._prefix):
            all_files[file.file_name] = file
        return all_files
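
    # Illustrative only: bucket.ls() yields (FileVersion, folder_name) pairs, so the
    # dict built above maps full object names to their FileVersion, e.g.
    # (hypothetical content for a "backups/" prefix):
    #
    #   {
    #       "backups/Core_2025.1.0.tar": <FileVersion>,
    #       "backups/Core_2025.1.0.metadata.json": <FileVersion>,
    #   }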

    def _process_metadata_file_sync(
        self,
        file_name: str,
        file_version: FileVersion,
        all_files_in_prefix: dict[str, FileVersion],
    ) -> AgentBackup | None:
        """Synchronously process a single metadata file and return an AgentBackup if valid."""
        try:
            download_response = file_version.download().response
        except B2Error as err:
            _LOGGER.warning("Failed to download metadata file %s: %s", file_name, err)
            return None

        try:
            metadata_content = _parse_metadata(
                download_response.content.decode("utf-8")
            )
        except ValueError:
            return None

        found_backup_file = _find_backup_file_for_metadata(
            file_name, all_files_in_prefix, self._prefix
        )
        if not found_backup_file:
            _LOGGER.warning(
                "Found metadata file %s but no corresponding backup file",
                file_name,
            )
            return None

        _LOGGER.debug(
            "Successfully processed metadata file %s for backup ID %s",
            file_name,
            metadata_content["backup_id"],
        )
        return _create_backup_from_metadata(metadata_content, found_backup_file)

    def _invalidate_caches(
        self,
        backup_id: str,
        tar_filename: str,
        metadata_filename: str | None,
        *,
        remove_files: bool = False,
    ) -> None:
        """Invalidate caches after upload/deletion operations.

        Args:
            backup_id: The backup ID to remove from backup cache
            tar_filename: The tar filename to remove from files cache
            metadata_filename: The metadata filename to remove from files cache
            remove_files: If True, remove specific files from cache; if False, expire entire cache
        """
        if remove_files:
            if self._is_cache_valid(self._all_files_cache_expiration):
                self._all_files_cache.pop(tar_filename, None)
                if metadata_filename:
                    self._all_files_cache.pop(metadata_filename, None)

            if self._is_cache_valid(self._backup_list_cache_expiration):
                self._backup_list_cache.pop(backup_id, None)
        else:
            # For uploads, we can't easily add new FileVersion objects without API calls,
            # so we expire the entire cache for simplicity
            self._all_files_cache_expiration = 0.0
            self._backup_list_cache_expiration = 0.0
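
# Illustrative only: both caches age out together after CACHE_TTL seconds. A
# hypothetical sequence with CACHE_TTL = 300:
#
#   t=0    async_list_backups()  -> B2 listing + metadata downloads, caches populated
#   t=120  async_get_backup(id)  -> served from _backup_list_cache, no API call
#   t=301  async_list_backups()  -> caches expired, full refetch from B2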