"""Backup platform for the Backblaze B2 integration."""
import asyncio
from collections.abc import AsyncIterator, Callable, Coroutine
import functools
import json
import logging
import mimetypes
from time import time
from typing import Any
from b2sdk.v2 import FileVersion
from b2sdk.v2.exception import B2Error
from homeassistant.components.backup import (
AgentBackup,
BackupAgent,
BackupAgentError,
BackupNotFound,
suggested_filename,
)
from homeassistant.core import HomeAssistant, callback
from homeassistant.util.async_iterator import AsyncIteratorReader
from . import BackblazeConfigEntry
from .const import (
CONF_PREFIX,
DATA_BACKUP_AGENT_LISTENERS,
DOMAIN,
METADATA_FILE_SUFFIX,
METADATA_VERSION,
)
_LOGGER = logging.getLogger(__name__)
# Cache TTL for backup list (in seconds)
CACHE_TTL = 300


def suggested_filenames(backup: AgentBackup) -> tuple[str, str]:
    """Return the suggested filenames for the backup and metadata files."""
    base_name = suggested_filename(backup).rsplit(".", 1)[0]
    return f"{base_name}.tar", f"{base_name}.metadata.json"


def _parse_metadata(raw_content: str) -> dict[str, Any]:
    """Parse metadata content from JSON."""
    try:
        data = json.loads(raw_content)
    except json.JSONDecodeError as err:
        raise ValueError(f"Invalid JSON format: {err}") from err
    else:
        if not isinstance(data, dict):
            # Raise ValueError so callers that skip invalid metadata files
            # (they only catch ValueError) also handle non-dict JSON documents.
            raise ValueError("JSON content is not a dictionary")
        return data
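
# Illustrative only: a metadata sidecar document is expected to look roughly like
# this (field values here are hypothetical; the structure mirrors what
# async_upload_backup serializes below):
#
#   {
#       "metadata_version": METADATA_VERSION,
#       "backup_id": "abc123",
#       "backup_metadata": {"backup_id": "abc123", "name": "Core backup", ...}
#   }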


def _find_backup_file_for_metadata(
    metadata_filename: str, all_files: dict[str, FileVersion], prefix: str
) -> FileVersion | None:
    """Find corresponding backup file for metadata file."""
    base_name = metadata_filename[len(prefix) :].removesuffix(METADATA_FILE_SUFFIX)
    return next(
        (
            file
            for name, file in all_files.items()
            if name.startswith(prefix + base_name)
            and name.endswith(".tar")
            and name != metadata_filename
        ),
        None,
    )


def _create_backup_from_metadata(
    metadata_content: dict[str, Any], backup_file: FileVersion
) -> AgentBackup:
    """Construct an AgentBackup from parsed metadata content and the associated backup file."""
    metadata = metadata_content["backup_metadata"]
    metadata["size"] = backup_file.size
    return AgentBackup.from_dict(metadata)


def handle_b2_errors[T](
    func: Callable[..., Coroutine[Any, Any, T]],
) -> Callable[..., Coroutine[Any, Any, T]]:
    """Handle B2Errors by converting them to BackupAgentError."""

    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> T:
        """Catch B2Error and raise BackupAgentError."""
        try:
            return await func(*args, **kwargs)
        except B2Error as err:
            error_msg = f"Failed during {func.__name__}"
            raise BackupAgentError(error_msg) from err

    return wrapper
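
# Illustrative only: any coroutine that talks to B2 can be wrapped so SDK errors
# surface as BackupAgentError. A hypothetical agent method (not part of this
# module) would use it like:
#
#   @handle_b2_errors
#   async def async_count_objects(self) -> int:
#       files = await self._hass.async_add_executor_job(self._fetch_all_files_in_prefix)
#       return len(files)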


async def async_get_backup_agents(
    hass: HomeAssistant,
) -> list[BackupAgent]:
    """Return a list of backup agents for all configured Backblaze B2 entries."""
    entries: list[BackblazeConfigEntry] = hass.config_entries.async_loaded_entries(
        DOMAIN
    )
    return [BackblazeBackupAgent(hass, entry) for entry in entries]


@callback
def async_register_backup_agents_listener(
    hass: HomeAssistant,
    *,
    listener: Callable[[], None],
    **kwargs: Any,
) -> Callable[[], None]:
    """Register a listener to be called when backup agents are added or removed.

    :return: A function to unregister the listener.
    """
    hass.data.setdefault(DATA_BACKUP_AGENT_LISTENERS, []).append(listener)

    @callback
    def remove_listener() -> None:
        """Remove the listener."""
        hass.data[DATA_BACKUP_AGENT_LISTENERS].remove(listener)
        if not hass.data[DATA_BACKUP_AGENT_LISTENERS]:
            hass.data.pop(DATA_BACKUP_AGENT_LISTENERS, None)

    return remove_listener
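
# Illustrative only: the integration's setup/unload code is expected to notify the
# registered listeners whenever an entry is added or removed, roughly like this
# hypothetical sketch (the actual call site lives outside this module):
#
#   for notify in hass.data.get(DATA_BACKUP_AGENT_LISTENERS, []):
#       notify()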


class BackblazeBackupAgent(BackupAgent):
    """Backup agent for Backblaze B2 cloud storage."""

    domain = DOMAIN

    def __init__(self, hass: HomeAssistant, entry: BackblazeConfigEntry) -> None:
        """Initialize the Backblaze B2 agent."""
        super().__init__()
        self._hass = hass
        self._bucket = entry.runtime_data
        self._prefix = entry.data[CONF_PREFIX]
        self.name = entry.title
        self.unique_id = entry.entry_id
        self._all_files_cache: dict[str, FileVersion] = {}
        self._all_files_cache_expiration: float = 0.0
        self._backup_list_cache: dict[str, AgentBackup] = {}
        self._backup_list_cache_expiration: float = 0.0
        self._all_files_cache_lock = asyncio.Lock()
        self._backup_list_cache_lock = asyncio.Lock()

    def _is_cache_valid(self, expiration_time: float) -> bool:
        """Check if cache is still valid based on expiration time."""
        return time() <= expiration_time

    async def _cleanup_failed_upload(self, filename: str) -> None:
        """Clean up a partially uploaded file after upload failure."""
        _LOGGER.warning(
            "Attempting to delete partially uploaded main backup file %s "
            "due to metadata upload failure",
            filename,
        )
        try:
            uploaded_main_file_info = await self._hass.async_add_executor_job(
                self._bucket.get_file_info_by_name, filename
            )
            await self._hass.async_add_executor_job(uploaded_main_file_info.delete)
        except B2Error:
            _LOGGER.debug(
                "Failed to clean up partially uploaded main backup file %s. "
                "Manual intervention may be required to delete it from Backblaze B2",
                filename,
                exc_info=True,
            )
        else:
            _LOGGER.debug(
                "Successfully deleted partially uploaded main backup file %s", filename
            )

    async def _get_file_for_download(self, backup_id: str) -> FileVersion:
        """Get backup file for download, raising if not found."""
        file, _ = await self._find_file_and_metadata_version_by_id(backup_id)
        if not file:
            raise BackupNotFound(f"Backup {backup_id} not found")
        return file

    @handle_b2_errors
    async def async_download_backup(
        self, backup_id: str, **kwargs: Any
    ) -> AsyncIterator[bytes]:
        """Download a backup from Backblaze B2."""
        file = await self._get_file_for_download(backup_id)
        _LOGGER.debug("Downloading %s", file.file_name)

        downloaded_file = await self._hass.async_add_executor_job(file.download)
        response = downloaded_file.response

        async def stream_response() -> AsyncIterator[bytes]:
            """Stream the response into an AsyncIterator."""
            try:
                iterator = response.iter_content(chunk_size=1024 * 1024)
                while True:
                    chunk = await self._hass.async_add_executor_job(
                        next, iterator, None
                    )
                    if chunk is None:
                        break
                    yield chunk
            finally:
                _LOGGER.debug("Finished streaming download for %s", file.file_name)

        return stream_response()
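
    # Illustrative only: a caller is expected to consume the returned iterator
    # roughly like this hypothetical sketch:
    #
    #   stream = await agent.async_download_backup(backup_id)
    #   async for chunk in stream:
    #       target.write(chunk)
    #
    # Each 1 MiB chunk is pulled with next() inside an executor job so the blocking
    # HTTP iterator never runs on the event loop.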

    @handle_b2_errors
    async def async_upload_backup(
        self,
        *,
        open_stream: Callable[[], Coroutine[Any, Any, AsyncIterator[bytes]]],
        backup: AgentBackup,
        **kwargs: Any,
    ) -> None:
        """Upload a backup to Backblaze B2.

        This involves uploading the main backup archive and a separate metadata JSON file.
        """
        tar_filename, metadata_filename = suggested_filenames(backup)
        prefixed_tar_filename = self._prefix + tar_filename
        prefixed_metadata_filename = self._prefix + metadata_filename

        metadata_content_bytes = json.dumps(
            {
                "metadata_version": METADATA_VERSION,
                "backup_id": backup.backup_id,
                "backup_metadata": backup.as_dict(),
            }
        ).encode("utf-8")

        _LOGGER.debug(
            "Uploading backup: %s, and metadata: %s",
            prefixed_tar_filename,
            prefixed_metadata_filename,
        )

        upload_successful = False
        try:
            await self._upload_backup_file(prefixed_tar_filename, open_stream, {})
            _LOGGER.debug(
                "Main backup file upload finished for %s", prefixed_tar_filename
            )

            _LOGGER.debug("Uploading metadata file: %s", prefixed_metadata_filename)
            await self._upload_metadata_file(
                metadata_content_bytes, prefixed_metadata_filename
            )
            _LOGGER.debug(
                "Metadata file upload finished for %s", prefixed_metadata_filename
            )
            upload_successful = True
        finally:
            if upload_successful:
                _LOGGER.debug("Backup upload complete: %s", prefixed_tar_filename)
                self._invalidate_caches(
                    backup.backup_id, prefixed_tar_filename, prefixed_metadata_filename
                )
            else:
                await self._cleanup_failed_upload(prefixed_tar_filename)
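
    # Illustrative only: after a successful upload with a prefix of "backups/", the
    # bucket is expected to contain a pair of objects per backup (names here are
    # hypothetical, derived from suggested_filenames()):
    #
    #   backups/Core_2025.1.0_2025-01-01_03.45_123456.tar            <- archive
    #   backups/Core_2025.1.0_2025-01-01_03.45_123456.metadata.json  <- sidecar metadata
    #
    # If the metadata upload fails, the archive is removed again by
    # _cleanup_failed_upload so no orphaned tar file is left behind.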

    def _upload_metadata_file_sync(
        self, metadata_content: bytes, filename: str
    ) -> None:
        """Synchronously upload metadata file to B2."""
        self._bucket.upload_bytes(
            metadata_content,
            filename,
            content_type="application/json",
            file_info={"metadata_only": "true"},
        )

    async def _upload_metadata_file(
        self, metadata_content: bytes, filename: str
    ) -> None:
        """Upload metadata file to B2."""
        await self._hass.async_add_executor_job(
            self._upload_metadata_file_sync,
            metadata_content,
            filename,
        )

    def _upload_unbound_stream_sync(
        self,
        reader: AsyncIteratorReader,
        filename: str,
        content_type: str,
        file_info: dict[str, Any],
    ) -> FileVersion:
        """Synchronously upload unbound stream to B2."""
        return self._bucket.upload_unbound_stream(
            reader,
            filename,
            content_type=content_type,
            file_info=file_info,
        )

    def _download_and_parse_metadata_sync(
        self, metadata_file_version: FileVersion
    ) -> dict[str, Any]:
        """Synchronously download and parse metadata file."""
        return _parse_metadata(
            metadata_file_version.download().response.content.decode("utf-8")
        )

    async def _upload_backup_file(
        self,
        filename: str,
        open_stream: Callable[[], Coroutine[Any, Any, AsyncIterator[bytes]]],
        file_info: dict[str, Any],
    ) -> None:
        """Upload backup file to B2 using streaming."""
        _LOGGER.debug("Starting streaming upload for %s", filename)

        stream = await open_stream()
        reader = AsyncIteratorReader(self._hass.loop, stream)

        _LOGGER.debug("Uploading backup file %s with streaming", filename)
        try:
            content_type, _ = mimetypes.guess_type(filename)
            file_version = await self._hass.async_add_executor_job(
                self._upload_unbound_stream_sync,
                reader,
                filename,
                content_type or "application/x-tar",
                file_info,
            )
        finally:
            reader.close()

        _LOGGER.debug("Successfully uploaded %s (ID: %s)", filename, file_version.id_)

    @handle_b2_errors
    async def async_delete_backup(self, backup_id: str, **kwargs: Any) -> None:
        """Delete a backup and its associated metadata file from Backblaze B2."""
        file, metadata_file = await self._find_file_and_metadata_version_by_id(
            backup_id
        )
        if not file:
            raise BackupNotFound(f"Backup {backup_id} not found")

        # Invariant: when file is not None, metadata_file is also not None
        assert metadata_file is not None

        _LOGGER.debug(
            "Deleting backup file: %s and metadata file: %s",
            file.file_name,
            metadata_file.file_name,
        )
        await self._hass.async_add_executor_job(file.delete)
        await self._hass.async_add_executor_job(metadata_file.delete)

        self._invalidate_caches(
            backup_id,
            file.file_name,
            metadata_file.file_name,
            remove_files=True,
        )

    @handle_b2_errors
    async def async_list_backups(self, **kwargs: Any) -> list[AgentBackup]:
        """List all backups by finding their associated metadata files in Backblaze B2."""
        async with self._backup_list_cache_lock:
            if self._backup_list_cache and self._is_cache_valid(
                self._backup_list_cache_expiration
            ):
                _LOGGER.debug("Returning backups from cache")
                return list(self._backup_list_cache.values())

            _LOGGER.debug(
                "Cache expired or empty, fetching all files from B2 to build backup list"
            )
            all_files_in_prefix = await self._get_all_files_in_prefix()
            _LOGGER.debug(
                "Files found in prefix '%s': %s",
                self._prefix,
                list(all_files_in_prefix.keys()),
            )

            # Process metadata files sequentially to avoid exhausting executor pool
            backups = {}
            for file_name, file_version in all_files_in_prefix.items():
                if file_name.endswith(METADATA_FILE_SUFFIX):
                    backup = await self._hass.async_add_executor_job(
                        self._process_metadata_file_sync,
                        file_name,
                        file_version,
                        all_files_in_prefix,
                    )
                    if backup:
                        backups[backup.backup_id] = backup

            self._backup_list_cache = backups
            self._backup_list_cache_expiration = time() + CACHE_TTL
            return list(backups.values())

    @handle_b2_errors
    async def async_get_backup(self, backup_id: str, **kwargs: Any) -> AgentBackup:
        """Get a specific backup by its ID from Backblaze B2."""
        if self._backup_list_cache and self._is_cache_valid(
            self._backup_list_cache_expiration
        ):
            if backup := self._backup_list_cache.get(backup_id):
                _LOGGER.debug("Returning backup %s from cache", backup_id)
                return backup

        file, metadata_file_version = await self._find_file_and_metadata_version_by_id(
            backup_id
        )
        if not file or not metadata_file_version:
            raise BackupNotFound(f"Backup {backup_id} not found")

        metadata_content = await self._hass.async_add_executor_job(
            self._download_and_parse_metadata_sync,
            metadata_file_version,
        )
        _LOGGER.debug(
            "Successfully retrieved metadata for backup ID %s from file %s",
            backup_id,
            metadata_file_version.file_name,
        )
        backup = _create_backup_from_metadata(metadata_content, file)

        if self._is_cache_valid(self._backup_list_cache_expiration):
            self._backup_list_cache[backup.backup_id] = backup

        return backup

    async def _find_file_and_metadata_version_by_id(
        self, backup_id: str
    ) -> tuple[FileVersion | None, FileVersion | None]:
        """Find the main backup file and its associated metadata file version by backup ID."""
        all_files_in_prefix = await self._get_all_files_in_prefix()

        # Process metadata files sequentially to avoid exhausting executor pool
        for file_name, file_version in all_files_in_prefix.items():
            if file_name.endswith(METADATA_FILE_SUFFIX):
                (
                    result_backup_file,
                    result_metadata_file_version,
                ) = await self._hass.async_add_executor_job(
                    self._process_metadata_file_for_id_sync,
                    file_name,
                    file_version,
                    backup_id,
                    all_files_in_prefix,
                )
                if result_backup_file and result_metadata_file_version:
                    return result_backup_file, result_metadata_file_version

        _LOGGER.debug("Backup %s not found", backup_id)
        return None, None

    def _process_metadata_file_for_id_sync(
        self,
        file_name: str,
        file_version: FileVersion,
        target_backup_id: str,
        all_files_in_prefix: dict[str, FileVersion],
    ) -> tuple[FileVersion | None, FileVersion | None]:
        """Synchronously process a single metadata file for a specific backup ID.

        Called within a thread pool executor.
        """
        try:
            download_response = file_version.download().response
        except B2Error as err:
            _LOGGER.warning(
                "Failed to download metadata file %s during ID search: %s",
                file_name,
                err,
            )
            return None, None

        try:
            metadata_content = _parse_metadata(
                download_response.content.decode("utf-8")
            )
        except ValueError:
            return None, None

        if metadata_content["backup_id"] != target_backup_id:
            _LOGGER.debug(
                "Metadata file %s does not match target backup ID %s",
                file_name,
                target_backup_id,
            )
            return None, None

        found_backup_file = _find_backup_file_for_metadata(
            file_name, all_files_in_prefix, self._prefix
        )
        if not found_backup_file:
            _LOGGER.warning(
                "Found metadata file %s for backup ID %s, but no corresponding backup file",
                file_name,
                target_backup_id,
            )
            return None, None

        _LOGGER.debug(
            "Found backup file %s and metadata file %s for ID %s",
            found_backup_file.file_name,
            file_name,
            target_backup_id,
        )
        return found_backup_file, file_version

    async def _get_all_files_in_prefix(self) -> dict[str, FileVersion]:
        """Get all file versions in the configured prefix from Backblaze B2.

        Uses a cache to minimize API calls.

        This fetches a flat list of all files, including main backups and metadata files.
        """
        async with self._all_files_cache_lock:
            if self._is_cache_valid(self._all_files_cache_expiration):
                _LOGGER.debug("Returning all files from cache")
                return self._all_files_cache

            _LOGGER.debug("Cache for all files expired or empty, fetching from B2")
            all_files_in_prefix = await self._hass.async_add_executor_job(
                self._fetch_all_files_in_prefix
            )
            self._all_files_cache = all_files_in_prefix
            self._all_files_cache_expiration = time() + CACHE_TTL
            return all_files_in_prefix

    def _fetch_all_files_in_prefix(self) -> dict[str, FileVersion]:
        """Fetch all files in the configured prefix from B2."""
        all_files: dict[str, FileVersion] = {}
        for file, _ in self._bucket.ls(self._prefix):
            all_files[file.file_name] = file
        return all_files
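
    # Illustrative only: bucket.ls() yields (FileVersion, folder_name) pairs, so the
    # dict built above maps full object names to their FileVersion, e.g.
    # (hypothetical content for a "backups/" prefix):
    #
    #   {
    #       "backups/Core_2025.1.0.tar": <FileVersion>,
    #       "backups/Core_2025.1.0.metadata.json": <FileVersion>,
    #   }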

    def _process_metadata_file_sync(
        self,
        file_name: str,
        file_version: FileVersion,
        all_files_in_prefix: dict[str, FileVersion],
    ) -> AgentBackup | None:
        """Synchronously process a single metadata file and return an AgentBackup if valid."""
        try:
            download_response = file_version.download().response
        except B2Error as err:
            _LOGGER.warning("Failed to download metadata file %s: %s", file_name, err)
            return None

        try:
            metadata_content = _parse_metadata(
                download_response.content.decode("utf-8")
            )
        except ValueError:
            return None

        found_backup_file = _find_backup_file_for_metadata(
            file_name, all_files_in_prefix, self._prefix
        )
        if not found_backup_file:
            _LOGGER.warning(
                "Found metadata file %s but no corresponding backup file",
                file_name,
            )
            return None

        _LOGGER.debug(
            "Successfully processed metadata file %s for backup ID %s",
            file_name,
            metadata_content["backup_id"],
        )
        return _create_backup_from_metadata(metadata_content, found_backup_file)

    def _invalidate_caches(
        self,
        backup_id: str,
        tar_filename: str,
        metadata_filename: str | None,
        *,
        remove_files: bool = False,
    ) -> None:
        """Invalidate caches after upload/deletion operations.

        Args:
            backup_id: The backup ID to remove from backup cache
            tar_filename: The tar filename to remove from files cache
            metadata_filename: The metadata filename to remove from files cache
            remove_files: If True, remove specific files from cache; if False, expire entire cache
        """
        if remove_files:
            if self._is_cache_valid(self._all_files_cache_expiration):
                self._all_files_cache.pop(tar_filename, None)
                if metadata_filename:
                    self._all_files_cache.pop(metadata_filename, None)

            if self._is_cache_valid(self._backup_list_cache_expiration):
                self._backup_list_cache.pop(backup_id, None)
        else:
            # For uploads, we can't easily add new FileVersion objects without API calls,
            # so we expire the entire cache for simplicity
            self._all_files_cache_expiration = 0.0
            self._backup_list_cache_expiration = 0.0
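
# Illustrative only: both caches age out together after CACHE_TTL seconds. A
# hypothetical sequence with CACHE_TTL = 300:
#
#   t=0    async_list_backups()  -> B2 listing + metadata downloads, caches populated
#   t=120  async_get_backup(id)  -> served from _backup_list_cache, no API call
#   t=301  async_list_backups()  -> caches expired, full refetch from B2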