diff --git a/homeassistant/components/nest/media_source.py b/homeassistant/components/nest/media_source.py index 146b6f2479e..a3d2901e911 100644 --- a/homeassistant/components/nest/media_source.py +++ b/homeassistant/components/nest/media_source.py @@ -20,8 +20,10 @@ from __future__ import annotations from collections.abc import Mapping from dataclasses import dataclass +import datetime import logging import os +import pathlib from typing import Any from google_nest_sdm.camera_traits import CameraClipPreviewTrait, CameraEventImageTrait @@ -46,6 +48,7 @@ from homeassistant.components.media_source import ( ) from homeassistant.core import HomeAssistant, callback from homeassistant.helpers import device_registry as dr +from homeassistant.helpers.event import async_track_time_interval from homeassistant.helpers.storage import Store from homeassistant.helpers.template import DATE_STR_FORMAT from homeassistant.util import dt as dt_util @@ -72,6 +75,9 @@ MEDIA_PATH = f"{DOMAIN}/event_media" # Size of small in-memory disk cache to avoid excessive disk reads DISK_READ_LRU_MAX_SIZE = 32 +# Remove orphaned media files that are older than this age +ORPHANED_MEDIA_AGE_CUTOFF = datetime.timedelta(days=7) + async def async_get_media_event_store( hass: HomeAssistant, subscriber: GoogleNestSubscriber @@ -123,6 +129,12 @@ class NestEventMediaStore(EventMediaStore): self._media_path = media_path self._data: dict[str, Any] | None = None self._devices: Mapping[str, str] | None = {} + # Invoke garbage collection for orphaned files one per + async_track_time_interval( + hass, + self.async_remove_orphaned_media, + datetime.timedelta(days=1), + ) async def async_load(self) -> dict | None: """Load data.""" @@ -249,6 +261,68 @@ class NestEventMediaStore(EventMediaStore): devices[device.name] = device_entry.id return devices + async def async_remove_orphaned_media(self, now: datetime.datetime) -> None: + """Remove any media files that are orphaned and not referenced by the active event data. + + The event media store handles garbage collection, but there may be cases where files are + left around or unable to be removed. This is a scheduled event that will also check for + old orphaned files and remove them when the events are not referenced in the active list + of event data. + + Event media files are stored with the format -.suffix. We extract + the list of valid timestamps from the event data and remove any files that are not in that list + or are older than the cutoff time. + """ + _LOGGER.debug("Checking for orphaned media at %s", now) + + def _cleanup(event_timestamps: dict[str, set[int]]) -> None: + time_cutoff = (now - ORPHANED_MEDIA_AGE_CUTOFF).timestamp() + media_path = pathlib.Path(self._media_path) + for device_id, valid_timestamps in event_timestamps.items(): + media_files = list(media_path.glob(f"{device_id}/*")) + _LOGGER.debug("Found %d files (device=%s)", len(media_files), device_id) + for media_file in media_files: + if "-" not in media_file.name: + continue + try: + timestamp = int(media_file.name.split("-")[0]) + except ValueError: + continue + if timestamp in valid_timestamps or timestamp > time_cutoff: + continue + _LOGGER.debug("Removing orphaned media file: %s", media_file) + try: + os.remove(media_file) + except OSError as err: + _LOGGER.error( + "Unable to remove orphaned media file: %s %s", + media_file, + err, + ) + + # Nest device id mapped to home assistant device id + event_timestamps = await self._get_valid_event_timestamps() + await self._hass.async_add_executor_job(_cleanup, event_timestamps) + + async def _get_valid_event_timestamps(self) -> dict[str, set[int]]: + """Return a mapping of home assistant device id to valid timestamps.""" + device_map = await self._get_devices() + event_data = await self.async_load() or {} + valid_device_timestamps = {} + for nest_device_id, device_id in device_map.items(): + if (device_events := event_data.get(nest_device_id, {})) is None: + continue + valid_device_timestamps[device_id] = { + int( + datetime.datetime.fromisoformat( + camera_event["timestamp"] + ).timestamp() + ) + for events in device_events + for camera_event in events["events"].values() + } + return valid_device_timestamps + async def async_get_media_source(hass: HomeAssistant) -> MediaSource: """Set up Nest media source.""" diff --git a/tests/components/nest/conftest.py b/tests/components/nest/conftest.py index 92d90a18a7e..b4b94efce5b 100644 --- a/tests/components/nest/conftest.py +++ b/tests/components/nest/conftest.py @@ -144,13 +144,14 @@ async def auth( return FakeAuth(aioclient_mock, create_device, device_access_project_id) -@pytest.fixture(autouse=True) -def cleanup_media_storage(hass: HomeAssistant) -> Generator[None]: +@pytest.fixture(autouse=True, name="media_path") +def cleanup_media_storage(hass: HomeAssistant) -> Generator[str]: """Test cleanup, remove any media storage persisted during the test.""" tmp_path = str(uuid.uuid4()) with patch("homeassistant.components.nest.media_source.MEDIA_PATH", new=tmp_path): - yield - shutil.rmtree(hass.config.path(tmp_path), ignore_errors=True) + full_path = hass.config.path(tmp_path) + yield full_path + shutil.rmtree(full_path, ignore_errors=True) @pytest.fixture diff --git a/tests/components/nest/test_media_source.py b/tests/components/nest/test_media_source.py index d009e1185da..0b0654fc69c 100644 --- a/tests/components/nest/test_media_source.py +++ b/tests/components/nest/test_media_source.py @@ -8,11 +8,13 @@ from collections.abc import Generator import datetime from http import HTTPStatus import io +import pathlib from typing import Any from unittest.mock import patch import aiohttp import av +from freezegun import freeze_time import numpy as np import pytest @@ -39,7 +41,7 @@ from .common import ( ) from .conftest import FakeAuth -from tests.common import MockUser, async_capture_events +from tests.common import MockUser, async_capture_events, async_fire_time_changed from tests.typing import ClientSessionGenerator DOMAIN = "nest" @@ -1574,3 +1576,80 @@ async def test_event_clip_media_attachment( response = await client.get(content_path) assert response.status == HTTPStatus.OK, f"Response not matched: {response}" await response.read() + + +@pytest.mark.parametrize(("device_traits", "cache_size"), [(BATTERY_CAMERA_TRAITS, 5)]) +async def test_remove_stale_media( + hass: HomeAssistant, + device_registry: dr.DeviceRegistry, + auth, + mp4, + hass_client: ClientSessionGenerator, + subscriber, + setup_platform, + media_path: str, +) -> None: + """Test media files getting evicted from the cache.""" + await setup_platform() + + device = device_registry.async_get_device(identifiers={(DOMAIN, DEVICE_ID)}) + assert device + assert device.name == DEVICE_NAME + + # Publish a media event + auth.responses = [ + aiohttp.web.Response(body=mp4.getvalue()), + ] + event_timestamp = dt_util.now() + await subscriber.async_receive_event( + create_event_message( + create_battery_event_data(MOTION_EVENT), + timestamp=event_timestamp, + ) + ) + await hass.async_block_till_done() + + # The first subdirectory is the device id. Media for events are stored in the + # device subdirectory. First verify that the media was persisted. We will + # then add additional media files, then invoke the garbage collector, and + # then verify orphaned files are removed. + storage_path = pathlib.Path(media_path) + device_path = storage_path / device.id + media_files = list(device_path.glob("*")) + assert len(media_files) == 1 + event_media = media_files[0] + assert event_media.name.endswith(".mp4") + + event_time1 = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=8) + extra_media1 = ( + device_path / f"{int(event_time1.timestamp())}-camera_motion-test.mp4" + ) + extra_media1.write_bytes(mp4.getvalue()) + event_time2 = event_time1 + datetime.timedelta(hours=20) + extra_media2 = ( + device_path / f"{int(event_time2.timestamp())}-camera_motion-test.jpg" + ) + extra_media2.write_bytes(mp4.getvalue()) + # This event will not be garbage collected because it is too recent + event_time3 = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=3) + extra_media3 = ( + device_path / f"{int(event_time3.timestamp())}-camera_motion-test.mp4" + ) + extra_media3.write_bytes(mp4.getvalue()) + + assert len(list(device_path.glob("*"))) == 4 + + # Advance the clock to invoke the garbage collector. This will remove extra + # files that are not valid events that are old enough. + point_in_time = datetime.datetime.now(datetime.UTC) + datetime.timedelta(days=1) + with freeze_time(point_in_time): + async_fire_time_changed(hass, point_in_time) + await hass.async_block_till_done() + await hass.async_block_till_done() + + # Verify that the event media is still present and that the extra files + # are removed. Newer media is not removed. + assert event_media.exists() + assert not extra_media1.exists() + assert not extra_media2.exists() + assert extra_media3.exists()