Remove nest event media files that are no longer referenced (#141295)

* Remove nest event media files that are no longer referenced

* Fix double glob
This commit is contained in:
Allen Porter 2025-03-24 18:08:54 -07:00 committed by GitHub
parent 204b1e1f24
commit f864f71028
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 159 additions and 5 deletions

View File

@ -20,8 +20,10 @@ from __future__ import annotations
from collections.abc import Mapping
from dataclasses import dataclass
import datetime
import logging
import os
import pathlib
from typing import Any
from google_nest_sdm.camera_traits import CameraClipPreviewTrait, CameraEventImageTrait
@ -46,6 +48,7 @@ from homeassistant.components.media_source import (
)
from homeassistant.core import HomeAssistant, callback
from homeassistant.helpers import device_registry as dr
from homeassistant.helpers.event import async_track_time_interval
from homeassistant.helpers.storage import Store
from homeassistant.helpers.template import DATE_STR_FORMAT
from homeassistant.util import dt as dt_util
@ -72,6 +75,9 @@ MEDIA_PATH = f"{DOMAIN}/event_media"
# Size of small in-memory disk cache to avoid excessive disk reads
DISK_READ_LRU_MAX_SIZE = 32
# Remove orphaned media files that are older than this age
ORPHANED_MEDIA_AGE_CUTOFF = datetime.timedelta(days=7)
async def async_get_media_event_store(
hass: HomeAssistant, subscriber: GoogleNestSubscriber
@ -123,6 +129,12 @@ class NestEventMediaStore(EventMediaStore):
self._media_path = media_path
self._data: dict[str, Any] | None = None
self._devices: Mapping[str, str] | None = {}
# Invoke garbage collection for orphaned files one per
async_track_time_interval(
hass,
self.async_remove_orphaned_media,
datetime.timedelta(days=1),
)
async def async_load(self) -> dict | None:
"""Load data."""
@ -249,6 +261,68 @@ class NestEventMediaStore(EventMediaStore):
devices[device.name] = device_entry.id
return devices
async def async_remove_orphaned_media(self, now: datetime.datetime) -> None:
"""Remove any media files that are orphaned and not referenced by the active event data.
The event media store handles garbage collection, but there may be cases where files are
left around or unable to be removed. This is a scheduled event that will also check for
old orphaned files and remove them when the events are not referenced in the active list
of event data.
Event media files are stored with the format <timestamp>-<event_type>.suffix. We extract
the list of valid timestamps from the event data and remove any files that are not in that list
or are older than the cutoff time.
"""
_LOGGER.debug("Checking for orphaned media at %s", now)
def _cleanup(event_timestamps: dict[str, set[int]]) -> None:
time_cutoff = (now - ORPHANED_MEDIA_AGE_CUTOFF).timestamp()
media_path = pathlib.Path(self._media_path)
for device_id, valid_timestamps in event_timestamps.items():
media_files = list(media_path.glob(f"{device_id}/*"))
_LOGGER.debug("Found %d files (device=%s)", len(media_files), device_id)
for media_file in media_files:
if "-" not in media_file.name:
continue
try:
timestamp = int(media_file.name.split("-")[0])
except ValueError:
continue
if timestamp in valid_timestamps or timestamp > time_cutoff:
continue
_LOGGER.debug("Removing orphaned media file: %s", media_file)
try:
os.remove(media_file)
except OSError as err:
_LOGGER.error(
"Unable to remove orphaned media file: %s %s",
media_file,
err,
)
# Nest device id mapped to home assistant device id
event_timestamps = await self._get_valid_event_timestamps()
await self._hass.async_add_executor_job(_cleanup, event_timestamps)
async def _get_valid_event_timestamps(self) -> dict[str, set[int]]:
"""Return a mapping of home assistant device id to valid timestamps."""
device_map = await self._get_devices()
event_data = await self.async_load() or {}
valid_device_timestamps = {}
for nest_device_id, device_id in device_map.items():
if (device_events := event_data.get(nest_device_id, {})) is None:
continue
valid_device_timestamps[device_id] = {
int(
datetime.datetime.fromisoformat(
camera_event["timestamp"]
).timestamp()
)
for events in device_events
for camera_event in events["events"].values()
}
return valid_device_timestamps
async def async_get_media_source(hass: HomeAssistant) -> MediaSource:
"""Set up Nest media source."""

View File

@ -144,13 +144,14 @@ async def auth(
return FakeAuth(aioclient_mock, create_device, device_access_project_id)
@pytest.fixture(autouse=True)
def cleanup_media_storage(hass: HomeAssistant) -> Generator[None]:
@pytest.fixture(autouse=True, name="media_path")
def cleanup_media_storage(hass: HomeAssistant) -> Generator[str]:
"""Test cleanup, remove any media storage persisted during the test."""
tmp_path = str(uuid.uuid4())
with patch("homeassistant.components.nest.media_source.MEDIA_PATH", new=tmp_path):
yield
shutil.rmtree(hass.config.path(tmp_path), ignore_errors=True)
full_path = hass.config.path(tmp_path)
yield full_path
shutil.rmtree(full_path, ignore_errors=True)
@pytest.fixture

View File

@ -8,11 +8,13 @@ from collections.abc import Generator
import datetime
from http import HTTPStatus
import io
import pathlib
from typing import Any
from unittest.mock import patch
import aiohttp
import av
from freezegun import freeze_time
import numpy as np
import pytest
@ -39,7 +41,7 @@ from .common import (
)
from .conftest import FakeAuth
from tests.common import MockUser, async_capture_events
from tests.common import MockUser, async_capture_events, async_fire_time_changed
from tests.typing import ClientSessionGenerator
DOMAIN = "nest"
@ -1574,3 +1576,80 @@ async def test_event_clip_media_attachment(
response = await client.get(content_path)
assert response.status == HTTPStatus.OK, f"Response not matched: {response}"
await response.read()
@pytest.mark.parametrize(("device_traits", "cache_size"), [(BATTERY_CAMERA_TRAITS, 5)])
async def test_remove_stale_media(
hass: HomeAssistant,
device_registry: dr.DeviceRegistry,
auth,
mp4,
hass_client: ClientSessionGenerator,
subscriber,
setup_platform,
media_path: str,
) -> None:
"""Test media files getting evicted from the cache."""
await setup_platform()
device = device_registry.async_get_device(identifiers={(DOMAIN, DEVICE_ID)})
assert device
assert device.name == DEVICE_NAME
# Publish a media event
auth.responses = [
aiohttp.web.Response(body=mp4.getvalue()),
]
event_timestamp = dt_util.now()
await subscriber.async_receive_event(
create_event_message(
create_battery_event_data(MOTION_EVENT),
timestamp=event_timestamp,
)
)
await hass.async_block_till_done()
# The first subdirectory is the device id. Media for events are stored in the
# device subdirectory. First verify that the media was persisted. We will
# then add additional media files, then invoke the garbage collector, and
# then verify orphaned files are removed.
storage_path = pathlib.Path(media_path)
device_path = storage_path / device.id
media_files = list(device_path.glob("*"))
assert len(media_files) == 1
event_media = media_files[0]
assert event_media.name.endswith(".mp4")
event_time1 = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=8)
extra_media1 = (
device_path / f"{int(event_time1.timestamp())}-camera_motion-test.mp4"
)
extra_media1.write_bytes(mp4.getvalue())
event_time2 = event_time1 + datetime.timedelta(hours=20)
extra_media2 = (
device_path / f"{int(event_time2.timestamp())}-camera_motion-test.jpg"
)
extra_media2.write_bytes(mp4.getvalue())
# This event will not be garbage collected because it is too recent
event_time3 = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=3)
extra_media3 = (
device_path / f"{int(event_time3.timestamp())}-camera_motion-test.mp4"
)
extra_media3.write_bytes(mp4.getvalue())
assert len(list(device_path.glob("*"))) == 4
# Advance the clock to invoke the garbage collector. This will remove extra
# files that are not valid events that are old enough.
point_in_time = datetime.datetime.now(datetime.UTC) + datetime.timedelta(days=1)
with freeze_time(point_in_time):
async_fire_time_changed(hass, point_in_time)
await hass.async_block_till_done()
await hass.async_block_till_done()
# Verify that the event media is still present and that the extra files
# are removed. Newer media is not removed.
assert event_media.exists()
assert not extra_media1.exists()
assert not extra_media2.exists()
assert extra_media3.exists()