Use storage helper in feedreader (#98754)

Martin Hjelmare 2023-08-21 18:21:34 +02:00 committed by GitHub
parent a42d975c49
commit 91df9434d0
2 changed files with 283 additions and 127 deletions

homeassistant/components/feedreader/__init__.py

@@ -1,22 +1,23 @@
 """Support for RSS/Atom feeds."""
 from __future__ import annotations
 
+from calendar import timegm
 from datetime import datetime, timedelta
 from logging import getLogger
-from os.path import exists
+import os
 import pickle
-from threading import Lock
-from time import struct_time
-from typing import cast
+from time import gmtime, struct_time
 
 import feedparser
 import voluptuous as vol
 
 from homeassistant.const import CONF_SCAN_INTERVAL, EVENT_HOMEASSISTANT_START
-from homeassistant.core import HomeAssistant
+from homeassistant.core import Event, HomeAssistant, callback
 import homeassistant.helpers.config_validation as cv
-from homeassistant.helpers.event import track_time_interval
+from homeassistant.helpers.event import async_track_time_interval
+from homeassistant.helpers.storage import Store
 from homeassistant.helpers.typing import ConfigType
+from homeassistant.util.dt import utc_from_timestamp
 
 _LOGGER = getLogger(__name__)
@@ -25,10 +26,12 @@ CONF_MAX_ENTRIES = "max_entries"
 
 DEFAULT_MAX_ENTRIES = 20
 DEFAULT_SCAN_INTERVAL = timedelta(hours=1)
+DELAY_SAVE = 30
 
 DOMAIN = "feedreader"
 
 EVENT_FEEDREADER = "feedreader"
+STORAGE_VERSION = 1
 
 CONFIG_SCHEMA = vol.Schema(
     {
@@ -46,17 +49,25 @@ CONFIG_SCHEMA = vol.Schema(
 )
 
 
-def setup(hass: HomeAssistant, config: ConfigType) -> bool:
+async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
     """Set up the Feedreader component."""
     urls: list[str] = config[DOMAIN][CONF_URLS]
+    if not urls:
+        return False
+
     scan_interval: timedelta = config[DOMAIN][CONF_SCAN_INTERVAL]
     max_entries: int = config[DOMAIN][CONF_MAX_ENTRIES]
-    data_file = hass.config.path(f"{DOMAIN}.pickle")
-    storage = StoredData(data_file)
+    old_data_file = hass.config.path(f"{DOMAIN}.pickle")
+    storage = StoredData(hass, old_data_file)
+    await storage.async_setup()
     feeds = [
-        FeedManager(url, scan_interval, max_entries, hass, storage) for url in urls
+        FeedManager(hass, url, scan_interval, max_entries, storage) for url in urls
     ]
-    return len(feeds) > 0
+
+    for feed in feeds:
+        feed.async_setup()
+
+    return True
 
 
 class FeedManager:
@@ -64,50 +75,47 @@ class FeedManager:
 
     def __init__(
         self,
+        hass: HomeAssistant,
         url: str,
         scan_interval: timedelta,
         max_entries: int,
-        hass: HomeAssistant,
         storage: StoredData,
     ) -> None:
         """Initialize the FeedManager object, poll as per scan interval."""
+        self._hass = hass
         self._url = url
         self._scan_interval = scan_interval
         self._max_entries = max_entries
         self._feed: feedparser.FeedParserDict | None = None
-        self._hass = hass
         self._firstrun = True
         self._storage = storage
         self._last_entry_timestamp: struct_time | None = None
-        self._last_update_successful = False
         self._has_published_parsed = False
         self._has_updated_parsed = False
         self._event_type = EVENT_FEEDREADER
         self._feed_id = url
-        hass.bus.listen_once(EVENT_HOMEASSISTANT_START, lambda _: self._update())
-        self._init_regular_updates(hass)
+
+    @callback
+    def async_setup(self) -> None:
+        """Set up the feed manager."""
+        self._hass.bus.async_listen_once(EVENT_HOMEASSISTANT_START, self._async_update)
+        async_track_time_interval(
+            self._hass, self._async_update, self._scan_interval, cancel_on_shutdown=True
+        )
 
     def _log_no_entries(self) -> None:
         """Send no entries log at debug level."""
         _LOGGER.debug("No new entries to be published in feed %s", self._url)
 
-    def _init_regular_updates(self, hass: HomeAssistant) -> None:
-        """Schedule regular updates at the top of the clock."""
-        track_time_interval(
-            hass,
-            lambda now: self._update(),
-            self._scan_interval,
-            cancel_on_shutdown=True,
-        )
-
-    @property
-    def last_update_successful(self) -> bool:
-        """Return True if the last feed update was successful."""
-        return self._last_update_successful
-
-    def _update(self) -> None:
+    async def _async_update(self, _: datetime | Event) -> None:
         """Update the feed and publish new entries to the event bus."""
-        _LOGGER.info("Fetching new data from feed %s", self._url)
+        last_entry_timestamp = await self._hass.async_add_executor_job(self._update)
+        if last_entry_timestamp:
+            self._storage.async_put_timestamp(self._feed_id, last_entry_timestamp)
+
+    def _update(self) -> struct_time | None:
+        """Update the feed and publish new entries to the event bus."""
+        _LOGGER.debug("Fetching new data from feed %s", self._url)
         self._feed: feedparser.FeedParserDict = feedparser.parse(  # type: ignore[no-redef]
             self._url,
             etag=None if not self._feed else self._feed.get("etag"),
@@ -115,38 +123,41 @@ class FeedManager:
         )
         if not self._feed:
             _LOGGER.error("Error fetching feed data from %s", self._url)
-            self._last_update_successful = False
-        else:
-            # The 'bozo' flag really only indicates that there was an issue
-            # during the initial parsing of the XML, but it doesn't indicate
-            # whether this is an unrecoverable error. In this case the
-            # feedparser lib is trying a less strict parsing approach.
-            # If an error is detected here, log warning message but continue
-            # processing the feed entries if present.
-            if self._feed.bozo != 0:
-                _LOGGER.warning(
-                    "Possible issue parsing feed %s: %s",
-                    self._url,
-                    self._feed.bozo_exception,
-                )
-            # Using etag and modified, if there's no new data available,
-            # the entries list will be empty
-            if self._feed.entries:
-                _LOGGER.debug(
-                    "%s entri(es) available in feed %s",
-                    len(self._feed.entries),
-                    self._url,
-                )
-                self._filter_entries()
-                self._publish_new_entries()
-                if self._has_published_parsed or self._has_updated_parsed:
-                    self._storage.put_timestamp(
-                        self._feed_id, cast(struct_time, self._last_entry_timestamp)
-                    )
-            else:
-                self._log_no_entries()
-            self._last_update_successful = True
-        _LOGGER.info("Fetch from feed %s completed", self._url)
+            return None
+        # The 'bozo' flag really only indicates that there was an issue
+        # during the initial parsing of the XML, but it doesn't indicate
+        # whether this is an unrecoverable error. In this case the
+        # feedparser lib is trying a less strict parsing approach.
+        # If an error is detected here, log warning message but continue
+        # processing the feed entries if present.
+        if self._feed.bozo != 0:
+            _LOGGER.warning(
+                "Possible issue parsing feed %s: %s",
+                self._url,
+                self._feed.bozo_exception,
+            )
+        # Using etag and modified, if there's no new data available,
+        # the entries list will be empty
+        _LOGGER.debug(
+            "%s entri(es) available in feed %s",
+            len(self._feed.entries),
+            self._url,
+        )
+        if not self._feed.entries:
+            self._log_no_entries()
+            return None
+
+        self._filter_entries()
+        self._publish_new_entries()
+
+        _LOGGER.debug("Fetch from feed %s completed", self._url)
+
+        if (
+            self._has_published_parsed or self._has_updated_parsed
+        ) and self._last_entry_timestamp:
+            return self._last_entry_timestamp
+        return None
 
     def _filter_entries(self) -> None:
         """Filter the entries provided and return the ones to keep."""
@@ -219,47 +230,62 @@ class FeedManager:
 
 
 class StoredData:
-    """Abstraction over pickle data storage."""
+    """Represent a data storage."""
 
-    def __init__(self, data_file: str) -> None:
-        """Initialize pickle data storage."""
-        self._data_file = data_file
-        self._lock = Lock()
-        self._cache_outdated = True
+    def __init__(self, hass: HomeAssistant, legacy_data_file: str) -> None:
+        """Initialize data storage."""
+        self._legacy_data_file = legacy_data_file
         self._data: dict[str, struct_time] = {}
-        self._fetch_data()
+        self._hass = hass
+        self._store: Store[dict[str, str]] = Store(hass, STORAGE_VERSION, DOMAIN)
 
-    def _fetch_data(self) -> None:
-        """Fetch data stored into pickle file."""
-        if self._cache_outdated and exists(self._data_file):
-            try:
-                _LOGGER.debug("Fetching data from file %s", self._data_file)
-                with self._lock, open(self._data_file, "rb") as myfile:
-                    self._data = pickle.load(myfile) or {}
-                self._cache_outdated = False
-            except Exception:  # pylint: disable=broad-except
-                _LOGGER.error(
-                    "Error loading data from pickled file %s", self._data_file
-                )
+    async def async_setup(self) -> None:
+        """Set up storage."""
+        if not os.path.exists(self._store.path):
+            # Remove the legacy store loading after deprecation period.
+            data = await self._hass.async_add_executor_job(self._legacy_fetch_data)
+        else:
+            if (store_data := await self._store.async_load()) is None:
+                return
+            # Make sure that dst is set to 0, by using gmtime() on the timestamp.
+            data = {
+                feed_id: gmtime(datetime.fromisoformat(timestamp_string).timestamp())
+                for feed_id, timestamp_string in store_data.items()
+            }
+        self._data = data
+
+    def _legacy_fetch_data(self) -> dict[str, struct_time]:
+        """Fetch data stored in pickle file."""
+        _LOGGER.debug("Fetching data from legacy file %s", self._legacy_data_file)
+        try:
+            with open(self._legacy_data_file, "rb") as myfile:
+                return pickle.load(myfile) or {}
+        except FileNotFoundError:
+            pass
+        except (OSError, pickle.PickleError) as err:
+            _LOGGER.error(
+                "Error loading data from pickled file %s: %s",
+                self._legacy_data_file,
+                err,
+            )
+        return {}
 
     def get_timestamp(self, feed_id: str) -> struct_time | None:
-        """Return stored timestamp for given feed id (usually the url)."""
-        self._fetch_data()
+        """Return stored timestamp for given feed id."""
         return self._data.get(feed_id)
 
-    def put_timestamp(self, feed_id: str, timestamp: struct_time) -> None:
-        """Update timestamp for given feed id (usually the url)."""
-        self._fetch_data()
-        with self._lock, open(self._data_file, "wb") as myfile:
-            self._data.update({feed_id: timestamp})
-            _LOGGER.debug(
-                "Overwriting feed %s timestamp in storage file %s: %s",
-                feed_id,
-                self._data_file,
-                timestamp,
-            )
-            try:
-                pickle.dump(self._data, myfile)
-            except Exception:  # pylint: disable=broad-except
-                _LOGGER.error("Error saving pickled data to %s", self._data_file)
-            self._cache_outdated = True
+    @callback
+    def async_put_timestamp(self, feed_id: str, timestamp: struct_time) -> None:
+        """Update timestamp for given feed id."""
+        self._data[feed_id] = timestamp
+        self._store.async_delay_save(self._async_save_data, DELAY_SAVE)
+
+    @callback
+    def _async_save_data(self) -> dict[str, str]:
+        """Save feed data to storage."""
+        return {
+            feed_id: utc_from_timestamp(timegm(struct_utc)).isoformat()
+            for feed_id, struct_utc in self._data.items()
+        }
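
Note on the timestamp handling above: the Store file keeps one ISO 8601 UTC string per feed id, while the in-memory cache stays a struct_time, so the component converts on save and on load. Below is a minimal, self-contained sketch of that round trip using only the standard library; the integration itself uses homeassistant.util.dt.utc_from_timestamp on the save path, and the helper names here are illustrative, not part of the PR.

from calendar import timegm
from datetime import datetime, timezone
from time import gmtime, struct_time


def to_stored(timestamp: struct_time) -> str:
    # Mirrors _async_save_data: UTC struct_time -> ISO 8601 string for the Store.
    return datetime.fromtimestamp(timegm(timestamp), tz=timezone.utc).isoformat()


def from_stored(timestamp_string: str) -> struct_time:
    # Mirrors async_setup: ISO string -> struct_time with tm_isdst forced to 0 via gmtime().
    return gmtime(datetime.fromisoformat(timestamp_string).timestamp())


ts = gmtime(1525065000)  # 2018-04-30T05:10:00+00:00, the value used in the tests below
assert to_stored(ts) == "2018-04-30T05:10:00+00:00"
assert from_stored(to_stored(ts)) == ts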

tests/components/feedreader/test_init.py

@@ -1,7 +1,11 @@
 """The tests for the feedreader component."""
-from datetime import timedelta
+from collections.abc import Generator
+from datetime import datetime, timedelta
+import pickle
+from time import gmtime
+from typing import Any
 from unittest import mock
-from unittest.mock import mock_open, patch
+from unittest.mock import MagicMock, mock_open, patch
 
 import pytest
@@ -13,7 +17,7 @@ from homeassistant.components.feedreader import (
     EVENT_FEEDREADER,
 )
 from homeassistant.const import CONF_SCAN_INTERVAL, EVENT_HOMEASSISTANT_START
-from homeassistant.core import HomeAssistant
+from homeassistant.core import Event, HomeAssistant
 from homeassistant.setup import async_setup_component
 import homeassistant.util.dt as dt_util
@@ -27,7 +31,7 @@ VALID_CONFIG_4 = {feedreader.DOMAIN: {CONF_URLS: [URL], CONF_MAX_ENTRIES: 5}}
 VALID_CONFIG_5 = {feedreader.DOMAIN: {CONF_URLS: [URL], CONF_MAX_ENTRIES: 1}}
 
 
-def load_fixture_bytes(src):
+def load_fixture_bytes(src: str) -> bytes:
     """Return byte stream of fixture."""
     feed_data = load_fixture(src)
     raw = bytes(feed_data, "utf-8")
@@ -35,72 +39,198 @@ def load_fixture_bytes(src):
 
 
 @pytest.fixture(name="feed_one_event")
-def fixture_feed_one_event(hass):
+def fixture_feed_one_event(hass: HomeAssistant) -> bytes:
     """Load test feed data for one event."""
     return load_fixture_bytes("feedreader.xml")
 
 
 @pytest.fixture(name="feed_two_event")
-def fixture_feed_two_events(hass):
+def fixture_feed_two_events(hass: HomeAssistant) -> bytes:
     """Load test feed data for two event."""
     return load_fixture_bytes("feedreader1.xml")
 
 
 @pytest.fixture(name="feed_21_events")
-def fixture_feed_21_events(hass):
+def fixture_feed_21_events(hass: HomeAssistant) -> bytes:
     """Load test feed data for twenty one events."""
     return load_fixture_bytes("feedreader2.xml")
 
 
 @pytest.fixture(name="feed_three_events")
-def fixture_feed_three_events(hass):
+def fixture_feed_three_events(hass: HomeAssistant) -> bytes:
     """Load test feed data for three events."""
     return load_fixture_bytes("feedreader3.xml")
 
 
 @pytest.fixture(name="feed_atom_event")
-def fixture_feed_atom_event(hass):
+def fixture_feed_atom_event(hass: HomeAssistant) -> bytes:
     """Load test feed data for atom event."""
     return load_fixture_bytes("feedreader5.xml")
 
 
 @pytest.fixture(name="events")
-async def fixture_events(hass):
+async def fixture_events(hass: HomeAssistant) -> list[Event]:
     """Fixture that catches alexa events."""
     return async_capture_events(hass, EVENT_FEEDREADER)
 
 
-@pytest.fixture(name="feed_storage", autouse=True)
-def fixture_feed_storage():
+@pytest.fixture(name="storage")
+def fixture_storage(request: pytest.FixtureRequest) -> Generator[None, None, None]:
+    """Set up the test storage environment."""
+    if request.param == "legacy_storage":
+        with patch("os.path.exists", return_value=False):
+            yield
+    elif request.param == "json_storage":
+        with patch("os.path.exists", return_value=True):
+            yield
+    else:
+        raise RuntimeError("Invalid storage fixture")
+
+
+@pytest.fixture(name="legacy_storage_open")
+def fixture_legacy_storage_open() -> Generator[MagicMock, None, None]:
     """Mock builtins.open for feedreader storage."""
-    with patch("homeassistant.components.feedreader.open", mock_open(), create=True):
-        yield
-
-
-async def test_setup_one_feed(hass: HomeAssistant) -> None:
-    """Test the general setup of this component."""
     with patch(
-        "homeassistant.components.feedreader.track_time_interval"
+        "homeassistant.components.feedreader.open",
+        mock_open(),
+        create=True,
+    ) as open_mock:
+        yield open_mock
+
+
+@pytest.fixture(name="legacy_storage_load", autouse=True)
+def fixture_legacy_storage_load(
+    legacy_storage_open,
+) -> Generator[MagicMock, None, None]:
+    """Mock builtins.open for feedreader storage."""
+    with patch(
+        "homeassistant.components.feedreader.pickle.load", return_value={}
+    ) as pickle_load:
+        yield pickle_load
+
+
+async def test_setup_no_feeds(hass: HomeAssistant) -> None:
+    """Test config with no urls."""
+    assert not await async_setup_component(
+        hass, feedreader.DOMAIN, {feedreader.DOMAIN: {CONF_URLS: []}}
+    )
+
+
+@pytest.mark.parametrize(
+    ("open_error", "load_error"),
+    [
+        (FileNotFoundError("No file"), None),
+        (OSError("Boom"), None),
+        (None, pickle.PickleError("Bad data")),
+    ],
+)
+async def test_legacy_storage_error(
+    hass: HomeAssistant,
+    legacy_storage_open: MagicMock,
+    legacy_storage_load: MagicMock,
+    open_error: Exception | None,
+    load_error: Exception | None,
+) -> None:
+    """Test legacy storage error."""
+    legacy_storage_open.side_effect = open_error
+    legacy_storage_load.side_effect = load_error
+
+    with patch(
+        "homeassistant.components.feedreader.async_track_time_interval"
     ) as track_method:
         assert await async_setup_component(hass, feedreader.DOMAIN, VALID_CONFIG_1)
         await hass.async_block_till_done()
 
         track_method.assert_called_once_with(
             hass, mock.ANY, DEFAULT_SCAN_INTERVAL, cancel_on_shutdown=True
         )
 
 
+@pytest.mark.parametrize("storage", ["legacy_storage", "json_storage"], indirect=True)
+async def test_storage_data_loading(
+    hass: HomeAssistant,
+    events: list[Event],
+    feed_one_event: bytes,
+    legacy_storage_load: MagicMock,
+    hass_storage: dict[str, Any],
+    storage: None,
+) -> None:
+    """Test loading existing storage data."""
+    storage_data: dict[str, str] = {URL: "2018-04-30T05:10:00+00:00"}
+    hass_storage[feedreader.DOMAIN] = {
+        "version": 1,
+        "minor_version": 1,
+        "key": feedreader.DOMAIN,
+        "data": storage_data,
+    }
+    legacy_storage_data = {
+        URL: gmtime(datetime.fromisoformat(storage_data[URL]).timestamp())
+    }
+    legacy_storage_load.return_value = legacy_storage_data
+
+    with patch(
+        "feedparser.http.get",
+        return_value=feed_one_event,
+    ):
+        assert await async_setup_component(hass, feedreader.DOMAIN, VALID_CONFIG_2)
+
+        hass.bus.async_fire(EVENT_HOMEASSISTANT_START)
+        await hass.async_block_till_done()
+
+    # no new events
+    assert not events
+
+
+async def test_storage_data_writing(
+    hass: HomeAssistant,
+    events: list[Event],
+    feed_one_event: bytes,
+    hass_storage: dict[str, Any],
+) -> None:
+    """Test writing to storage."""
+    storage_data: dict[str, str] = {URL: "2018-04-30T05:10:00+00:00"}
+
+    with patch(
+        "feedparser.http.get",
+        return_value=feed_one_event,
+    ), patch("homeassistant.components.feedreader.DELAY_SAVE", new=0):
+        assert await async_setup_component(hass, feedreader.DOMAIN, VALID_CONFIG_2)
+
+        hass.bus.async_fire(EVENT_HOMEASSISTANT_START)
+        await hass.async_block_till_done()
+
+    # one new event
+    assert len(events) == 1
+
+    # storage data updated
+    assert hass_storage[feedreader.DOMAIN]["data"] == storage_data
+
+
+@pytest.mark.parametrize("storage", ["legacy_storage", "json_storage"], indirect=True)
+async def test_setup_one_feed(hass: HomeAssistant, storage: None) -> None:
+    """Test the general setup of this component."""
+    with patch(
+        "homeassistant.components.feedreader.async_track_time_interval"
+    ) as track_method:
+        assert await async_setup_component(hass, feedreader.DOMAIN, VALID_CONFIG_1)
+        await hass.async_block_till_done()
+
+        track_method.assert_called_once_with(
+            hass, mock.ANY, DEFAULT_SCAN_INTERVAL, cancel_on_shutdown=True
+        )
 
 
 async def test_setup_scan_interval(hass: HomeAssistant) -> None:
     """Test the setup of this component with scan interval."""
     with patch(
-        "homeassistant.components.feedreader.track_time_interval"
+        "homeassistant.components.feedreader.async_track_time_interval"
     ) as track_method:
         assert await async_setup_component(hass, feedreader.DOMAIN, VALID_CONFIG_2)
         await hass.async_block_till_done()
 
         track_method.assert_called_once_with(
             hass, mock.ANY, timedelta(seconds=60), cancel_on_shutdown=True
         )
 
 
 async def test_setup_max_entries(hass: HomeAssistant) -> None:
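
A side note on the new storage test fixture above: @pytest.mark.parametrize(..., indirect=True) routes each parameter value into the fixture through request.param, so the same test body runs once against the legacy pickle path and once against the JSON store. Below is a minimal, generic illustration of that pytest pattern; the names are hypothetical and not taken from this PR.

import pytest


@pytest.fixture(name="backend")
def fixture_backend(request: pytest.FixtureRequest) -> str:
    """Receive the parametrized value via request.param and prepare the environment."""
    return f"prepared-{request.param}"


@pytest.mark.parametrize("backend", ["legacy_storage", "json_storage"], indirect=True)
def test_backend(backend: str) -> None:
    """Run once per parametrized backend value."""
    assert backend.startswith("prepared-")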