Feedreader: add file data storage (#2147)

Right now we skip already-parsed entries by tracking them in memory at
runtime, but that information does not survive a restart. This patch adds
storage functionality: when a feed supports `published_parsed`, the
timestamp of the last processed entry is pickled to a file in the default
config folder. A brief usage sketch of the new storage class follows the
diff below.
Author:    Alexander Fortin (2016-05-29 23:33:53 +02:00)
Committer: Paulus Schoutsen
Parent:    afe84c2a8b
Commit:    19522b1f39


@@ -6,7 +6,11 @@ https://home-assistant.io/components/feedreader/
 """
 from datetime import datetime
 from logging import getLogger
+from os.path import exists
+from threading import Lock
+import pickle
+
 import voluptuous as vol
 
 from homeassistant.const import EVENT_HOMEASSISTANT_START
 from homeassistant.helpers.event import track_utc_time_change
@@ -27,14 +31,15 @@ MAX_ENTRIES = 20
 class FeedManager(object):
     """Abstraction over feedparser module."""
 
-    def __init__(self, url, hass):
+    def __init__(self, url, hass, storage):
         """Initialize the FeedManager object, poll every hour."""
         self._url = url
         self._feed = None
         self._hass = hass
         self._firstrun = True
-        # Initialize last entry timestamp as epoch time
-        self._last_entry_timestamp = datetime.utcfromtimestamp(0).timetuple()
+        self._storage = storage
+        self._last_entry_timestamp = None
+        self._has_published_parsed = False
         hass.bus.listen_once(EVENT_HOMEASSISTANT_START,
                              lambda _: self._update())
         track_utc_time_change(hass, lambda now: self._update(),
@@ -42,7 +47,7 @@ class FeedManager(object):
 
     def _log_no_entries(self):
         """Send no entries log at debug level."""
-        _LOGGER.debug('No new entries in feed "%s"', self._url)
+        _LOGGER.debug('No new entries to be published in feed "%s"', self._url)
 
     def _update(self):
         """Update the feed and publish new entries to the event bus."""
@@ -65,10 +70,13 @@ class FeedManager(object):
                           len(self._feed.entries),
                           self._url)
             if len(self._feed.entries) > MAX_ENTRIES:
-                _LOGGER.debug('Publishing only the first %s entries '
+                _LOGGER.debug('Processing only the first %s entries '
                               'in feed "%s"', MAX_ENTRIES, self._url)
                 self._feed.entries = self._feed.entries[0:MAX_ENTRIES]
             self._publish_new_entries()
+            if self._has_published_parsed:
+                self._storage.put_timestamp(self._url,
+                                            self._last_entry_timestamp)
         else:
             self._log_no_entries()
         _LOGGER.info('Fetch from feed "%s" completed', self._url)
@@ -79,9 +87,11 @@ class FeedManager(object):
         # let's make use of it to publish only new available
         # entries since the last run
         if 'published_parsed' in entry.keys():
+            self._has_published_parsed = True
             self._last_entry_timestamp = max(entry.published_parsed,
                                              self._last_entry_timestamp)
         else:
+            self._has_published_parsed = False
             _LOGGER.debug('No `published_parsed` info available '
                           'for entry "%s"', entry.title)
         entry.update({'feed_url': self._url})
@@ -90,6 +100,13 @@ class FeedManager(object):
     def _publish_new_entries(self):
         """Publish new entries to the event bus."""
         new_entries = False
+        self._last_entry_timestamp = self._storage.get_timestamp(self._url)
+        if self._last_entry_timestamp:
+            self._firstrun = False
+        else:
+            # Set last entry timestamp as epoch time if not available
+            self._last_entry_timestamp = \
+                datetime.utcfromtimestamp(0).timetuple()
         for entry in self._feed.entries:
             if self._firstrun or (
                     'published_parsed' in entry.keys() and
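
A note on the comparisons above: feedparser exposes `published_parsed` as a
`time.struct_time`, which is a tuple subclass and therefore compares
element-wise, so both the `max()` call and the ordering check against the
epoch fallback are well defined. A minimal illustration (not part of the
commit; the timestamp values are made up):

    from datetime import datetime
    import time

    # Epoch fallback used by _publish_new_entries when nothing is stored yet.
    epoch = datetime.utcfromtimestamp(0).timetuple()
    # Any parsed feed timestamp is also a struct_time.
    newer = time.strptime('2016-05-29 23:33', '%Y-%m-%d %H:%M')

    assert newer > epoch            # tuple-style, element-wise comparison
    assert max(epoch, newer) == newer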
@@ -103,8 +120,55 @@ class FeedManager(object):
         self._firstrun = False
 
 
+class StoredData(object):
+    """Abstraction over pickle data storage."""
+
+    def __init__(self, data_file):
+        """Initialize pickle data storage."""
+        self._data_file = data_file
+        self._lock = Lock()
+        self._cache_outdated = True
+        self._data = {}
+        self._fetch_data()
+
+    def _fetch_data(self):
+        """Fetch data stored into pickle file."""
+        if self._cache_outdated and exists(self._data_file):
+            try:
+                _LOGGER.debug('Fetching data from file %s', self._data_file)
+                with self._lock, open(self._data_file, 'rb') as myfile:
+                    self._data = pickle.load(myfile) or {}
+                    self._cache_outdated = False
+            # pylint: disable=bare-except
+            except:
+                _LOGGER.error('Error loading data from pickled file %s',
+                              self._data_file)
+
+    def get_timestamp(self, url):
+        """Return stored timestamp for given url."""
+        self._fetch_data()
+        return self._data.get(url)
+
+    def put_timestamp(self, url, timestamp):
+        """Update timestamp for given url."""
+        self._fetch_data()
+        with self._lock, open(self._data_file, 'wb') as myfile:
+            self._data.update({url: timestamp})
+            _LOGGER.debug('Overwriting feed "%s" timestamp in storage file %s',
+                          url, self._data_file)
+            try:
+                pickle.dump(self._data, myfile)
+            # pylint: disable=bare-except
+            except:
+                _LOGGER.error('Error saving pickled data to %s',
+                              self._data_file)
+        self._cache_outdated = True
+
+
 def setup(hass, config):
     """Setup the feedreader component."""
     urls = config.get(DOMAIN)['urls']
-    feeds = [FeedManager(url, hass) for url in urls]
+    data_file = hass.config.path("{}.pickle".format(DOMAIN))
+    storage = StoredData(data_file)
+    feeds = [FeedManager(url, hass, storage) for url in urls]
     return len(feeds) > 0
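
As promised above, here is a quick sketch of how the new storage flow fits
together. It exercises `StoredData` outside of Home Assistant; the pickle
path and feed URL are made up for illustration, and `time.gmtime()` stands
in for a real `published_parsed` value:

    import time
    from homeassistant.components.feedreader import StoredData

    storage = StoredData('/tmp/feedreader.pickle')  # hypothetical path
    url = 'https://example.com/atom.xml'            # hypothetical feed

    # First run: nothing stored yet, so FeedManager falls back to epoch
    # time and treats every entry as new.
    assert storage.get_timestamp(url) is None

    # After a fetch of a feed with published_parsed support, FeedManager
    # persists the newest entry timestamp via put_timestamp().
    storage.put_timestamp(url, time.gmtime())

    # Any later run, including one after a restart, sees the stored value.
    assert storage.get_timestamp(url) is not None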