Unescape HTML Entities from RSS feeds (#130915)

* Unescape HTML Entities from RSS feeds

* Improve tests
This commit is contained in:
Sergio Conde Gómez 2024-11-20 18:21:16 +01:00 committed by GitHub
parent e7a2377c7e
commit 94bf77606b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 181 additions and 4 deletions

View File

@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import html
import logging import logging
from typing import Any from typing import Any
import urllib.error import urllib.error
@ -107,7 +108,7 @@ class FeedReaderConfigFlow(ConfigFlow, domain=DOMAIN):
return self.abort_on_import_error(user_input[CONF_URL], "url_error") return self.abort_on_import_error(user_input[CONF_URL], "url_error")
return self.show_user_form(user_input, {"base": "url_error"}) return self.show_user_form(user_input, {"base": "url_error"})
feed_title = feed["feed"]["title"] feed_title = html.unescape(feed["feed"]["title"])
return self.async_create_entry( return self.async_create_entry(
title=feed_title, title=feed_title,

View File

@ -4,6 +4,7 @@ from __future__ import annotations
from calendar import timegm from calendar import timegm
from datetime import datetime from datetime import datetime
import html
from logging import getLogger from logging import getLogger
from time import gmtime, struct_time from time import gmtime, struct_time
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
@ -102,7 +103,8 @@ class FeedReaderCoordinator(
"""Set up the feed manager.""" """Set up the feed manager."""
feed = await self._async_fetch_feed() feed = await self._async_fetch_feed()
self.logger.debug("Feed data fetched from %s : %s", self.url, feed["feed"]) self.logger.debug("Feed data fetched from %s : %s", self.url, feed["feed"])
self.feed_author = feed["feed"].get("author") if feed_author := feed["feed"].get("author"):
self.feed_author = html.unescape(feed_author)
self.feed_version = feedparser.api.SUPPORTED_VERSIONS.get(feed["version"]) self.feed_version = feedparser.api.SUPPORTED_VERSIONS.get(feed["version"])
self._feed = feed self._feed = feed

View File

@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import html
import logging import logging
from feedparser import FeedParserDict from feedparser import FeedParserDict
@ -76,15 +77,22 @@ class FeedReaderEvent(CoordinatorEntity[FeedReaderCoordinator], EventEntity):
# so we always take the first entry in list, since we only care about the latest entry # so we always take the first entry in list, since we only care about the latest entry
feed_data: FeedParserDict = data[0] feed_data: FeedParserDict = data[0]
if description := feed_data.get("description"):
description = html.unescape(description)
if title := feed_data.get("title"):
title = html.unescape(title)
if content := feed_data.get("content"): if content := feed_data.get("content"):
if isinstance(content, list) and isinstance(content[0], dict): if isinstance(content, list) and isinstance(content[0], dict):
content = content[0].get("value") content = content[0].get("value")
content = html.unescape(content)
self._trigger_event( self._trigger_event(
EVENT_FEEDREADER, EVENT_FEEDREADER,
{ {
ATTR_DESCRIPTION: feed_data.get("description"), ATTR_DESCRIPTION: description,
ATTR_TITLE: feed_data.get("title"), ATTR_TITLE: title,
ATTR_LINK: feed_data.get("link"), ATTR_LINK: feed_data.get("link"),
ATTR_CONTENT: content, ATTR_CONTENT: content,
}, },

View File

@ -64,6 +64,18 @@ def fixture_feed_only_summary(hass: HomeAssistant) -> bytes:
return load_fixture_bytes("feedreader8.xml") return load_fixture_bytes("feedreader8.xml")
@pytest.fixture(name="feed_htmlentities")
def fixture_feed_htmlentities(hass: HomeAssistant) -> bytes:
    """Return the raw bytes of feedreader9.xml, a test feed with HTML entities."""
    return load_fixture_bytes("feedreader9.xml")
@pytest.fixture(name="feed_atom_htmlentities")
def fixture_feed_atom_htmlentities(hass: HomeAssistant) -> bytes:
    """Return the raw bytes of feedreader10.xml, a test ATOM feed with HTML entities."""
    return load_fixture_bytes("feedreader10.xml")
@pytest.fixture(name="events") @pytest.fixture(name="events")
async def fixture_events(hass: HomeAssistant) -> list[Event]: async def fixture_events(hass: HomeAssistant) -> list[Event]:
"""Fixture that catches alexa events.""" """Fixture that catches alexa events."""

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?>
<feed
xmlns="http://www.w3.org/2005/Atom">
<title><![CDATA[ATOM RSS en espa&ntilde;ol]]></title>
<link href="http://example.org/"/>
<updated>2024-11-18T14:00:00Z</updated>
<author>
<name><![CDATA[Juan P&eacute;rez]]></name>
</author>
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
<entry>
<title><![CDATA[T&iacute;tulo]]></title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2024-11-18T14:00:00Z</updated>
<summary><![CDATA[Resumen en espa&ntilde;ol]]></summary>
<content><![CDATA[Contenido en espa&ntilde;ol]]></content>
</entry>
</feed>

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title><![CDATA[RSS en espa&ntilde;ol]]></title>
<description><![CDATA[Esto es un ejemplo de un feed RSS en espa&ntilde;ol]]></description>
<link>http://www.example.com/main.html</link>
<lastBuildDate>Mon, 18 Nov 2024 15:00:00 +1000</lastBuildDate>
<pubDate>Mon, 18 Nov 2024 15:00:00 +1000</pubDate>
<ttl>1800</ttl>
<item>
<title><![CDATA[T&iacute;tulo 1]]></title>
<description><![CDATA[Descripci&oacute;n 1]]></description>
<link>http://www.example.com/link/1</link>
<guid isPermaLink="false">GUID 1</guid>
<pubDate>Mon, 18 Nov 2024 15:00:00 +1000</pubDate>
<content><![CDATA[Contenido 1 en espa&ntilde;ol]]></content>
</item>
</channel>
</rss>

View File

@ -0,0 +1,27 @@
# serializer version: 1
# name: test_event_htmlentities[feed_atom_htmlentities]
ReadOnlyDict({
'content': 'Contenido en español',
'description': 'Resumen en español',
'event_type': 'feedreader',
'event_types': list([
'feedreader',
]),
'friendly_name': 'Mock Title',
'link': 'http://example.org/2003/12/13/atom03',
'title': 'Título',
})
# ---
# name: test_event_htmlentities[feed_htmlentities]
ReadOnlyDict({
'content': 'Contenido 1 en español',
'description': 'Descripción 1',
'event_type': 'feedreader',
'event_types': list([
'feedreader',
]),
'friendly_name': 'Mock Title',
'link': 'http://www.example.com/link/1',
'title': 'Título 1',
})
# ---

View File

@ -246,3 +246,38 @@ async def test_options_flow(hass: HomeAssistant) -> None:
assert result["data"] == { assert result["data"] == {
CONF_MAX_ENTRIES: 10, CONF_MAX_ENTRIES: 10,
} }
@pytest.mark.parametrize(
    ("fixture_name", "expected_title"),
    [
        ("feed_htmlentities", "RSS en español"),
        ("feed_atom_htmlentities", "ATOM RSS en español"),
    ],
)
async def test_feed_htmlentities(
    hass: HomeAssistant,
    feedparser,
    setup_entry,
    fixture_name,
    expected_title,
    request: pytest.FixtureRequest,
) -> None:
    """Check that a user flow yields an unescaped title for feeds with HTML entities."""
    flow_manager = hass.config_entries.flow
    # The feed fixture is selected by name so one test body serves both
    # parametrized feeds.
    feed_payload = request.getfixturevalue(fixture_name)

    with patch(
        "homeassistant.components.feedreader.config_flow.feedparser.http.get",
        side_effect=[feed_payload],
    ):
        # Starting the flow as a user must first present the "user" form.
        form_result = await flow_manager.async_init(
            DOMAIN, context={"source": SOURCE_USER}
        )
        assert form_result["type"] is FlowResultType.FORM
        assert form_result["step_id"] == "user"

        # Submitting the URL must create an entry titled with the decoded text.
        entry_result = await flow_manager.async_configure(
            form_result["flow_id"], user_input={CONF_URL: URL}
        )
        assert entry_result["type"] is FlowResultType.CREATE_ENTRY
        assert entry_result["title"] == expected_title

View File

@ -3,6 +3,9 @@
from datetime import timedelta from datetime import timedelta
from unittest.mock import patch from unittest.mock import patch
import pytest
from syrupy.assertion import SnapshotAssertion
from homeassistant.components.feedreader.event import ( from homeassistant.components.feedreader.event import (
ATTR_CONTENT, ATTR_CONTENT,
ATTR_DESCRIPTION, ATTR_DESCRIPTION,
@ -59,3 +62,31 @@ async def test_event_entity(
assert state.attributes[ATTR_LINK] == "http://www.example.com/link/1" assert state.attributes[ATTR_LINK] == "http://www.example.com/link/1"
assert state.attributes[ATTR_CONTENT] == "This is a summary" assert state.attributes[ATTR_CONTENT] == "This is a summary"
assert state.attributes[ATTR_DESCRIPTION] == "Description 1" assert state.attributes[ATTR_DESCRIPTION] == "Description 1"
@pytest.mark.parametrize(
    "fixture_name",
    [
        "feed_htmlentities",
        "feed_atom_htmlentities",
    ],
)
async def test_event_htmlentities(
    hass: HomeAssistant,
    snapshot: SnapshotAssertion,
    fixture_name,
    request: pytest.FixtureRequest,
) -> None:
    """Check the event entity attributes for feeds containing HTML entities."""
    mock_entry = create_mock_entry(VALID_CONFIG_DEFAULT)
    mock_entry.add_to_hass(hass)

    # The feed fixture is looked up by its parametrized name.
    feed_payload = request.getfixturevalue(fixture_name)
    with patch(
        "homeassistant.components.feedreader.coordinator.feedparser.http.get",
        side_effect=[feed_payload],
    ):
        assert await hass.config_entries.async_setup(mock_entry.entry_id)
        await hass.async_block_till_done()

        event_state = hass.states.get("event.mock_title")
        assert event_state
        # The stored snapshot holds the expected, already-unescaped attributes.
        assert event_state.attributes == snapshot

View File

@ -12,6 +12,7 @@ import pytest
from homeassistant.components.feedreader.const import DOMAIN from homeassistant.components.feedreader.const import DOMAIN
from homeassistant.core import Event, HomeAssistant from homeassistant.core import Event, HomeAssistant
from homeassistant.helpers import device_registry as dr
import homeassistant.util.dt as dt_util import homeassistant.util.dt as dt_util
from . import async_setup_config_entry, create_mock_entry from . import async_setup_config_entry, create_mock_entry
@ -357,3 +358,23 @@ async def test_feed_errors(
freezer.tick(timedelta(hours=1, seconds=1)) freezer.tick(timedelta(hours=1, seconds=1))
async_fire_time_changed(hass) async_fire_time_changed(hass)
await hass.async_block_till_done(wait_background_tasks=True) await hass.async_block_till_done(wait_background_tasks=True)
async def test_feed_atom_htmlentities(
    hass: HomeAssistant, feed_atom_htmlentities, device_registry: dr.DeviceRegistry
) -> None:
    """Test ATOM feed author with HTML Entities.

    Sets up a config entry while the coordinator's HTTP fetch is patched to
    return the ATOM fixture, then checks that the device registered for the
    entry reports the unescaped author name as its manufacturer.
    """
    entry = create_mock_entry(VALID_CONFIG_DEFAULT)
    entry.add_to_hass(hass)
    with patch(
        "homeassistant.components.feedreader.coordinator.feedparser.http.get",
        side_effect=[feed_atom_htmlentities],
    ):
        assert await hass.config_entries.async_setup(entry.entry_id)
        await hass.async_block_till_done()

        device_entry = device_registry.async_get_device(
            identifiers={(DOMAIN, entry.entry_id)}
        )
        # Fail with a clear assertion, rather than an AttributeError on the
        # next line, if no device was registered for this entry.
        assert device_entry is not None
        assert device_entry.manufacturer == "Juan Pérez"