Unescape HTML Entities from RSS feeds (#130915)

* Unescape HTML Entities from RSS feeds

* Improve tests
This commit is contained in:
Sergio Conde Gómez 2024-11-20 18:21:16 +01:00 committed by GitHub
parent e7a2377c7e
commit 94bf77606b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 181 additions and 4 deletions

View File

@ -2,6 +2,7 @@
from __future__ import annotations
import html
import logging
from typing import Any
import urllib.error
@ -107,7 +108,7 @@ class FeedReaderConfigFlow(ConfigFlow, domain=DOMAIN):
return self.abort_on_import_error(user_input[CONF_URL], "url_error")
return self.show_user_form(user_input, {"base": "url_error"})
feed_title = feed["feed"]["title"]
feed_title = html.unescape(feed["feed"]["title"])
return self.async_create_entry(
title=feed_title,

View File

@ -4,6 +4,7 @@ from __future__ import annotations
from calendar import timegm
from datetime import datetime
import html
from logging import getLogger
from time import gmtime, struct_time
from typing import TYPE_CHECKING
@ -102,7 +103,8 @@ class FeedReaderCoordinator(
"""Set up the feed manager."""
feed = await self._async_fetch_feed()
self.logger.debug("Feed data fetched from %s : %s", self.url, feed["feed"])
self.feed_author = feed["feed"].get("author")
if feed_author := feed["feed"].get("author"):
self.feed_author = html.unescape(feed_author)
self.feed_version = feedparser.api.SUPPORTED_VERSIONS.get(feed["version"])
self._feed = feed

View File

@ -2,6 +2,7 @@
from __future__ import annotations
import html
import logging
from feedparser import FeedParserDict
@ -76,15 +77,22 @@ class FeedReaderEvent(CoordinatorEntity[FeedReaderCoordinator], EventEntity):
# so we always take the first entry in list, since we only care about the latest entry
feed_data: FeedParserDict = data[0]
if description := feed_data.get("description"):
description = html.unescape(description)
if title := feed_data.get("title"):
title = html.unescape(title)
if content := feed_data.get("content"):
if isinstance(content, list) and isinstance(content[0], dict):
content = content[0].get("value")
content = html.unescape(content)
self._trigger_event(
EVENT_FEEDREADER,
{
ATTR_DESCRIPTION: feed_data.get("description"),
ATTR_TITLE: feed_data.get("title"),
ATTR_DESCRIPTION: description,
ATTR_TITLE: title,
ATTR_LINK: feed_data.get("link"),
ATTR_CONTENT: content,
},

View File

@ -64,6 +64,18 @@ def fixture_feed_only_summary(hass: HomeAssistant) -> bytes:
return load_fixture_bytes("feedreader8.xml")
@pytest.fixture(name="feed_htmlentities")
def fixture_feed_htmlentities(hass: HomeAssistant) -> bytes:
    """Provide the bytes of the test feed whose text contains HTML entities."""
    fixture_file = "feedreader9.xml"
    return load_fixture_bytes(fixture_file)
@pytest.fixture(name="feed_atom_htmlentities")
def fixture_feed_atom_htmlentities(hass: HomeAssistant) -> bytes:
    """Provide the bytes of the ATOM test feed whose text contains HTML entities."""
    fixture_file = "feedreader10.xml"
    return load_fixture_bytes(fixture_file)
@pytest.fixture(name="events")
async def fixture_events(hass: HomeAssistant) -> list[Event]:
"""Fixture that catches alexa events."""

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?>
<feed
xmlns="http://www.w3.org/2005/Atom">
<title><![CDATA[ATOM RSS en espa&ntilde;ol]]></title>
<link href="http://example.org/"/>
<updated>2024-11-18T14:00:00Z</updated>
<author>
<name><![CDATA[Juan P&eacute;rez]]></name>
</author>
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
<entry>
<title><![CDATA[T&iacute;tulo]]></title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2024-11-18T14:00:00Z</updated>
<summary><![CDATA[Resumen en espa&ntilde;ol]]></summary>
<content><![CDATA[Contenido en espa&ntilde;ol]]></content>
</entry>
</feed>

View File

@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title><![CDATA[RSS en espa&ntilde;ol]]></title>
<description><![CDATA[Esto es un ejemplo de un feed RSS en espa&ntilde;ol]]></description>
<link>http://www.example.com/main.html</link>
<lastBuildDate>Mon, 18 Nov 2024 15:00:00 +1000</lastBuildDate>
<pubDate>Mon, 18 Nov 2024 15:00:00 +1000</pubDate>
<ttl>1800</ttl>
<item>
<title><![CDATA[T&iacute;tulo 1]]></title>
<description><![CDATA[Descripci&oacute;n 1]]></description>
<link>http://www.example.com/link/1</link>
<guid isPermaLink="false">GUID 1</guid>
<pubDate>Mon, 18 Nov 2024 15:00:00 +1000</pubDate>
<content><![CDATA[Contenido 1 en espa&ntilde;ol]]></content>
</item>
</channel>
</rss>

View File

@ -0,0 +1,27 @@
# serializer version: 1
# name: test_event_htmlentities[feed_atom_htmlentities]
ReadOnlyDict({
'content': 'Contenido en español',
'description': 'Resumen en español',
'event_type': 'feedreader',
'event_types': list([
'feedreader',
]),
'friendly_name': 'Mock Title',
'link': 'http://example.org/2003/12/13/atom03',
'title': 'Título',
})
# ---
# name: test_event_htmlentities[feed_htmlentities]
ReadOnlyDict({
'content': 'Contenido 1 en español',
'description': 'Descripción 1',
'event_type': 'feedreader',
'event_types': list([
'feedreader',
]),
'friendly_name': 'Mock Title',
'link': 'http://www.example.com/link/1',
'title': 'Título 1',
})
# ---

View File

@ -246,3 +246,38 @@ async def test_options_flow(hass: HomeAssistant) -> None:
assert result["data"] == {
CONF_MAX_ENTRIES: 10,
}
@pytest.mark.parametrize(
    ("fixture_name", "expected_title"),
    [
        ("feed_htmlentities", "RSS en español"),
        ("feed_atom_htmlentities", "ATOM RSS en español"),
    ],
)
async def test_feed_htmlentities(
    hass: HomeAssistant,
    feedparser,
    setup_entry,
    fixture_name,
    expected_title,
    request: pytest.FixtureRequest,
) -> None:
    """Check the user flow unescapes HTML entities in the feed title.

    The feed payload is looked up by fixture name so the same test body
    runs once per parametrized feed fixture.
    """
    feed_bytes = request.getfixturevalue(fixture_name)

    with patch(
        "homeassistant.components.feedreader.config_flow.feedparser.http.get",
        side_effect=[feed_bytes],
    ):
        # Start the user-initiated flow; the first step shows the form.
        form_result = await hass.config_entries.flow.async_init(
            DOMAIN, context={"source": SOURCE_USER}
        )
        assert form_result["type"] is FlowResultType.FORM
        assert form_result["step_id"] == "user"

        # Submit the URL; the created entry carries the unescaped title.
        entry_result = await hass.config_entries.flow.async_configure(
            form_result["flow_id"], user_input={CONF_URL: URL}
        )
        assert entry_result["type"] is FlowResultType.CREATE_ENTRY
        assert entry_result["title"] == expected_title

View File

@ -3,6 +3,9 @@
from datetime import timedelta
from unittest.mock import patch
import pytest
from syrupy.assertion import SnapshotAssertion
from homeassistant.components.feedreader.event import (
ATTR_CONTENT,
ATTR_DESCRIPTION,
@ -59,3 +62,31 @@ async def test_event_entity(
assert state.attributes[ATTR_LINK] == "http://www.example.com/link/1"
assert state.attributes[ATTR_CONTENT] == "This is a summary"
assert state.attributes[ATTR_DESCRIPTION] == "Description 1"
@pytest.mark.parametrize(
    "fixture_name",
    ["feed_htmlentities", "feed_atom_htmlentities"],
)
async def test_event_htmlentities(
    hass: HomeAssistant,
    snapshot: SnapshotAssertion,
    fixture_name,
    request: pytest.FixtureRequest,
) -> None:
    """Check the event entity attributes for feeds containing HTML entities.

    The resulting state attributes are compared against the stored snapshot
    for each parametrized feed fixture.
    """
    config_entry = create_mock_entry(VALID_CONFIG_DEFAULT)
    config_entry.add_to_hass(hass)
    feed_bytes = request.getfixturevalue(fixture_name)

    with patch(
        "homeassistant.components.feedreader.coordinator.feedparser.http.get",
        side_effect=[feed_bytes],
    ):
        assert await hass.config_entries.async_setup(config_entry.entry_id)
        await hass.async_block_till_done()

    event_state = hass.states.get("event.mock_title")
    assert event_state
    assert event_state.attributes == snapshot

View File

@ -12,6 +12,7 @@ import pytest
from homeassistant.components.feedreader.const import DOMAIN
from homeassistant.core import Event, HomeAssistant
from homeassistant.helpers import device_registry as dr
import homeassistant.util.dt as dt_util
from . import async_setup_config_entry, create_mock_entry
@ -357,3 +358,23 @@ async def test_feed_errors(
freezer.tick(timedelta(hours=1, seconds=1))
async_fire_time_changed(hass)
await hass.async_block_till_done(wait_background_tasks=True)
async def test_feed_atom_htmlentities(
    hass: HomeAssistant,
    feed_atom_htmlentities: bytes,
    device_registry: dr.DeviceRegistry,
) -> None:
    """Test ATOM feed author with HTML Entities.

    Sets up a config entry against the ATOM fixture feed and checks that the
    device registered for the entry reports the unescaped author name as its
    manufacturer.
    """
    entry = create_mock_entry(VALID_CONFIG_DEFAULT)
    entry.add_to_hass(hass)

    with patch(
        "homeassistant.components.feedreader.coordinator.feedparser.http.get",
        side_effect=[feed_atom_htmlentities],
    ):
        assert await hass.config_entries.async_setup(entry.entry_id)
        await hass.async_block_till_done()

    device_entry = device_registry.async_get_device(
        identifiers={(DOMAIN, entry.entry_id)}
    )
    # Fail with a clear assertion (instead of an AttributeError on None)
    # if no device was registered for this config entry.
    assert device_entry is not None
    assert device_entry.manufacturer == "Juan Pérez"