Improve scrape performance by using lxml parser (#71087)

* Improve scrape performance by using lxml parser

* load it

* tweak

* tweak

* ensure libxml2 is installed in dev container
This commit is contained in:
J. Nick Koston 2022-05-01 21:04:05 -05:00 committed by GitHub
parent c23866e5e5
commit b770ca319e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 9 additions and 2 deletions

View File

@ -18,6 +18,7 @@ RUN \
libavfilter-dev \ libavfilter-dev \
libpcap-dev \ libpcap-dev \
libturbojpeg0 \ libturbojpeg0 \
libxml2 \
git \ git \
cmake \ cmake \
&& apt-get clean \ && apt-get clean \

View File

@ -2,7 +2,7 @@
"domain": "scrape", "domain": "scrape",
"name": "Scrape", "name": "Scrape",
"documentation": "https://www.home-assistant.io/integrations/scrape", "documentation": "https://www.home-assistant.io/integrations/scrape",
"requirements": ["beautifulsoup4==4.11.1"], "requirements": ["beautifulsoup4==4.11.1", "lxml==4.8.0"],
"after_dependencies": ["rest"], "after_dependencies": ["rest"],
"codeowners": ["@fabaff"], "codeowners": ["@fabaff"],
"iot_class": "cloud_polling" "iot_class": "cloud_polling"

View File

@ -154,7 +154,7 @@ class ScrapeSensor(SensorEntity):
def _extract_value(self) -> Any: def _extract_value(self) -> Any:
"""Parse the html extraction in the executor.""" """Parse the html extraction in the executor."""
raw_data = BeautifulSoup(self.rest.data, "html.parser") raw_data = BeautifulSoup(self.rest.data, "lxml")
_LOGGER.debug(raw_data) _LOGGER.debug(raw_data)
try: try:

View File

@ -974,6 +974,9 @@ lupupy==0.0.24
# homeassistant.components.lw12wifi # homeassistant.components.lw12wifi
lw12==0.9.2 lw12==0.9.2
# homeassistant.components.scrape
lxml==4.8.0
# homeassistant.components.nmap_tracker # homeassistant.components.nmap_tracker
mac-vendor-lookup==0.1.11 mac-vendor-lookup==0.1.11

View File

@ -663,6 +663,9 @@ lru-dict==1.1.7
# homeassistant.components.luftdaten # homeassistant.components.luftdaten
luftdaten==0.7.2 luftdaten==0.7.2
# homeassistant.components.scrape
lxml==4.8.0
# homeassistant.components.nmap_tracker # homeassistant.components.nmap_tracker
mac-vendor-lookup==0.1.11 mac-vendor-lookup==0.1.11