Improve YAML Dump times with C Dumper (#73424)

This commit is contained in:
J. Nick Koston 2022-06-13 10:14:30 -10:00 committed by GitHub
parent 0ffeb6c304
commit 034c0c0593
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 14 deletions

View File

@ -5,13 +5,12 @@ from collections.abc import Callable, Sequence
from typing import Any, TypedDict, cast from typing import Any, TypedDict, cast
import voluptuous as vol import voluptuous as vol
import yaml
from homeassistant.backports.enum import StrEnum from homeassistant.backports.enum import StrEnum
from homeassistant.const import CONF_MODE, CONF_UNIT_OF_MEASUREMENT from homeassistant.const import CONF_MODE, CONF_UNIT_OF_MEASUREMENT
from homeassistant.core import split_entity_id, valid_entity_id from homeassistant.core import split_entity_id, valid_entity_id
from homeassistant.util import decorator from homeassistant.util import decorator
from homeassistant.util.yaml.dumper import represent_odict from homeassistant.util.yaml.dumper import add_representer, represent_odict
from . import config_validation as cv from . import config_validation as cv
@ -889,7 +888,7 @@ class TimeSelector(Selector):
return cast(str, data) return cast(str, data)
yaml.SafeDumper.add_representer( add_representer(
Selector, Selector,
lambda dumper, value: represent_odict( lambda dumper, value: represent_odict(
dumper, "tag:yaml.org,2002:map", value.serialize() dumper, "tag:yaml.org,2002:map", value.serialize()

View File

@ -1,5 +1,6 @@
"""Custom dumper and representers.""" """Custom dumper and representers."""
from collections import OrderedDict from collections import OrderedDict
from typing import Any
import yaml import yaml
@ -8,10 +9,20 @@ from .objects import Input, NodeListClass
# mypy: allow-untyped-calls, no-warn-return-any # mypy: allow-untyped-calls, no-warn-return-any
try:
from yaml import CSafeDumper as FastestAvailableSafeDumper
except ImportError:
from yaml import SafeDumper as FastestAvailableSafeDumper # type: ignore[misc]
def dump(_dict: dict) -> str: def dump(_dict: dict) -> str:
"""Dump YAML to a string and remove null.""" """Dump YAML to a string and remove null."""
return yaml.safe_dump( return yaml.dump(
_dict, default_flow_style=False, allow_unicode=True, sort_keys=False _dict,
default_flow_style=False,
allow_unicode=True,
sort_keys=False,
Dumper=FastestAvailableSafeDumper,
).replace(": null\n", ":\n") ).replace(": null\n", ":\n")
@ -51,17 +62,22 @@ def represent_odict( # type: ignore[no-untyped-def]
return node return node
yaml.SafeDumper.add_representer( def add_representer(klass: Any, representer: Any) -> None:
"""Add a representer to the dumper."""
FastestAvailableSafeDumper.add_representer(klass, representer)
add_representer(
OrderedDict, OrderedDict,
lambda dumper, value: represent_odict(dumper, "tag:yaml.org,2002:map", value), lambda dumper, value: represent_odict(dumper, "tag:yaml.org,2002:map", value),
) )
yaml.SafeDumper.add_representer( add_representer(
NodeListClass, NodeListClass,
lambda dumper, value: dumper.represent_sequence("tag:yaml.org,2002:seq", value), lambda dumper, value: dumper.represent_sequence("tag:yaml.org,2002:seq", value),
) )
yaml.SafeDumper.add_representer( add_representer(
Input, Input,
lambda dumper, value: dumper.represent_scalar("!input", value.name), lambda dumper, value: dumper.represent_scalar("!input", value.name),
) )

View File

@ -5,6 +5,7 @@ from unittest.mock import Mock, patch
import pytest import pytest
from homeassistant.setup import async_setup_component from homeassistant.setup import async_setup_component
from homeassistant.util.yaml import parse_yaml
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
@ -130,9 +131,18 @@ async def test_save_blueprint(hass, aioclient_mock, hass_ws_client):
assert msg["id"] == 6 assert msg["id"] == 6
assert msg["success"] assert msg["success"]
assert write_mock.mock_calls assert write_mock.mock_calls
assert write_mock.call_args[0] == ( # There are subtle differences in the dumper quoting
"blueprint:\n name: Call service based on event\n domain: automation\n input:\n trigger_event:\n selector:\n text: {}\n service_to_call:\n a_number:\n selector:\n number:\n mode: box\n step: 1.0\n source_url: https://github.com/balloob/home-assistant-config/blob/main/blueprints/automation/motion_light.yaml\ntrigger:\n platform: event\n event_type: !input 'trigger_event'\naction:\n service: !input 'service_to_call'\n entity_id: light.kitchen\n", # behavior when quoting is not required as both produce
# valid yaml
output_yaml = write_mock.call_args[0][0]
assert output_yaml in (
# pure python dumper will quote the value after !input
"blueprint:\n name: Call service based on event\n domain: automation\n input:\n trigger_event:\n selector:\n text: {}\n service_to_call:\n a_number:\n selector:\n number:\n mode: box\n step: 1.0\n source_url: https://github.com/balloob/home-assistant-config/blob/main/blueprints/automation/motion_light.yaml\ntrigger:\n platform: event\n event_type: !input 'trigger_event'\naction:\n service: !input 'service_to_call'\n entity_id: light.kitchen\n"
# c dumper will not quote the value after !input
"blueprint:\n name: Call service based on event\n domain: automation\n input:\n trigger_event:\n selector:\n text: {}\n service_to_call:\n a_number:\n selector:\n number:\n mode: box\n step: 1.0\n source_url: https://github.com/balloob/home-assistant-config/blob/main/blueprints/automation/motion_light.yaml\ntrigger:\n platform: event\n event_type: !input trigger_event\naction:\n service: !input service_to_call\n entity_id: light.kitchen\n"
) )
# Make sure it is parsable and does not raise
assert len(parse_yaml(output_yaml)) > 1
async def test_save_existing_file(hass, aioclient_mock, hass_ws_client): async def test_save_existing_file(hass, aioclient_mock, hass_ws_client):

View File

@ -33,6 +33,23 @@ def try_both_loaders(request):
importlib.reload(yaml_loader) importlib.reload(yaml_loader)
@pytest.fixture(params=["enable_c_dumper", "disable_c_dumper"])
def try_both_dumpers(request):
"""Run the test with the yaml C dumper enabled and with it disabled."""
if not request.param == "disable_c_dumper":
yield
return
try:
cdumper = pyyaml.CSafeDumper
except ImportError:
return
del pyyaml.CSafeDumper
importlib.reload(yaml_loader)
yield
pyyaml.CSafeDumper = cdumper
importlib.reload(yaml_loader)
def test_simple_list(try_both_loaders): def test_simple_list(try_both_loaders):
"""Test simple list.""" """Test simple list."""
conf = "config:\n - simple\n - list" conf = "config:\n - simple\n - list"
@ -283,12 +300,12 @@ def test_load_yaml_encoding_error(mock_open, try_both_loaders):
yaml_loader.load_yaml("test") yaml_loader.load_yaml("test")
def test_dump(): def test_dump(try_both_dumpers):
"""Test that the dump method returns empty None values.""" """Test that the dump method returns empty None values."""
assert yaml.dump({"a": None, "b": "b"}) == "a:\nb: b\n" assert yaml.dump({"a": None, "b": "b"}) == "a:\nb: b\n"
def test_dump_unicode(): def test_dump_unicode(try_both_dumpers):
"""Test that the dump method returns empty None values.""" """Test that the dump method returns empty None values."""
assert yaml.dump({"a": None, "b": "привет"}) == "a:\nb: привет\n" assert yaml.dump({"a": None, "b": "привет"}) == "a:\nb: привет\n"
@ -424,7 +441,7 @@ class TestSecrets(unittest.TestCase):
) )
def test_representing_yaml_loaded_data(): def test_representing_yaml_loaded_data(try_both_dumpers):
"""Test we can represent YAML loaded data.""" """Test we can represent YAML loaded data."""
files = {YAML_CONFIG_FILE: 'key: [1, "2", 3]'} files = {YAML_CONFIG_FILE: 'key: [1, "2", 3]'}
with patch_yaml_files(files): with patch_yaml_files(files):
@ -460,7 +477,7 @@ def test_input_class():
assert len({input, input2}) == 1 assert len({input, input2}) == 1
def test_input(try_both_loaders): def test_input(try_both_loaders, try_both_dumpers):
"""Test loading inputs.""" """Test loading inputs."""
data = {"hello": yaml.Input("test_name")} data = {"hello": yaml.Input("test_name")}
assert yaml.parse_yaml(yaml.dump(data)) == data assert yaml.parse_yaml(yaml.dump(data)) == data