Report which data causes JSON serialization error (#31901)

This commit is contained in:
Paulus Schoutsen 2020-02-17 10:49:42 -08:00 committed by GitHub
parent 00ac7a7d70
commit a6b5d73f1c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 74 additions and 10 deletions

View File

@ -1,4 +1,5 @@
"""JSON utility functions."""
from collections import deque
import json
import logging
import os
@ -51,10 +52,17 @@ def save_json(
Returns True on success.
"""
try:
json_data = json.dumps(data, sort_keys=True, indent=4, cls=encoder)
except TypeError:
# pylint: disable=no-member
msg = f"Failed to serialize to JSON: {filename}. Bad data found at {', '.join(find_paths_unserializable_data(data))}"
_LOGGER.error(msg)
raise SerializationError(msg)
tmp_filename = ""
tmp_path = os.path.split(filename)[0]
try:
json_data = json.dumps(data, sort_keys=True, indent=4, cls=encoder)
# Modern versions of Python tempfile create this file with mode 0o600
with tempfile.NamedTemporaryFile(
mode="w", encoding="utf-8", dir=tmp_path, delete=False
@ -64,9 +72,6 @@ def save_json(
if not private:
os.chmod(tmp_filename, 0o644)
os.replace(tmp_filename, filename)
except TypeError as error:
_LOGGER.exception("Failed to serialize to JSON: %s", filename)
raise SerializationError(error)
except OSError as error:
_LOGGER.exception("Saving JSON file failed: %s", filename)
raise WriteError(error)
@ -78,3 +83,39 @@ def save_json(
# If we are cleaning up then something else went wrong, so
# we should suppress likely follow-on errors in the cleanup
_LOGGER.error("JSON replacement cleanup failed: %s", err)
def find_paths_unserializable_data(bad_data: Union[List, Dict]) -> List[str]:
"""Find the paths to unserializable data.
This method is slow! Only use for error handling.
"""
to_process = deque([(bad_data, "$")])
invalid = []
while to_process:
obj, obj_path = to_process.popleft()
try:
json.dumps(obj)
valid = True
except TypeError:
valid = False
if isinstance(obj, dict):
for key, value in obj.items():
try:
# Is key valid?
json.dumps({key: None})
except TypeError:
invalid.append(f"{obj_path}<key: {key}>")
else:
# Process value
to_process.append((value, f"{obj_path}.{key}"))
elif isinstance(obj, list):
for idx, value in enumerate(obj):
to_process.append((value, f"{obj_path}[{idx}]"))
elif not valid: # type: ignore
invalid.append(obj_path)
return invalid

View File

@ -9,13 +9,16 @@ from unittest.mock import Mock
import pytest
from homeassistant.exceptions import HomeAssistantError
from homeassistant.util.json import SerializationError, load_json, save_json
from homeassistant.util.json import (
SerializationError,
find_paths_unserializable_data,
load_json,
save_json,
)
# Test data that can be saved as JSON
TEST_JSON_A = {"a": 1, "B": "two"}
TEST_JSON_B = {"a": "one", "B": 2}
# Test data that can not be saved as JSON (keys must be strings)
TEST_BAD_OBJECT = {("A",): 1}
# Test data that can not be loaded as JSON
TEST_BAD_SERIALIED = "THIS IS NOT JSON\n"
TMP_DIR = None
@ -71,9 +74,12 @@ def test_overwrite_and_reload():
def test_save_bad_data():
"""Test error from trying to save unserialisable data."""
fname = _path_for("test4")
with pytest.raises(SerializationError):
save_json(fname, TEST_BAD_OBJECT)
with pytest.raises(SerializationError) as excinfo:
save_json("test4", {"hello": set()})
assert "Failed to serialize to JSON: test4. Bad data found at $.hello" in str(
excinfo.value
)
def test_load_bad_data():
@ -99,3 +105,20 @@ def test_custom_encoder():
save_json(fname, Mock(), encoder=MockJSONEncoder)
data = load_json(fname)
assert data == "9"
def test_find_unserializable_data():
"""Find unserializeable data."""
assert find_paths_unserializable_data(1) == []
assert find_paths_unserializable_data([1, 2]) == []
assert find_paths_unserializable_data({"something": "yo"}) == []
assert find_paths_unserializable_data({"something": set()}) == ["$.something"]
assert find_paths_unserializable_data({"something": [1, set()]}) == [
"$.something[1]"
]
assert find_paths_unserializable_data([1, {"bla": set(), "blub": set()}]) == [
"$[1].bla",
"$[1].blub",
]
assert find_paths_unserializable_data({("A",): 1}) == ["$<key: ('A',)>"]