diff --git a/homeassistant/components/google_assistant/data_redaction.py b/homeassistant/components/google_assistant/data_redaction.py
new file mode 100644
index 00000000000..ae6fe5f7098
--- /dev/null
+++ b/homeassistant/components/google_assistant/data_redaction.py
@@ -0,0 +1,37 @@
+"""Helpers to redact Google Assistant data when logging."""
+from __future__ import annotations
+
+from collections.abc import Callable
+from typing import Any
+
+from homeassistant.core import callback
+from homeassistant.helpers.redact import async_redact_data, partial_redact
+
+REQUEST_MSG_TO_REDACT: dict[str, Callable[[str], str]] = {
+    "agentUserId": partial_redact,
+    "uuid": partial_redact,
+    "webhookId": partial_redact,
+}
+
+# The key must be the string "id"; a bare `id` would be the builtin function.
+RESPONSE_MSG_TO_REDACT = REQUEST_MSG_TO_REDACT | {"id": partial_redact}
+
+SYNC_MSG_TO_REDACT = REQUEST_MSG_TO_REDACT
+
+
+@callback
+def async_redact_request_msg(msg: dict[str, Any]) -> dict[str, Any]:
+    """Mask sensitive data in message."""
+    return async_redact_data(msg, REQUEST_MSG_TO_REDACT)
+
+
+@callback
+def async_redact_response_msg(msg: dict[str, Any]) -> dict[str, Any]:
+    """Mask sensitive data in message."""
+    return async_redact_data(msg, RESPONSE_MSG_TO_REDACT)
+
+
+@callback
+def async_redact_sync_msg(msg: dict[str, Any]) -> dict[str, Any]:
+    """Mask sensitive data in message."""
+    return async_redact_data(msg, SYNC_MSG_TO_REDACT)
diff --git a/homeassistant/components/google_assistant/helpers.py b/homeassistant/components/google_assistant/helpers.py
index f3d0d24f7c8..d75ebb49509 100644
--- a/homeassistant/components/google_assistant/helpers.py
+++ b/homeassistant/components/google_assistant/helpers.py
@@ -32,6 +32,7 @@ from homeassistant.helpers import (
 )
 from homeassistant.helpers.event import async_call_later
 from homeassistant.helpers.network import get_url
+from homeassistant.helpers.redact import partial_redact
 from homeassistant.helpers.storage import Store
 from homeassistant.util.dt import utcnow
 
@@ -48,6 +49,7 @@ from .const import (
     STORE_AGENT_USER_IDS,
     STORE_GOOGLE_LOCAL_WEBHOOK_ID,
 )
+from .data_redaction import async_redact_request_msg, async_redact_response_msg
 from .error import SmartHomeError
 
 SYNC_DELAY = 15
@@ -332,8 +334,8 @@ class AbstractConfig(ABC):
 
             _LOGGER.debug(
                 "Register webhook handler %s for agent user id %s",
-                webhook_id,
-                user_agent_id,
+                partial_redact(webhook_id),
+                partial_redact(user_agent_id),
             )
             try:
                 webhook.async_register(
@@ -348,8 +350,8 @@ class AbstractConfig(ABC):
             except ValueError:
                 _LOGGER.warning(
                     "Webhook handler %s for agent user id %s is already defined!",
-                    webhook_id,
-                    user_agent_id,
+                    partial_redact(webhook_id),
+                    partial_redact(user_agent_id),
                 )
                 setup_successful = False
                 break
@@ -374,8 +376,8 @@ class AbstractConfig(ABC):
             webhook_id = self.get_local_webhook_id(agent_user_id)
             _LOGGER.debug(
                 "Unregister webhook handler %s for agent user id %s",
-                webhook_id,
-                agent_user_id,
+                partial_redact(webhook_id),
+                partial_redact(agent_user_id),
             )
             webhook.async_unregister(self.hass, webhook_id)
 
@@ -410,7 +412,7 @@ class AbstractConfig(ABC):
                 "Received local message from %s (JS %s):\n%s\n",
                 request.remote,
                 request.headers.get("HA-Cloud-Version", "unknown"),
-                pprint.pformat(payload),
+                pprint.pformat(async_redact_request_msg(payload)),
             )
 
         if (agent_user_id := self.get_local_agent_user_id(webhook_id)) is None:
@@ -421,8 +423,8 @@ class AbstractConfig(ABC):
                     "Cannot process request for webhook %s as no linked agent user is"
                     " found:\n%s\n"
                 ),
-                webhook_id,
-                pprint.pformat(payload),
+                partial_redact(webhook_id),
+                pprint.pformat(async_redact_request_msg(payload)),
             )
             webhook.async_unregister(self.hass, webhook_id)
             return None
@@ -441,7 +443,10 @@ class AbstractConfig(ABC):
         )
 
         if _LOGGER.isEnabledFor(logging.DEBUG):
-            _LOGGER.debug("Responding to local message:\n%s\n", pprint.pformat(result))
+            _LOGGER.debug(
+                "Responding to local message:\n%s\n",
+                pprint.pformat(async_redact_response_msg(result)),
+            )
 
         return json_response(result)
 
diff --git a/homeassistant/components/google_assistant/smart_home.py b/homeassistant/components/google_assistant/smart_home.py
index b8c57812540..7d8cc752342 100644
--- a/homeassistant/components/google_assistant/smart_home.py
+++ b/homeassistant/components/google_assistant/smart_home.py
@@ -18,6 +18,11 @@ from .const import (
     EVENT_QUERY_RECEIVED,
     EVENT_SYNC_RECEIVED,
 )
+from .data_redaction import (
+    async_redact_request_msg,
+    async_redact_response_msg,
+    async_redact_sync_msg,
+)
 from .error import SmartHomeError
 from .helpers import GoogleEntity, RequestData, async_get_entities
 
@@ -42,7 +47,11 @@ async def async_handle_message(hass, config, user_id, message, source):
     response = await _process(hass, data, message)
 
     if response and "errorCode" in response["payload"]:
-        _LOGGER.error("Error handling message %s: %s", message, response["payload"])
+        _LOGGER.error(
+            "Error handling message %s: %s",
+            async_redact_request_msg(message),
+            async_redact_response_msg(response["payload"]),
+        )
 
     return response
 
@@ -118,7 +127,7 @@ async def async_devices_sync(
     devices = await async_devices_sync_response(hass, data.config, agent_user_id)
     response = create_sync_response(agent_user_id, devices)
 
-    _LOGGER.debug("Syncing entities response: %s", response)
+    _LOGGER.debug("Syncing entities response: %s", async_redact_sync_msg(response))
 
     return response
 
diff --git a/homeassistant/helpers/redact.py b/homeassistant/helpers/redact.py
new file mode 100644
index 00000000000..f8df73b9180
--- /dev/null
+++ b/homeassistant/helpers/redact.py
@@ -0,0 +1,75 @@
+"""Helpers to redact sensitive data."""
+from __future__ import annotations
+
+from collections.abc import Callable, Iterable, Mapping
+from typing import Any, TypeVar, cast, overload
+
+from homeassistant.core import callback
+
+REDACTED = "**REDACTED**"
+
+_T = TypeVar("_T")
+_ValueT = TypeVar("_ValueT")
+
+
+def partial_redact(
+    x: str | Any, unmasked_prefix: int = 4, unmasked_suffix: int = 4
+) -> str:
+    """Mask part of a string with *."""
+    if not isinstance(x, str):
+        return REDACTED
+
+    unmasked = unmasked_prefix + unmasked_suffix
+    if len(x) < unmasked * 2:
+        return REDACTED
+
+    if not unmasked_prefix and not unmasked_suffix:
+        return REDACTED
+
+    suffix = x[-unmasked_suffix:] if unmasked_suffix else ""
+    return f"{x[:unmasked_prefix]}***{suffix}"
+
+
+@overload
+def async_redact_data(  # type: ignore[overload-overlap]
+    data: Mapping, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
+) -> dict:
+    ...
+
+
+@overload
+def async_redact_data(
+    data: _T, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
+) -> _T:
+    ...
+
+
+@callback
+def async_redact_data(
+    data: _T, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
+) -> _T:
+    """Redact sensitive data in a dict."""
+    if not isinstance(data, (Mapping, list)):
+        return data
+
+    if isinstance(data, list):
+        return cast(_T, [async_redact_data(val, to_redact) for val in data])
+
+    redacted = {**data}
+
+    for key, value in redacted.items():
+        if value is None:
+            continue
+        if isinstance(value, str) and not value:
+            continue
+        if key in to_redact:
+            if isinstance(to_redact, Mapping):
+                redacted[key] = to_redact[key](value)
+            else:
+                redacted[key] = REDACTED
+        elif isinstance(value, Mapping):
+            redacted[key] = async_redact_data(value, to_redact)
+        elif isinstance(value, list):
+            redacted[key] = [async_redact_data(item, to_redact) for item in value]
+
+    return cast(_T, redacted)
diff --git a/tests/components/google_assistant/test_http.py b/tests/components/google_assistant/test_http.py
index aa7f8472cab..c6589555c3e 100644
--- a/tests/components/google_assistant/test_http.py
+++ b/tests/components/google_assistant/test_http.py
@@ -466,6 +466,6 @@ async def test_async_enable_local_sdk(
     )
     assert resp.status == HTTPStatus.OK
     assert (
-        "Cannot process request for webhook mock_webhook_id as no linked agent user is found:"
+        "Cannot process request for webhook **REDACTED** as no linked agent user is found:"
         in caplog.text
     )
diff --git a/tests/helpers/test_redact.py b/tests/helpers/test_redact.py
new file mode 100644
index 00000000000..73461012907
--- /dev/null
+++ b/tests/helpers/test_redact.py
@@ -0,0 +1,94 @@
+"""Test the data redaction helper."""
+from homeassistant.helpers.redact import REDACTED, async_redact_data, partial_redact
+
+
+def test_redact() -> None:
+    """Test the async_redact_data helper."""
+    data = {
+        "key1": "value1",
+        "key2": ["value2_a", "value2_b"],
+        "key3": [["value_3a", "value_3b"], ["value_3c", "value_3d"]],
+        "key4": {
+            "key4_1": "value4_1",
+            "key4_2": ["value4_2a", "value4_2b"],
+            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
+        },
+        "key5": None,
+        "key6": "",
+        "key7": False,
+    }
+
+    to_redact = {
+        "key1",
+        "key3",
+        "key4_1",
+        "key5",
+        "key6",
+        "key7",
+    }
+
+    assert async_redact_data(data, to_redact) == {
+        "key1": REDACTED,
+        "key2": ["value2_a", "value2_b"],
+        "key3": REDACTED,
+        "key4": {
+            "key4_1": REDACTED,
+            "key4_2": ["value4_2a", "value4_2b"],
+            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
+        },
+        "key5": None,
+        "key6": "",
+        "key7": REDACTED,
+    }
+
+
+def test_redact_custom_redact_function() -> None:
+    """Test the async_redact_data helper."""
+    data = {
+        "key1": "val1val1val1val1",
+        "key2": ["value2_a", "value2_b"],
+        "key3": [
+            ["val_3avalue_3avalue_3a", "value_3bvalue_3bvalue_3b"],
+            ["value_3cvalue_3cvalue_3c", "value_3dvalue_3dvalue_3d"],
+        ],
+        "key4": {
+            "key4_1": "val4_1val4_1val4_1val4_1",
+            "key4_2": ["value4_2a", "value4_2b"],
+            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
+        },
+        "key5": None,
+        "key6": "",
+        "key7": False,
+    }
+
+    to_redact = {
+        "key1": partial_redact,
+        "key3": partial_redact,  # Value is a list, will default to REDACTED
+        "key4_1": partial_redact,
+        "key5": partial_redact,
+        "key6": partial_redact,
+        "key7": partial_redact,  # Value is False, will default to REDACTED
+    }
+
+    assert async_redact_data(data, to_redact) == {
+        "key1": "val1***val1",
+        "key2": ["value2_a", "value2_b"],
+        "key3": REDACTED,
+        "key4": {
+            "key4_1": "val4***l4_1",
+            "key4_2": ["value4_2a", "value4_2b"],
+            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
+        },
+        "key5": None,
+        "key6": "",
+        "key7": REDACTED,
+    }
+
+
+def test_partial_redact() -> None:
+    """Test the partial_redact helper."""
+    assert partial_redact(None, 0, 0) == REDACTED
+    assert partial_redact("short_string") == REDACTED
+    assert partial_redact("long_enough_string") == "long***ring"
+    assert partial_redact("long_enough_string", 2, 2) == "lo***ng"
+    assert partial_redact("long_enough_string", 0, 0) == REDACTED