Deduplicate Thread datasets based on EXTPANID (#93795)

* Deduplicate Thread datasets based on EXTPANID

* Apply suggestions from code review
Erik Montnemery, 2023-05-30 16:16:56 +02:00 (committed by GitHub)
parent d05d67414a
commit 11299c4537
5 changed files with 410 additions and 17 deletions
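
The rule this change implements: two stored datasets are duplicates when they share an Extended PAN ID, and the one with the newer Active Timestamp (compared by seconds, then ticks) wins; during the storage migration a duplicate of the preferred dataset is dropped regardless of its timestamp. A minimal sketch of that comparison, using a stand-in tuple instead of python_otbr_api's Timestamp (the names below are illustrative, not code from this change):

from typing import NamedTuple


class SimpleTimestamp(NamedTuple):
    """Illustrative stand-in for python_otbr_api.tlv_parser.Timestamp."""

    seconds: int
    ticks: int


def is_same_or_older(old: SimpleTimestamp, new: SimpleTimestamp) -> bool:
    """Return True if the incoming dataset should be dropped in favour of the stored one."""
    if old.seconds != new.seconds:
        return old.seconds > new.seconds
    return old.ticks >= new.ticks


# Identical or older timestamps keep the stored dataset; a newer one replaces it.
assert is_same_or_older(SimpleTimestamp(1, 0), SimpleTimestamp(1, 0))
assert is_same_or_older(SimpleTimestamp(2, 0), SimpleTimestamp(1, 500))
assert not is_same_or_older(SimpleTimestamp(1, 0), SimpleTimestamp(1, 1))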


@@ -4,6 +4,7 @@ from __future__ import annotations
import dataclasses
from datetime import datetime
from functools import cached_property
import logging
from typing import Any, cast
from python_otbr_api import tlv_parser
@@ -18,9 +19,11 @@ from homeassistant.util import dt as dt_util, ulid as ulid_util
DATA_STORE = "thread.datasets"
STORAGE_KEY = "thread.datasets"
STORAGE_VERSION_MAJOR = 1
STORAGE_VERSION_MINOR = 1
STORAGE_VERSION_MINOR = 2
SAVE_DELAY = 10
_LOGGER = logging.getLogger(__name__)
class DatasetPreferredError(HomeAssistantError):
"""Raised when attempting to delete the preferred dataset."""
@@ -49,11 +52,9 @@ class DatasetEntry:
return tlv_parser.parse_tlv(self.tlv)
@property
def extended_pan_id(self) -> str | None:
def extended_pan_id(self) -> str:
"""Return extended PAN ID as a hex string."""
if (ext_pan_id := self.dataset.get(MeshcopTLVType.EXTPANID)) is None:
return None
return str(ext_pan_id)
return str(self.dataset[MeshcopTLVType.EXTPANID])
@property
def network_name(self) -> str | None:
@@ -77,6 +78,88 @@ class DatasetEntry:
}
class DatasetStoreStore(Store):
"""Store Thread datasets."""
async def _async_migrate_func(
self, old_major_version: int, old_minor_version: int, old_data: dict[str, Any]
) -> dict[str, Any]:
"""Migrate to the new version."""
if old_major_version == 1:
if old_minor_version < 2:
datasets: dict[str, DatasetEntry] = {}
preferred_dataset = old_data["preferred_dataset"]
for dataset in old_data["datasets"]:
created = cast(datetime, dt_util.parse_datetime(dataset["created"]))
entry = DatasetEntry(
created=created,
id=dataset["id"],
source=dataset["source"],
tlv=dataset["tlv"],
)
if (
MeshcopTLVType.EXTPANID not in entry.dataset
or MeshcopTLVType.ACTIVETIMESTAMP not in entry.dataset
):
_LOGGER.warning(
"Dropped invalid Thread dataset '%s'", entry.tlv
)
if entry.id == preferred_dataset:
preferred_dataset = None
continue
if entry.extended_pan_id in datasets:
if datasets[entry.extended_pan_id].id == preferred_dataset:
_LOGGER.warning(
(
"Dropped duplicated Thread dataset '%s' "
"(duplicate of preferred dataset '%s')"
),
entry.tlv,
datasets[entry.extended_pan_id].tlv,
)
continue
new_timestamp = cast(
tlv_parser.Timestamp,
entry.dataset[MeshcopTLVType.ACTIVETIMESTAMP],
)
old_timestamp = cast(
tlv_parser.Timestamp,
datasets[entry.extended_pan_id].dataset[
MeshcopTLVType.ACTIVETIMESTAMP
],
)
if old_timestamp.seconds > new_timestamp.seconds or (
old_timestamp.seconds == new_timestamp.seconds
and old_timestamp.ticks >= new_timestamp.ticks
):
_LOGGER.warning(
(
"Dropped duplicated Thread dataset '%s' "
"(duplicate of '%s')"
),
entry.tlv,
datasets[entry.extended_pan_id].tlv,
)
continue
_LOGGER.warning(
(
"Dropped duplicated Thread dataset '%s' "
"(duplicate of '%s')"
),
datasets[entry.extended_pan_id].tlv,
entry.tlv,
)
datasets[entry.extended_pan_id] = entry
data = {
"preferred_dataset": preferred_dataset,
"datasets": [dataset.to_json() for dataset in datasets.values()],
}
return data
class DatasetStore:
"""Class to hold a collection of thread datasets."""
@@ -85,7 +168,7 @@ class DatasetStore:
self.hass = hass
self.datasets: dict[str, DatasetEntry] = {}
self._preferred_dataset: str | None = None
self._store: Store[dict[str, Any]] = Store(
self._store: Store[dict[str, Any]] = DatasetStoreStore(
hass,
STORAGE_VERSION_MAJOR,
STORAGE_KEY,
@@ -98,10 +181,64 @@ class DatasetStore:
"""Add dataset, does nothing if it already exists."""
# Make sure the tlv is valid
dataset = tlv_parser.parse_tlv(tlv)
# Don't allow adding a dataset which does not have an extended pan id or
# timestamp
if (
MeshcopTLVType.EXTPANID not in dataset
or MeshcopTLVType.ACTIVETIMESTAMP not in dataset
):
raise HomeAssistantError("Invalid dataset")
# Bail out if the dataset already exists
if any(entry for entry in self.datasets.values() if entry.dataset == dataset):
return
# Update if dataset with same extended pan id exists and the timestamp
# is newer
if entry := next(
(
entry
for entry in self.datasets.values()
if entry.dataset[MeshcopTLVType.EXTPANID]
== dataset[MeshcopTLVType.EXTPANID]
),
None,
):
new_timestamp = cast(
tlv_parser.Timestamp, dataset[MeshcopTLVType.ACTIVETIMESTAMP]
)
old_timestamp = cast(
tlv_parser.Timestamp,
entry.dataset[MeshcopTLVType.ACTIVETIMESTAMP],
)
if old_timestamp.seconds > new_timestamp.seconds or (
old_timestamp.seconds == new_timestamp.seconds
and old_timestamp.ticks >= new_timestamp.ticks
):
_LOGGER.warning(
(
"Got dataset with same extended PAN ID and same or older active"
" timestamp, old dataset: '%s', new dataset: '%s'"
),
entry.tlv,
tlv,
)
return
_LOGGER.debug(
(
"Updating dataset with same extended PAN ID and newer active "
"timestamp, old dataset: '%s', new dataset: '%s'"
),
entry.tlv,
tlv,
)
self.datasets[entry.id] = dataclasses.replace(
self.datasets[entry.id], tlv=tlv
)
self.async_schedule_save()
return
entry = DatasetEntry(source=source, tlv=tlv)
self.datasets[entry.id] = entry
# Set to preferred if there is no preferred dataset
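
For reference, async_add above operates on the parsed TLV rather than the raw hex string. A short sketch of the lookups it relies on, using the DATASET_1_LARGER_TIMESTAMP fixture from the tests below as input (accessing MeshcopTLVType via the tlv_parser module is an assumption of this sketch; the component imports it directly):

from python_otbr_api import tlv_parser

tlv = (
    "0E080000000000020000000300000F35060004001FFFE0020811111111222222220708FDAD70BF"
    "E5AA15DD051000112233445566778899AABBCCDDEEFF030E4F70656E54687265616444656D6F01"
    "0212340410445F2B5CA6F2A93A55CE570A70EFEECB0C0402A0F7F8"
)
dataset = tlv_parser.parse_tlv(tlv)
ext_pan_id = str(dataset[tlv_parser.MeshcopTLVType.EXTPANID])   # dedup key -> "1111111122222222"
timestamp = dataset[tlv_parser.MeshcopTLVType.ACTIVETIMESTAMP]  # Timestamp with .seconds / .ticks
print(ext_pan_id, timestamp.seconds, timestamp.ticks)           # -> 1111111122222222 2 0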


@@ -3,7 +3,7 @@ BASE_URL = "http://core-silabs-multiprotocol:8081"
CONFIG_ENTRY_DATA = {"url": "http://core-silabs-multiprotocol:8081"}
DATASET_CH15 = bytes.fromhex(
"0E080000000000010000000300000F35060004001FFFE00208F642646DA209B1C00708FDF57B5A"
"0E080000000000010000000300000F35060004001FFFE00208F642646DA209B1D00708FDF57B5A"
"0FE2AAF60510DE98B5BA1A528FEE049D4B4B01835375030D4F70656E5468726561642048410102"
"25A40410F5DD18371BFD29E1A601EF6FFAD94C030C0402A0F7F8"
)


@@ -7,13 +7,13 @@ DATASET_1 = (
)
DATASET_2 = (
"0E080000000000010000000300000F35060004001FFFE0020811111111222222220708FDAD70BF"
"0E080000000000010000000300000F35060004001FFFE0020811111111222222330708FDAD70BF"
"E5AA15DD051000112233445566778899AABBCCDDEEFF030E486f6d65417373697374616e742101"
"0212340410445F2B5CA6F2A93A55CE570A70EFEECB0C0402A0F7F8"
)
DATASET_3 = (
"0E080000000000010000000300000F35060004001FFFE0020811111111222222220708FDAD70BF"
"0E080000000000010000000300000F35060004001FFFE0020811111111222222440708FDAD70BF"
"E5AA15DD051000112233445566778899AABBCCDDEEFF030E7ef09f90a3f09f90a5f09f90a47e01"
"0212340410445F2B5CA6F2A93A55CE570A70EFEECB0C0402A0F7F8"
)
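
DATASET_2 and DATASET_3 previously reused DATASET_1's Extended PAN ID (1111111122222222); with deduplication they would now collapse into a single stored entry, so the fixtures get distinct IDs ending in 33 and 44. A quick sketch to confirm, run where DATASET_2 and DATASET_3 are defined (MeshcopTLVType access via tlv_parser is assumed, as in the sketch above):

from python_otbr_api import tlv_parser

for name, tlv in (("DATASET_2", DATASET_2), ("DATASET_3", DATASET_3)):
    dataset = tlv_parser.parse_tlv(tlv)
    print(name, str(dataset[tlv_parser.MeshcopTLVType.EXTPANID]))
# -> DATASET_2 1111111122222233
# -> DATASET_3 1111111122222244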


@@ -26,7 +26,25 @@ DATASET_1_BAD_CHANNEL = (
)
DATASET_1_NO_CHANNEL = (
"0E08000000000001000035060004001FFFE0020811111111222222220708FDAD70BF"
"0E08000000000001000035060004001FFFE0020811111111222222250708FDAD70BF"
"E5AA15DD051000112233445566778899AABBCCDDEEFF030E4F70656E54687265616444656D6F01"
"0212340410445F2B5CA6F2A93A55CE570A70EFEECB0C0402A0F7F8"
)
DATASET_1_NO_EXTPANID = (
"0E080000000000010000000300000F35060004001FFFE00708FDAD70BF"
"E5AA15DD051000112233445566778899AABBCCDDEEFF030E4F70656E54687265616444656D6F01"
"0212340410445F2B5CA6F2A93A55CE570A70EFEECB0C0402A0F7F8"
)
DATASET_1_NO_ACTIVETIMESTAMP = (
"000300000F35060004001FFFE0020811111111222222220708FDAD70BF"
"E5AA15DD051000112233445566778899AABBCCDDEEFF030E4F70656E54687265616444656D6F01"
"0212340410445F2B5CA6F2A93A55CE570A70EFEECB0C0402A0F7F8"
)
DATASET_1_LARGER_TIMESTAMP = (
"0E080000000000020000000300000F35060004001FFFE0020811111111222222220708FDAD70BF"
"E5AA15DD051000112233445566778899AABBCCDDEEFF030E4F70656E54687265616444656D6F01"
"0212340410445F2B5CA6F2A93A55CE570A70EFEECB0C0402A0F7F8"
)
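
The new fixtures are ordinary operational dataset TLVs (type byte, length byte, value): DATASET_1_NO_EXTPANID drops the 0208... Extended PAN ID entry, DATASET_1_NO_ACTIVETIMESTAMP drops the leading 0E08... Active Timestamp entry, and DATASET_1_LARGER_TIMESTAMP changes the timestamp value from 0000000000010000 to 0000000000020000. Assuming the usual Thread layout of 48-bit seconds, 15-bit ticks and a 1-bit authoritative flag, that is 2 seconds versus DATASET_1's 1 second, hence "larger". A small decoding sketch (the helper is illustrative, not part of the tests):

def decode_active_timestamp(value_hex: str) -> tuple[int, int, bool]:
    """Decode an 8-byte Active Timestamp value into (seconds, ticks, authoritative)."""
    raw = int(value_hex, 16)
    seconds = raw >> 16            # upper 48 bits
    ticks = (raw >> 1) & 0x7FFF    # next 15 bits
    authoritative = bool(raw & 1)  # lowest bit
    return seconds, ticks, authoritative


print(decode_active_timestamp("0000000000010000"))  # DATASET_1                  -> (1, 0, False)
print(decode_active_timestamp("0000000000020000"))  # DATASET_1_LARGER_TIMESTAMP -> (2, 0, False)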
@@ -148,14 +166,14 @@ async def test_dataset_properties(hass: HomeAssistant) -> None:
dataset = store.async_get(dataset_2.id)
assert dataset == dataset_2
assert dataset.channel == 15
assert dataset.extended_pan_id == "1111111122222222"
assert dataset.extended_pan_id == "1111111122222233"
assert dataset.network_name == "HomeAssistant!"
assert dataset.pan_id == "1234"
dataset = store.async_get(dataset_3.id)
assert dataset == dataset_3
assert dataset.channel == 15
assert dataset.extended_pan_id == "1111111122222222"
assert dataset.extended_pan_id == "1111111122222244"
assert dataset.network_name == "~🐣🐥🐤~"
assert dataset.pan_id == "1234"
@@ -164,6 +182,58 @@ async def test_dataset_properties(hass: HomeAssistant) -> None:
assert dataset.channel is None
@pytest.mark.parametrize(
("dataset", "error"),
[
(DATASET_1_BAD_CHANNEL, TLVError),
(DATASET_1_NO_EXTPANID, HomeAssistantError),
(DATASET_1_NO_ACTIVETIMESTAMP, HomeAssistantError),
],
)
async def test_add_bad_dataset(hass: HomeAssistant, dataset, error) -> None:
"""Test adding a bad dataset."""
with pytest.raises(error):
await dataset_store.async_add_dataset(hass, "test", dataset)
async def test_update_dataset_newer(hass: HomeAssistant, caplog) -> None:
"""Test updating a dataset."""
await dataset_store.async_add_dataset(hass, "test", DATASET_1)
await dataset_store.async_add_dataset(hass, "test", DATASET_1_LARGER_TIMESTAMP)
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 1
assert list(store.datasets.values())[0].tlv == DATASET_1_LARGER_TIMESTAMP
assert (
"Updating dataset with same extended PAN ID and newer active timestamp"
in caplog.text
)
assert (
"Got dataset with same extended PAN ID and same or older active timestamp"
not in caplog.text
)
async def test_update_dataset_older(hass: HomeAssistant, caplog) -> None:
"""Test updating a dataset."""
await dataset_store.async_add_dataset(hass, "test", DATASET_1_LARGER_TIMESTAMP)
await dataset_store.async_add_dataset(hass, "test", DATASET_1)
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 1
assert list(store.datasets.values())[0].tlv == DATASET_1_LARGER_TIMESTAMP
assert (
"Updating dataset with same extended PAN ID and newer active timestamp"
not in caplog.text
)
assert (
"Got dataset with same extended PAN ID and same or older active timestamp"
in caplog.text
)
async def test_load_datasets(hass: HomeAssistant) -> None:
"""Make sure that we can load/save data correctly."""
@@ -234,19 +304,19 @@ async def test_loading_datasets_from_storage(
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id1",
"source": "source_1",
"tlv": "DATASET_1",
"tlv": DATASET_1,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id2",
"source": "source_2",
"tlv": "DATASET_2",
"tlv": DATASET_2,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id3",
"source": "source_3",
"tlv": "DATASET_3",
"tlv": DATASET_3,
},
],
"preferred_dataset": "id1",
@@ -256,3 +326,189 @@ async def test_loading_datasets_from_storage(
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 3
assert store.preferred_dataset == "id1"
async def test_migrate_drop_bad_datasets(
hass: HomeAssistant, hass_storage: dict[str, Any], caplog
) -> None:
"""Test migrating the dataset store when the store has bad datasets."""
hass_storage[dataset_store.STORAGE_KEY] = {
"version": dataset_store.STORAGE_VERSION_MAJOR,
"minor_version": 1,
"data": {
"datasets": [
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id1",
"source": "source_1",
"tlv": DATASET_1,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id2",
"source": "source_2",
"tlv": DATASET_1_NO_EXTPANID,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id3",
"source": "source_3",
"tlv": DATASET_1_NO_ACTIVETIMESTAMP,
},
],
"preferred_dataset": "id1",
},
}
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 1
assert list(store.datasets.values())[0].tlv == DATASET_1
assert store.preferred_dataset == "id1"
assert f"Dropped invalid Thread dataset '{DATASET_1_NO_EXTPANID}'" in caplog.text
assert (
f"Dropped invalid Thread dataset '{DATASET_1_NO_ACTIVETIMESTAMP}'"
in caplog.text
)
async def test_migrate_drop_bad_datasets_preferred(
hass: HomeAssistant, hass_storage: dict[str, Any], caplog
) -> None:
"""Test migrating the dataset store when the store has bad datasets."""
hass_storage[dataset_store.STORAGE_KEY] = {
"version": dataset_store.STORAGE_VERSION_MAJOR,
"minor_version": 1,
"data": {
"datasets": [
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id1",
"source": "source_1",
"tlv": DATASET_1,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id2",
"source": "source_2",
"tlv": DATASET_1_NO_EXTPANID,
},
],
"preferred_dataset": "id2",
},
}
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 1
assert store.preferred_dataset is None
async def test_migrate_drop_duplicate_datasets(
hass: HomeAssistant, hass_storage: dict[str, Any], caplog
) -> None:
"""Test migrating the dataset store when the store has duplicated datasets."""
hass_storage[dataset_store.STORAGE_KEY] = {
"version": dataset_store.STORAGE_VERSION_MAJOR,
"minor_version": 1,
"data": {
"datasets": [
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id1",
"source": "source_1",
"tlv": DATASET_1,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id2",
"source": "source_2",
"tlv": DATASET_1_LARGER_TIMESTAMP,
},
],
"preferred_dataset": None,
},
}
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 1
assert list(store.datasets.values())[0].tlv == DATASET_1_LARGER_TIMESTAMP
assert store.preferred_dataset is None
assert (
f"Dropped duplicated Thread dataset '{DATASET_1}' "
f"(duplicate of '{DATASET_1_LARGER_TIMESTAMP}')"
) in caplog.text
async def test_migrate_drop_duplicate_datasets_2(
hass: HomeAssistant, hass_storage: dict[str, Any], caplog
) -> None:
"""Test migrating the dataset store when the store has duplicated datasets."""
hass_storage[dataset_store.STORAGE_KEY] = {
"version": dataset_store.STORAGE_VERSION_MAJOR,
"minor_version": 1,
"data": {
"datasets": [
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id2",
"source": "source_2",
"tlv": DATASET_1_LARGER_TIMESTAMP,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id1",
"source": "source_1",
"tlv": DATASET_1,
},
],
"preferred_dataset": None,
},
}
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 1
assert list(store.datasets.values())[0].tlv == DATASET_1_LARGER_TIMESTAMP
assert store.preferred_dataset is None
assert (
f"Dropped duplicated Thread dataset '{DATASET_1}' "
f"(duplicate of '{DATASET_1_LARGER_TIMESTAMP}')"
) in caplog.text
async def test_migrate_drop_duplicate_datasets_preferred(
hass: HomeAssistant, hass_storage: dict[str, Any], caplog
) -> None:
"""Test migrating the dataset store when the store has duplicated datasets."""
hass_storage[dataset_store.STORAGE_KEY] = {
"version": dataset_store.STORAGE_VERSION_MAJOR,
"minor_version": 1,
"data": {
"datasets": [
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id1",
"source": "source_1",
"tlv": DATASET_1,
},
{
"created": "2023-02-02T09:41:13.746514+00:00",
"id": "id2",
"source": "source_2",
"tlv": DATASET_1_LARGER_TIMESTAMP,
},
],
"preferred_dataset": "id1",
},
}
store = await dataset_store.async_get_store(hass)
assert len(store.datasets) == 1
assert list(store.datasets.values())[0].tlv == DATASET_1
assert store.preferred_dataset == "id1"
assert (
f"Dropped duplicated Thread dataset '{DATASET_1_LARGER_TIMESTAMP}' "
f"(duplicate of preferred dataset '{DATASET_1}')"
) in caplog.text


@@ -166,7 +166,7 @@ async def test_list_get_dataset(
"channel": 15,
"created": dataset_2.created.isoformat(),
"dataset_id": dataset_2.id,
"extended_pan_id": "1111111122222222",
"extended_pan_id": "1111111122222233",
"network_name": "HomeAssistant!",
"pan_id": "1234",
"preferred": False,
@@ -176,7 +176,7 @@ async def test_list_get_dataset(
"channel": 15,
"created": dataset_3.created.isoformat(),
"dataset_id": dataset_3.id,
"extended_pan_id": "1111111122222222",
"extended_pan_id": "1111111122222244",
"network_name": "~🐣🐥🐤~",
"pan_id": "1234",
"preferred": False,