From ffbd2d79c8e03f4a7fee2c65a28181a0158127ea Mon Sep 17 00:00:00 2001 From: Erik Montnemery Date: Wed, 25 Aug 2021 13:00:35 +0200 Subject: [PATCH] Generate statistics for all sensors with a supported state_class (#54882) * Generate statistics for all sensors * Fix bugs, add tests * Address review comments * Cleanup warnings * Simplify tests * Simplify selection of statistics * Fix tests --- .../components/recorder/statistics.py | 13 ++ homeassistant/components/sensor/recorder.py | 127 +++++++----- tests/components/sensor/test_recorder.py | 194 ++++++++++++++++-- 3 files changed, 272 insertions(+), 62 deletions(-) diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py index 34112fcc059..06f7851b1a6 100644 --- a/homeassistant/components/recorder/statistics.py +++ b/homeassistant/components/recorder/statistics.py @@ -227,6 +227,19 @@ def _get_metadata( return metadata +def get_metadata( + hass: HomeAssistant, + statistic_id: str, +) -> dict[str, str] | None: + """Return metadata for a statistic_id.""" + statistic_ids = [statistic_id] + with session_scope(hass=hass) as session: + metadata_ids = _get_metadata_ids(hass, session, [statistic_id]) + if not metadata_ids: + return None + return _get_metadata(hass, session, statistic_ids, None).get(metadata_ids[0]) + + def _configured_unit(unit: str, units: UnitSystem) -> str: """Return the pressure and temperature units configured by the user.""" if unit == PRESSURE_PA: diff --git a/homeassistant/components/sensor/recorder.py b/homeassistant/components/sensor/recorder.py index 6dc91b52c9a..77f91752327 100644 --- a/homeassistant/components/sensor/recorder.py +++ b/homeassistant/components/sensor/recorder.py @@ -9,10 +9,8 @@ from typing import Callable from homeassistant.components.recorder import history, statistics from homeassistant.components.sensor import ( ATTR_STATE_CLASS, - DEVICE_CLASS_BATTERY, DEVICE_CLASS_ENERGY, DEVICE_CLASS_GAS, - DEVICE_CLASS_HUMIDITY, DEVICE_CLASS_MONETARY, DEVICE_CLASS_PRESSURE, DEVICE_CLASS_TEMPERATURE, @@ -26,7 +24,6 @@ from homeassistant.const import ( DEVICE_CLASS_POWER, ENERGY_KILO_WATT_HOUR, ENERGY_WATT_HOUR, - PERCENTAGE, POWER_KILO_WATT, POWER_WATT, PRESSURE_BAR, @@ -51,24 +48,18 @@ from . import ATTR_LAST_RESET, DOMAIN _LOGGER = logging.getLogger(__name__) -DEVICE_CLASS_OR_UNIT_STATISTICS = { +DEVICE_CLASS_STATISTICS: dict[str, dict[str, set[str]]] = { STATE_CLASS_MEASUREMENT: { - DEVICE_CLASS_BATTERY: {"mean", "min", "max"}, - DEVICE_CLASS_HUMIDITY: {"mean", "min", "max"}, - DEVICE_CLASS_POWER: {"mean", "min", "max"}, - DEVICE_CLASS_PRESSURE: {"mean", "min", "max"}, - DEVICE_CLASS_TEMPERATURE: {"mean", "min", "max"}, - PERCENTAGE: {"mean", "min", "max"}, # Deprecated, support will be removed in Home Assistant 2021.11 DEVICE_CLASS_ENERGY: {"sum"}, DEVICE_CLASS_GAS: {"sum"}, DEVICE_CLASS_MONETARY: {"sum"}, }, - STATE_CLASS_TOTAL_INCREASING: { - DEVICE_CLASS_ENERGY: {"sum"}, - DEVICE_CLASS_GAS: {"sum"}, - DEVICE_CLASS_MONETARY: {"sum"}, - }, + STATE_CLASS_TOTAL_INCREASING: {}, +} +DEFAULT_STATISTICS = { + STATE_CLASS_MEASUREMENT: {"mean", "min", "max"}, + STATE_CLASS_TOTAL_INCREASING: {"sum"}, } # Normalized units which will be stored in the statistics table @@ -116,31 +107,20 @@ UNIT_CONVERSIONS: dict[str, dict[str, Callable]] = { } # Keep track of entities for which a warning about unsupported unit has been logged -WARN_UNSUPPORTED_UNIT = set() +WARN_UNSUPPORTED_UNIT = "sensor_warn_unsupported_unit" +WARN_UNSTABLE_UNIT = "sensor_warn_unstable_unit" -def _get_entities(hass: HomeAssistant) -> list[tuple[str, str, str]]: - """Get (entity_id, state_class, key) of all sensors for which to compile statistics. - - Key is either a device class or a unit and is used to index the - DEVICE_CLASS_OR_UNIT_STATISTICS map. - """ +def _get_entities(hass: HomeAssistant) -> list[tuple[str, str, str | None]]: + """Get (entity_id, state_class, device_class) of all sensors for which to compile statistics.""" all_sensors = hass.states.all(DOMAIN) entity_ids = [] for state in all_sensors: if (state_class := state.attributes.get(ATTR_STATE_CLASS)) not in STATE_CLASSES: continue - - if ( - key := state.attributes.get(ATTR_DEVICE_CLASS) - ) in DEVICE_CLASS_OR_UNIT_STATISTICS[state_class]: - entity_ids.append((state.entity_id, state_class, key)) - - if ( - key := state.attributes.get(ATTR_UNIT_OF_MEASUREMENT) - ) in DEVICE_CLASS_OR_UNIT_STATISTICS[state_class]: - entity_ids.append((state.entity_id, state_class, key)) + device_class = state.attributes.get(ATTR_DEVICE_CLASS) + entity_ids.append((state.entity_id, state_class, device_class)) return entity_ids @@ -190,18 +170,39 @@ def _time_weighted_average( return accumulated / (end - start).total_seconds() +def _get_units(fstates: list[tuple[float, State]]) -> set[str | None]: + """Return True if all states have the same unit.""" + return {item[1].attributes.get(ATTR_UNIT_OF_MEASUREMENT) for item in fstates} + + def _normalize_states( - entity_history: list[State], key: str, entity_id: str + hass: HomeAssistant, + entity_history: list[State], + device_class: str | None, + entity_id: str, ) -> tuple[str | None, list[tuple[float, State]]]: """Normalize units.""" unit = None - if key not in UNIT_CONVERSIONS: + if device_class not in UNIT_CONVERSIONS: # We're not normalizing this device class, return the state as they are fstates = [ (float(el.state), el) for el in entity_history if _is_number(el.state) ] if fstates: + all_units = _get_units(fstates) + if len(all_units) > 1: + if WARN_UNSTABLE_UNIT not in hass.data: + hass.data[WARN_UNSTABLE_UNIT] = set() + if entity_id not in hass.data[WARN_UNSTABLE_UNIT]: + hass.data[WARN_UNSTABLE_UNIT].add(entity_id) + _LOGGER.warning( + "The unit of %s is changing, got %s, generation of long term " + "statistics will be suppressed unless the unit is stable", + entity_id, + all_units, + ) + return None, [] unit = fstates[0][1].attributes.get(ATTR_UNIT_OF_MEASUREMENT) return unit, fstates @@ -215,15 +216,17 @@ def _normalize_states( fstate = float(state.state) unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT) # Exclude unsupported units from statistics - if unit not in UNIT_CONVERSIONS[key]: - if entity_id not in WARN_UNSUPPORTED_UNIT: - WARN_UNSUPPORTED_UNIT.add(entity_id) + if unit not in UNIT_CONVERSIONS[device_class]: + if WARN_UNSUPPORTED_UNIT not in hass.data: + hass.data[WARN_UNSUPPORTED_UNIT] = set() + if entity_id not in hass.data[WARN_UNSUPPORTED_UNIT]: + hass.data[WARN_UNSUPPORTED_UNIT].add(entity_id) _LOGGER.warning("%s has unknown unit %s", entity_id, unit) continue - fstates.append((UNIT_CONVERSIONS[key][unit](fstate), state)) + fstates.append((UNIT_CONVERSIONS[device_class][unit](fstate), state)) - return DEVICE_CLASS_UNITS[key], fstates + return DEVICE_CLASS_UNITS[device_class], fstates def reset_detected(state: float, previous_state: float | None) -> bool: @@ -247,18 +250,39 @@ def compile_statistics( hass, start - datetime.timedelta.resolution, end, [i[0] for i in entities] ) - for entity_id, state_class, key in entities: - wanted_statistics = DEVICE_CLASS_OR_UNIT_STATISTICS[state_class][key] + for entity_id, state_class, device_class in entities: + if device_class in DEVICE_CLASS_STATISTICS[state_class]: + wanted_statistics = DEVICE_CLASS_STATISTICS[state_class][device_class] + else: + wanted_statistics = DEFAULT_STATISTICS[state_class] if entity_id not in history_list: continue entity_history = history_list[entity_id] - unit, fstates = _normalize_states(entity_history, key, entity_id) + unit, fstates = _normalize_states(hass, entity_history, device_class, entity_id) if not fstates: continue + # Check metadata + if old_metadata := statistics.get_metadata(hass, entity_id): + if old_metadata["unit_of_measurement"] != unit: + if WARN_UNSTABLE_UNIT not in hass.data: + hass.data[WARN_UNSTABLE_UNIT] = set() + if entity_id not in hass.data[WARN_UNSTABLE_UNIT]: + hass.data[WARN_UNSTABLE_UNIT].add(entity_id) + _LOGGER.warning( + "The unit of %s (%s) does not match the unit of already " + "compiled statistics (%s). Generation of long term statistics " + "will be suppressed unless the unit changes back to %s", + entity_id, + unit, + old_metadata["unit_of_measurement"], + unit, + ) + continue + result[entity_id] = {} # Set meta data @@ -370,8 +394,11 @@ def list_statistic_ids(hass: HomeAssistant, statistic_type: str | None = None) - statistic_ids = {} - for entity_id, state_class, key in entities: - provided_statistics = DEVICE_CLASS_OR_UNIT_STATISTICS[state_class][key] + for entity_id, state_class, device_class in entities: + if device_class in DEVICE_CLASS_STATISTICS[state_class]: + provided_statistics = DEVICE_CLASS_STATISTICS[state_class][device_class] + else: + provided_statistics = DEFAULT_STATISTICS[state_class] if statistic_type is not None and statistic_type not in provided_statistics: continue @@ -386,16 +413,20 @@ def list_statistic_ids(hass: HomeAssistant, statistic_type: str | None = None) - ): continue - native_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT) + metadata = statistics.get_metadata(hass, entity_id) + if metadata: + native_unit: str | None = metadata["unit_of_measurement"] + else: + native_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT) - if key not in UNIT_CONVERSIONS: + if device_class not in UNIT_CONVERSIONS: statistic_ids[entity_id] = native_unit continue - if native_unit not in UNIT_CONVERSIONS[key]: + if native_unit not in UNIT_CONVERSIONS[device_class]: continue - statistics_unit = DEVICE_CLASS_UNITS[key] + statistics_unit = DEVICE_CLASS_UNITS[device_class] statistic_ids[entity_id] = statistics_unit return statistic_ids diff --git a/tests/components/sensor/test_recorder.py b/tests/components/sensor/test_recorder.py index abeda03ed7b..5b82fcf0506 100644 --- a/tests/components/sensor/test_recorder.py +++ b/tests/components/sensor/test_recorder.py @@ -107,24 +107,34 @@ def test_compile_hourly_statistics( @pytest.mark.parametrize("attributes", [TEMPERATURE_SENSOR_ATTRIBUTES]) def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes): """Test compiling hourly statistics for unsupported sensor.""" - attributes = dict(attributes) zero = dt_util.utcnow() hass = hass_recorder() recorder = hass.data[DATA_INSTANCE] setup_component(hass, "sensor", {}) four, states = record_states(hass, zero, "sensor.test1", attributes) - if "unit_of_measurement" in attributes: - attributes["unit_of_measurement"] = "invalid" - _, _states = record_states(hass, zero, "sensor.test2", attributes) - states = {**states, **_states} - attributes.pop("unit_of_measurement") - _, _states = record_states(hass, zero, "sensor.test3", attributes) - states = {**states, **_states} - attributes["state_class"] = "invalid" - _, _states = record_states(hass, zero, "sensor.test4", attributes) + + attributes_tmp = dict(attributes) + attributes_tmp["unit_of_measurement"] = "invalid" + _, _states = record_states(hass, zero, "sensor.test2", attributes_tmp) states = {**states, **_states} - attributes.pop("state_class") - _, _states = record_states(hass, zero, "sensor.test5", attributes) + attributes_tmp.pop("unit_of_measurement") + _, _states = record_states(hass, zero, "sensor.test3", attributes_tmp) + states = {**states, **_states} + + attributes_tmp = dict(attributes) + attributes_tmp["state_class"] = "invalid" + _, _states = record_states(hass, zero, "sensor.test4", attributes_tmp) + states = {**states, **_states} + attributes_tmp.pop("state_class") + _, _states = record_states(hass, zero, "sensor.test5", attributes_tmp) + states = {**states, **_states} + + attributes_tmp = dict(attributes) + attributes_tmp["device_class"] = "invalid" + _, _states = record_states(hass, zero, "sensor.test6", attributes_tmp) + states = {**states, **_states} + attributes_tmp.pop("device_class") + _, _states = record_states(hass, zero, "sensor.test7", attributes_tmp) states = {**states, **_states} hist = history.get_significant_states(hass, zero, four) @@ -134,7 +144,9 @@ def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes wait_recording_done(hass) statistic_ids = list_statistic_ids(hass) assert statistic_ids == [ - {"statistic_id": "sensor.test1", "unit_of_measurement": "°C"} + {"statistic_id": "sensor.test1", "unit_of_measurement": "°C"}, + {"statistic_id": "sensor.test6", "unit_of_measurement": "°C"}, + {"statistic_id": "sensor.test7", "unit_of_measurement": "°C"}, ] stats = statistics_during_period(hass, zero) assert stats == { @@ -149,7 +161,31 @@ def test_compile_hourly_statistics_unsupported(hass_recorder, caplog, attributes "state": None, "sum": None, } - ] + ], + "sensor.test6": [ + { + "statistic_id": "sensor.test6", + "start": process_timestamp_to_utc_isoformat(zero), + "mean": approx(16.440677966101696), + "min": approx(10.0), + "max": approx(30.0), + "last_reset": None, + "state": None, + "sum": None, + } + ], + "sensor.test7": [ + { + "statistic_id": "sensor.test7", + "start": process_timestamp_to_utc_isoformat(zero), + "mean": approx(16.440677966101696), + "min": approx(10.0), + "max": approx(30.0), + "last_reset": None, + "state": None, + "sum": None, + } + ], } assert "Error while processing event StatisticsTask" not in caplog.text @@ -859,6 +895,136 @@ def test_list_statistic_ids_unsupported(hass_recorder, caplog, _attributes): hass.states.set("sensor.test6", 0, attributes=attributes) +@pytest.mark.parametrize( + "device_class,unit,native_unit,mean,min,max", + [ + (None, None, None, 16.440677, 10, 30), + (None, "%", "%", 16.440677, 10, 30), + ("battery", "%", "%", 16.440677, 10, 30), + ("battery", None, None, 16.440677, 10, 30), + ], +) +def test_compile_hourly_statistics_changing_units_1( + hass_recorder, caplog, device_class, unit, native_unit, mean, min, max +): + """Test compiling hourly statistics where units change from one hour to the next.""" + zero = dt_util.utcnow() + hass = hass_recorder() + recorder = hass.data[DATA_INSTANCE] + setup_component(hass, "sensor", {}) + attributes = { + "device_class": device_class, + "state_class": "measurement", + "unit_of_measurement": unit, + } + four, states = record_states(hass, zero, "sensor.test1", attributes) + attributes["unit_of_measurement"] = "cats" + four, _states = record_states( + hass, zero + timedelta(hours=1), "sensor.test1", attributes + ) + states["sensor.test1"] += _states["sensor.test1"] + four, _states = record_states( + hass, zero + timedelta(hours=2), "sensor.test1", attributes + ) + states["sensor.test1"] += _states["sensor.test1"] + hist = history.get_significant_states(hass, zero, four) + assert dict(states) == dict(hist) + + recorder.do_adhoc_statistics(period="hourly", start=zero) + wait_recording_done(hass) + assert "does not match the unit of already compiled" not in caplog.text + statistic_ids = list_statistic_ids(hass) + assert statistic_ids == [ + {"statistic_id": "sensor.test1", "unit_of_measurement": native_unit} + ] + stats = statistics_during_period(hass, zero) + assert stats == { + "sensor.test1": [ + { + "statistic_id": "sensor.test1", + "start": process_timestamp_to_utc_isoformat(zero), + "mean": approx(mean), + "min": approx(min), + "max": approx(max), + "last_reset": None, + "state": None, + "sum": None, + } + ] + } + + recorder.do_adhoc_statistics(period="hourly", start=zero + timedelta(hours=2)) + wait_recording_done(hass) + assert ( + "The unit of sensor.test1 (cats) does not match the unit of already compiled " + f"statistics ({native_unit})" in caplog.text + ) + statistic_ids = list_statistic_ids(hass) + assert statistic_ids == [ + {"statistic_id": "sensor.test1", "unit_of_measurement": native_unit} + ] + stats = statistics_during_period(hass, zero) + assert stats == { + "sensor.test1": [ + { + "statistic_id": "sensor.test1", + "start": process_timestamp_to_utc_isoformat(zero), + "mean": approx(mean), + "min": approx(min), + "max": approx(max), + "last_reset": None, + "state": None, + "sum": None, + } + ] + } + assert "Error while processing event StatisticsTask" not in caplog.text + + +@pytest.mark.parametrize( + "device_class,unit,native_unit,mean,min,max", + [ + (None, None, None, 16.440677, 10, 30), + (None, "%", "%", 16.440677, 10, 30), + ("battery", "%", "%", 16.440677, 10, 30), + ("battery", None, None, 16.440677, 10, 30), + ], +) +def test_compile_hourly_statistics_changing_units_2( + hass_recorder, caplog, device_class, unit, native_unit, mean, min, max +): + """Test compiling hourly statistics where units change during an hour.""" + zero = dt_util.utcnow() + hass = hass_recorder() + recorder = hass.data[DATA_INSTANCE] + setup_component(hass, "sensor", {}) + attributes = { + "device_class": device_class, + "state_class": "measurement", + "unit_of_measurement": unit, + } + four, states = record_states(hass, zero, "sensor.test1", attributes) + attributes["unit_of_measurement"] = "cats" + four, _states = record_states( + hass, zero + timedelta(hours=1), "sensor.test1", attributes + ) + states["sensor.test1"] += _states["sensor.test1"] + hist = history.get_significant_states(hass, zero, four) + assert dict(states) == dict(hist) + + recorder.do_adhoc_statistics(period="hourly", start=zero + timedelta(minutes=30)) + wait_recording_done(hass) + assert "The unit of sensor.test1 is changing" in caplog.text + statistic_ids = list_statistic_ids(hass) + assert statistic_ids == [ + {"statistic_id": "sensor.test1", "unit_of_measurement": "cats"} + ] + stats = statistics_during_period(hass, zero) + assert stats == {} + + assert "Error while processing event StatisticsTask" not in caplog.text + + def record_states(hass, zero, entity_id, attributes): """Record some test states.