mirror of
https://github.com/home-assistant/supervisor.git
synced 2025-08-07 08:17:41 +00:00
Handle disks with non-existing SMART attributes (#6077)
Not all disks have all SMART attributes available, e.g. Sentry showed devices with missing "wctemp". In practice, any SMART attribute could be missing. Make sure we handle this gracefully.
This commit is contained in:
parent
5d851ad747
commit
cad14bf46e
@ -34,43 +34,49 @@ class SmartStatus:
|
||||
"""Smart status information for NVMe devices.
|
||||
|
||||
https://storaged.org/doc/udisks2-api/latest/gdbus-org.freedesktop.UDisks2.NVMe.Controller.html#gdbus-method-org-freedesktop-UDisks2-NVMe-Controller.SmartGetAttributes
|
||||
|
||||
All attributes are optional as their presence depends on the specific NVMe drive,
|
||||
firmware version, and vendor implementation.
|
||||
"""
|
||||
|
||||
available_spare: int
|
||||
spare_threshold: int
|
||||
percent_used: int
|
||||
total_data_read: int
|
||||
total_data_written: int
|
||||
controller_busy_minutes: int
|
||||
power_cycles: int
|
||||
unsafe_shutdowns: int
|
||||
media_errors: int
|
||||
number_error_log_entries: int
|
||||
temperature_sensors: list[int]
|
||||
warning_composite_temperature: int
|
||||
critical_composite_temperature: int
|
||||
warning_temperature_minutes: int
|
||||
critical_temperature_minutes: int
|
||||
available_spare: int | None
|
||||
spare_threshold: int | None
|
||||
percent_used: int | None
|
||||
total_data_read: int | None
|
||||
total_data_written: int | None
|
||||
controller_busy_minutes: int | None
|
||||
power_cycles: int | None
|
||||
unsafe_shutdowns: int | None
|
||||
media_errors: int | None
|
||||
number_error_log_entries: int | None
|
||||
temperature_sensors: list[int] | None
|
||||
warning_composite_temperature: int | None
|
||||
critical_composite_temperature: int | None
|
||||
warning_temperature_minutes: int | None
|
||||
critical_temperature_minutes: int | None
|
||||
|
||||
@classmethod
|
||||
def from_smart_get_attributes_resp(cls, resp: dict[str, Any]):
|
||||
"""Convert SmartGetAttributes response dictionary to instance."""
|
||||
"""Convert SmartGetAttributes response dictionary to instance.
|
||||
|
||||
Safely handles missing attributes as they are vendor/drive dependent.
|
||||
"""
|
||||
return cls(
|
||||
available_spare=resp["avail_spare"],
|
||||
spare_threshold=resp["spare_thresh"],
|
||||
percent_used=resp["percent_used"],
|
||||
total_data_read=resp["total_data_read"],
|
||||
total_data_written=resp["total_data_written"],
|
||||
controller_busy_minutes=resp["ctrl_busy_time"],
|
||||
power_cycles=resp["power_cycles"],
|
||||
unsafe_shutdowns=resp["unsafe_shutdowns"],
|
||||
media_errors=resp["media_errors"],
|
||||
number_error_log_entries=resp["num_err_log_entries"],
|
||||
temperature_sensors=resp["temp_sensors"],
|
||||
warning_composite_temperature=resp["wctemp"],
|
||||
critical_composite_temperature=resp["cctemp"],
|
||||
warning_temperature_minutes=resp["warning_temp_time"],
|
||||
critical_temperature_minutes=resp["critical_temp_time"],
|
||||
available_spare=resp.get("avail_spare"),
|
||||
spare_threshold=resp.get("spare_thresh"),
|
||||
percent_used=resp.get("percent_used"),
|
||||
total_data_read=resp.get("total_data_read"),
|
||||
total_data_written=resp.get("total_data_written"),
|
||||
controller_busy_minutes=resp.get("ctrl_busy_time"),
|
||||
power_cycles=resp.get("power_cycles"),
|
||||
unsafe_shutdowns=resp.get("unsafe_shutdowns"),
|
||||
media_errors=resp.get("media_errors"),
|
||||
number_error_log_entries=resp.get("num_err_log_entries"),
|
||||
temperature_sensors=resp.get("temp_sensors"),
|
||||
warning_composite_temperature=resp.get("wctemp"),
|
||||
critical_composite_temperature=resp.get("cctemp"),
|
||||
warning_temperature_minutes=resp.get("warning_temp_time"),
|
||||
critical_temperature_minutes=resp.get("critical_temp_time"),
|
||||
)
|
||||
|
||||
|
||||
|
@ -141,6 +141,13 @@ class HwDisk(CoreSysAttributes):
|
||||
)
|
||||
return None
|
||||
|
||||
# Check if percent_used is available (vendor/drive dependent)
|
||||
if smart_log.percent_used is None:
|
||||
_LOGGER.debug(
|
||||
"NVMe controller %s does not provide percent_used attribute", drive.id
|
||||
)
|
||||
return None
|
||||
|
||||
# UDisks2 documentation specifies that value can exceed 100
|
||||
if smart_log.percent_used >= 100:
|
||||
_LOGGER.warning(
|
||||
|
@ -70,3 +70,33 @@ async def test_nvme_controller_smart_get_attributes(dbus_session_bus: MessageBus
|
||||
assert smart_log.total_data_written == 27723431936000
|
||||
assert smart_log.controller_busy_minutes == 2682
|
||||
assert smart_log.temperature_sensors == [310, 305, 0, 0, 0, 0, 0, 0]
|
||||
|
||||
|
||||
async def test_nvme_controller_smart_get_attributes_missing(
|
||||
nvme_controller_service: NVMeControllerService, dbus_session_bus: MessageBus
|
||||
):
|
||||
"""Test NVMe Controller smart get attributes with missing vendor-specific attributes."""
|
||||
# Simulate a drive that doesn't provide some optional attributes
|
||||
nvme_controller_service.set_missing_attributes(
|
||||
["wctemp", "cctemp", "warning_temp_time", "critical_temp_time", "temp_sensors"]
|
||||
)
|
||||
|
||||
controller = UDisks2NVMeController(
|
||||
"/org/freedesktop/UDisks2/drives/Samsung_SSD_970_EVO_Plus_2TB_S40123456789ABC"
|
||||
)
|
||||
await controller.connect(dbus_session_bus)
|
||||
|
||||
smart_log = await controller.smart_get_attributes()
|
||||
# Core attributes should still be present
|
||||
assert smart_log.available_spare == 100
|
||||
assert smart_log.percent_used == 1
|
||||
assert smart_log.total_data_read == 22890461184000
|
||||
assert smart_log.total_data_written == 27723431936000
|
||||
assert smart_log.controller_busy_minutes == 2682
|
||||
|
||||
# Optional attributes should be None
|
||||
assert smart_log.warning_composite_temperature is None
|
||||
assert smart_log.critical_composite_temperature is None
|
||||
assert smart_log.warning_temperature_minutes is None
|
||||
assert smart_log.critical_temperature_minutes is None
|
||||
assert smart_log.temperature_sensors is None
|
||||
|
@ -46,6 +46,11 @@ class NVMeController(DBusServiceMock):
|
||||
"critical_temp_time": Variant("i", 0),
|
||||
}
|
||||
|
||||
def set_missing_attributes(self, missing_keys: list[str]):
|
||||
"""Remove specified attributes to simulate drives that don't provide them."""
|
||||
for key in missing_keys:
|
||||
self.smart_get_attributes_response.pop(key, None)
|
||||
|
||||
@dbus_property(access=PropertyAccess.READ)
|
||||
def State(self) -> "s":
|
||||
"""Get State."""
|
||||
|
@ -177,3 +177,16 @@ async def test_try_get_nvme_life_time(
|
||||
coresys.config.path_supervisor
|
||||
)
|
||||
assert lifetime == 50
|
||||
|
||||
|
||||
async def test_try_get_nvme_life_time_missing_percent_used(
|
||||
coresys: CoreSys, nvme_data_disk: NVMeControllerService
|
||||
):
|
||||
"""Test getting lifetime info from an NVMe when percent_used is missing."""
|
||||
# Simulate a drive that doesn't provide percent_used
|
||||
nvme_data_disk.set_missing_attributes(["percent_used"])
|
||||
|
||||
lifetime = await coresys.hardware.disk.get_disk_life_time(
|
||||
coresys.config.path_supervisor
|
||||
)
|
||||
assert lifetime is None
|
||||
|
Loading…
x
Reference in New Issue
Block a user