diff --git a/Dockerfile b/Dockerfile index 4bab71774..19e6c9fe7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,6 +26,7 @@ RUN \ musl \ openssl \ yaml \ + nvme-cli \ \ && curl -Lso /usr/bin/cosign "https://github.com/home-assistant/cosign/releases/download/${COSIGN_VERSION}/cosign_${BUILD_ARCH}" \ && chmod a+x /usr/bin/cosign \ diff --git a/supervisor/api/__init__.py b/supervisor/api/__init__.py index 085eaa9b9..2715324b9 100644 --- a/supervisor/api/__init__.py +++ b/supervisor/api/__init__.py @@ -198,6 +198,8 @@ class RestAPI(CoreSysAttributes): web.post("/host/reload", api_host.reload), web.post("/host/options", api_host.options), web.get("/host/services", api_host.services), + web.get("/host/nvme/{device}/status", api_host.nvme_device_status), + web.get("/host/nvme/status", api_host.nvme_device_status), ] ) diff --git a/supervisor/api/const.py b/supervisor/api/const.py index 6eb65df70..f6c9ea49a 100644 --- a/supervisor/api/const.py +++ b/supervisor/api/const.py @@ -16,6 +16,7 @@ ATTR_ADDITIONAL_LOCATIONS = "additional_locations" ATTR_AGENT_VERSION = "agent_version" ATTR_APPARMOR_VERSION = "apparmor_version" ATTR_ATTRIBUTES = "attributes" +ATTR_AVAILABLE_SPARE = "available_spare" ATTR_AVAILABLE_UPDATES = "available_updates" ATTR_BACKGROUND = "background" ATTR_BOOT_CONFIG = "boot_config" @@ -28,9 +29,14 @@ ATTR_BROADCAST_MDNS = "broadcast_mdns" ATTR_BY_ID = "by_id" ATTR_CHILDREN = "children" ATTR_CONNECTION_BUS = "connection_bus" +ATTR_CONTROLLER_BUSY_MINUTES = "controller_busy_minutes" +ATTR_CRITICAL_COMPOSITE_TEMP_MINUTES = "critical_composite_temp_minutes" +ATTR_CRITICAL_WARNING = "critical_warning" ATTR_DATA_DISK = "data_disk" -ATTR_DEVICE = "device" +ATTR_DATA_UNITS_READ = "data_units_read" +ATTR_DATA_UNITS_WRITTEN = "data_units_written" ATTR_DEV_PATH = "dev_path" +ATTR_DEVICE = "device" ATTR_DISKS = "disks" ATTR_DRIVES = "drives" ATTR_DT_SYNCHRONIZED = "dt_synchronized" @@ -40,6 +46,8 @@ ATTR_FALLBACK = "fallback" ATTR_FILESYSTEMS = "filesystems" ATTR_FORCE 
= "force" ATTR_GROUP_IDS = "group_ids" +ATTR_HOST_READ_COMMANDS = "host_read_commands" +ATTR_HOST_WRITE_COMMANDS = "host_write_commands" ATTR_IDENTIFIERS = "identifiers" ATTR_IS_ACTIVE = "is_active" ATTR_IS_OWNER = "is_owner" @@ -50,10 +58,16 @@ ATTR_LOCAL_ONLY = "local_only" ATTR_LOCATION_ATTRIBUTES = "location_attributes" ATTR_LOCATIONS = "locations" ATTR_MDNS = "mdns" +ATTR_MEDIA_ERRORS = "media_errors" ATTR_MODEL = "model" -ATTR_MOUNTS = "mounts" ATTR_MOUNT_POINTS = "mount_points" +ATTR_MOUNTS = "mounts" +ATTR_NUMBER_ERROR_LOG_ENTRIES = "number_error_log_entries" +ATTR_NVME_DEVICES = "nvme_devices" ATTR_PANEL_PATH = "panel_path" +ATTR_PERCENT_USED = "percent_used" +ATTR_POWER_CYCLES = "power_cycles" +ATTR_POWER_ON_HOURS = "power_on_hours" ATTR_REMOVABLE = "removable" ATTR_REMOVE_CONFIG = "remove_config" ATTR_REVISION = "revision" @@ -65,14 +79,17 @@ ATTR_STATUS = "status" ATTR_SUBSYSTEM = "subsystem" ATTR_SYSFS = "sysfs" ATTR_SYSTEM_HEALTH_LED = "system_health_led" +ATTR_TEMPERATURE_KELVIN = "temperature_kelvin" ATTR_TIME_DETECTED = "time_detected" +ATTR_UNSAFE_SHUTDOWNS = "unsafe_shutdowns" ATTR_UPDATE_TYPE = "update_type" ATTR_USAGE = "usage" ATTR_USE_NTP = "use_ntp" -ATTR_USERS = "users" ATTR_USER_PATH = "user_path" +ATTR_USERS = "users" ATTR_VENDOR = "vendor" ATTR_VIRTUALIZATION = "virtualization" +ATTR_WARNING_TEMP_MINUTES = "warning_temp_minutes" class BootSlot(StrEnum): diff --git a/supervisor/api/host.py b/supervisor/api/host.py index 129b1da7a..a7dc90cd5 100644 --- a/supervisor/api/host.py +++ b/supervisor/api/host.py @@ -3,6 +3,7 @@ import asyncio from contextlib import suppress import logging +from pathlib import Path from typing import Any from aiohttp import ClientConnectionResetError, ClientPayloadError, web @@ -21,15 +22,17 @@ from ..const import ( ATTR_DISK_USED, ATTR_FEATURES, ATTR_HOSTNAME, + ATTR_ID, ATTR_KERNEL, ATTR_NAME, ATTR_OPERATING_SYSTEM, + ATTR_PATH, ATTR_SERVICES, ATTR_STATE, ATTR_TIMEZONE, ) from ..coresys import CoreSysAttributes 
-from ..exceptions import APIDBMigrationInProgress, APIError, HostLogError +from ..exceptions import APIDBMigrationInProgress, APIError, APINotFound, HostLogError from ..host.const import ( PARAM_BOOT_ID, PARAM_FOLLOW, @@ -38,22 +41,40 @@ from ..host.const import ( LogFormatter, ) from ..host.logs import SYSTEMD_JOURNAL_GATEWAYD_LINES_MAX +from ..host.nvme.manager import NvmeDevice from ..utils.systemd_journal import journal_logs_reader from .const import ( ATTR_AGENT_VERSION, ATTR_APPARMOR_VERSION, + ATTR_AVAILABLE_SPARE, ATTR_BOOT_TIMESTAMP, ATTR_BOOTS, ATTR_BROADCAST_LLMNR, ATTR_BROADCAST_MDNS, + ATTR_CONTROLLER_BUSY_MINUTES, + ATTR_CRITICAL_COMPOSITE_TEMP_MINUTES, + ATTR_CRITICAL_WARNING, + ATTR_DATA_UNITS_READ, + ATTR_DATA_UNITS_WRITTEN, ATTR_DT_SYNCHRONIZED, ATTR_DT_UTC, ATTR_FORCE, + ATTR_HOST_READ_COMMANDS, + ATTR_HOST_WRITE_COMMANDS, ATTR_IDENTIFIERS, ATTR_LLMNR_HOSTNAME, + ATTR_MEDIA_ERRORS, + ATTR_NUMBER_ERROR_LOG_ENTRIES, + ATTR_NVME_DEVICES, + ATTR_PERCENT_USED, + ATTR_POWER_CYCLES, + ATTR_POWER_ON_HOURS, ATTR_STARTUP_TIME, + ATTR_TEMPERATURE_KELVIN, + ATTR_UNSAFE_SHUTDOWNS, ATTR_USE_NTP, ATTR_VIRTUALIZATION, + ATTR_WARNING_TEMP_MINUTES, CONTENT_TYPE_TEXT, CONTENT_TYPE_X_LOG, ) @@ -117,6 +138,13 @@ class APIHost(CoreSysAttributes): ATTR_BOOT_TIMESTAMP: self.sys_host.info.boot_timestamp, ATTR_BROADCAST_LLMNR: self.sys_host.info.broadcast_llmnr, ATTR_BROADCAST_MDNS: self.sys_host.info.broadcast_mdns, + ATTR_NVME_DEVICES: [ + { + ATTR_ID: dev.id, + ATTR_PATH: dev.path.as_posix(), + } + for dev in self.sys_host.nvme.devices.values() + ], } @api_process @@ -289,3 +317,57 @@ class APIHost(CoreSysAttributes): ) -> web.StreamResponse: """Return systemd-journald logs. 
Wrapped as standard API handler.""" return await self.advanced_logs_handler(request, identifier, follow) + + def get_nvme_device_for_request(self, request: web.Request) -> NvmeDevice: + """Return NVME device, raise an exception if it doesn't exist.""" + if "device" in request.match_info: + device: str = request.match_info["device"] + if device in self.sys_host.nvme.devices: + return self.sys_host.nvme.devices[device] + if device.startswith("/dev") and ( + nvme_device := self.sys_host.nvme.get_by_path(Path(device)) + ): + return nvme_device + raise APINotFound(f"NVME device {device} does not exist") + + if self.sys_os.available: + if self.sys_os.datadisk.disk_used and ( + nvme_device := self.sys_host.nvme.get_by_path( + self.sys_os.datadisk.disk_used.device_path + ) + ): + return nvme_device + raise APIError( + "Data Disk is not an NVME device, an ID for the NVME device is required" + ) + + raise APIError( + "Not using Home Assistant Operating System, an ID for the NVME device is required" + ) + + @api_process + async def nvme_device_status(self, request: web.Request): + """Return status on NVME device from smart log. + + User can provide a path to identify device. Identifier can be omitted if using HAOS and data disk is an NVME device. 
+ """ + nvme_device = self.get_nvme_device_for_request(request) + smart_log = await nvme_device.get_smart_log() + return { + ATTR_AVAILABLE_SPARE: smart_log.avail_spare, + ATTR_CRITICAL_WARNING: smart_log.critical_warning, + ATTR_DATA_UNITS_READ: smart_log.data_units_read, + ATTR_DATA_UNITS_WRITTEN: smart_log.data_units_written, + ATTR_PERCENT_USED: smart_log.percent_used, + ATTR_TEMPERATURE_KELVIN: smart_log.temperature, + ATTR_HOST_READ_COMMANDS: smart_log.host_read_commands, + ATTR_HOST_WRITE_COMMANDS: smart_log.host_write_commands, + ATTR_CONTROLLER_BUSY_MINUTES: smart_log.controller_busy_time, + ATTR_POWER_CYCLES: smart_log.power_cycles, + ATTR_POWER_ON_HOURS: smart_log.power_on_hours, + ATTR_UNSAFE_SHUTDOWNS: smart_log.unsafe_shutdowns, + ATTR_MEDIA_ERRORS: smart_log.media_errors, + ATTR_NUMBER_ERROR_LOG_ENTRIES: smart_log.num_err_log_entries, + ATTR_WARNING_TEMP_MINUTES: smart_log.warning_temp_time, + ATTR_CRITICAL_COMPOSITE_TEMP_MINUTES: smart_log.critical_comp_time, + } diff --git a/supervisor/exceptions.py b/supervisor/exceptions.py index 7abfa9365..d4cb08888 100644 --- a/supervisor/exceptions.py +++ b/supervisor/exceptions.py @@ -307,6 +307,10 @@ class HostLogError(HostError): """Internal error with host log.""" +class HostNvmeError(HostError): + """Error accessing nvme device info.""" + + # API diff --git a/supervisor/host/manager.py b/supervisor/host/manager.py index a8a94f5b9..6051eafae 100644 --- a/supervisor/host/manager.py +++ b/supervisor/host/manager.py @@ -9,7 +9,7 @@ from awesomeversion import AwesomeVersion from ..const import BusEvent from ..coresys import CoreSys, CoreSysAttributes -from ..exceptions import HassioError, HostLogError, PulseAudioError +from ..exceptions import HassioError, HostLogError, HostNvmeError, PulseAudioError from ..hardware.const import PolicyGroup from ..hardware.data import Device from .apparmor import AppArmorControl @@ -18,6 +18,7 @@ from .control import SystemControl from .info import InfoCenter from .logs import 
LogsControl from .network import NetworkManager +from .nvme.manager import NvmeManager from .services import ServiceManager from .sound import SoundControl @@ -38,6 +39,7 @@ class HostManager(CoreSysAttributes): self._network: NetworkManager = NetworkManager(coresys) self._sound: SoundControl = SoundControl(coresys) self._logs: LogsControl = LogsControl(coresys) + self._nvme: NvmeManager = NvmeManager() async def post_init(self) -> Self: """Post init actions that must occur in event loop.""" @@ -79,6 +81,11 @@ class HostManager(CoreSysAttributes): """Return host logs handler.""" return self._logs + @property + def nvme(self) -> NvmeManager: + """Return NVME device manager.""" + return self._nvme + @property def features(self) -> list[HostFeature]: """Return a list of host features.""" @@ -151,6 +158,9 @@ class HostManager(CoreSysAttributes): with suppress(PulseAudioError): await self.sound.update() + with suppress(HostNvmeError): + await self.nvme.update() + _LOGGER.info("Host information reload completed") self.supported_features.cache_clear() # pylint: disable=no-member @@ -167,6 +177,7 @@ class HostManager(CoreSysAttributes): await self.logs.load() await self.network.load() + await self.nvme.load() # Register for events self.sys_bus.register_event(BusEvent.HARDWARE_NEW_DEVICE, self._hardware_events) diff --git a/supervisor/host/nvme/__init__.py b/supervisor/host/nvme/__init__.py new file mode 100644 index 000000000..2f7513ca2 --- /dev/null +++ b/supervisor/host/nvme/__init__.py @@ -0,0 +1 @@ +"""NVME device management.""" diff --git a/supervisor/host/nvme/data.py b/supervisor/host/nvme/data.py new file mode 100644 index 000000000..64b70ebbd --- /dev/null +++ b/supervisor/host/nvme/data.py @@ -0,0 +1,226 @@ +"""NVME cli data structures.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + + +@dataclass +class CriticalWarning: + """NVME Critical Warning model.""" + + value: int + available_spare: int + 
@dataclass
class NvmeSmartLogData:
    """NVME Smart log model.

    Holds the values of one smart log report, with field names matching the
    JSON keys they are read from.

    Documentation on fields at https://manpages.debian.org/testing/libnvme-dev/nvme_smart_log.2.en.html.
    """

    critical_warning: int
    temperature: int
    avail_spare: int
    spare_thresh: int
    percent_used: int
    endurance_grp_critical_warning_summary: int
    data_units_read: int
    data_units_written: int
    host_read_commands: int
    host_write_commands: int
    controller_busy_time: int
    power_cycles: int
    power_on_hours: int
    unsafe_shutdowns: int
    media_errors: int
    num_err_log_entries: int
    warning_temp_time: int
    critical_comp_time: int
    # According to documentation there can be up to 8 of these, depends on the device
    # Documentation says devices should report 0 if not implemented but test device
    # only had 1 and 2 so making them optional to be safe
    temperature_sensor_1: int | None
    temperature_sensor_2: int | None
    temperature_sensor_3: int | None
    temperature_sensor_4: int | None
    temperature_sensor_5: int | None
    temperature_sensor_6: int | None
    temperature_sensor_7: int | None
    temperature_sensor_8: int | None
    thm_temp1_trans_count: int
    thm_temp2_trans_count: int
    thm_temp1_total_time: int
    thm_temp2_total_time: int

    @classmethod
    def from_dict(
        cls: type[NvmeSmartLogData], data: dict[str, Any]
    ) -> NvmeSmartLogData:
        """Create NVME Smart Log Data from dictionary.

        Args:
            data: Decoded JSON object of a smart log report.

        Returns:
            The populated model. Temperature sensors missing from ``data``
            are set to ``None``.

        Raises:
            KeyError: A required key is missing from ``data``.
        """
        # Critical warning seems to sometimes be a number and sometimes be a
        # breakdown of warning types. Only the combined value is kept, so read
        # it directly: the other breakdown flags are not needed and must not
        # make parsing fail when one of them is absent.
        critical_warning = data["critical_warning"]
        if not isinstance(critical_warning, int):
            critical_warning = critical_warning["value"]

        return cls(
            critical_warning=critical_warning,
            temperature=data["temperature"],
            avail_spare=data["avail_spare"],
            spare_thresh=data["spare_thresh"],
            percent_used=data["percent_used"],
            endurance_grp_critical_warning_summary=data[
                "endurance_grp_critical_warning_summary"
            ],
            data_units_read=data["data_units_read"],
            data_units_written=data["data_units_written"],
            host_read_commands=data["host_read_commands"],
            host_write_commands=data["host_write_commands"],
            controller_busy_time=data["controller_busy_time"],
            power_cycles=data["power_cycles"],
            power_on_hours=data["power_on_hours"],
            unsafe_shutdowns=data["unsafe_shutdowns"],
            media_errors=data["media_errors"],
            num_err_log_entries=data["num_err_log_entries"],
            warning_temp_time=data["warning_temp_time"],
            critical_comp_time=data["critical_comp_time"],
            temperature_sensor_1=data.get("temperature_sensor_1"),
            temperature_sensor_2=data.get("temperature_sensor_2"),
            temperature_sensor_3=data.get("temperature_sensor_3"),
            temperature_sensor_4=data.get("temperature_sensor_4"),
            temperature_sensor_5=data.get("temperature_sensor_5"),
            temperature_sensor_6=data.get("temperature_sensor_6"),
            temperature_sensor_7=data.get("temperature_sensor_7"),
            temperature_sensor_8=data.get("temperature_sensor_8"),
            thm_temp1_trans_count=data["thm_temp1_trans_count"],
            thm_temp2_trans_count=data["thm_temp2_trans_count"],
            thm_temp1_total_time=data["thm_temp1_total_time"],
            thm_temp2_total_time=data["thm_temp2_total_time"],
        )
@dataclass
class Controller:
    """Model of one controller entry of an NVME subsystem."""

    controller: str
    cntlid: str
    serial_number: str
    model_number: str
    firmware: str
    transport: str
    address: str
    slot: str
    namespaces: list[Namespace]
    paths: list[Any]

    @classmethod
    def from_dict(cls: type[Controller], data: dict[str, Any]) -> Controller:
        """Build a Controller from its dictionary form.

        The "Namespaces" and "Paths" keys may be absent and default to an
        empty list; every other key is required.
        """
        # attribute name -> key in the source dictionary
        required = (
            ("controller", "Controller"),
            ("cntlid", "Cntlid"),
            ("serial_number", "SerialNumber"),
            ("model_number", "ModelNumber"),
            ("firmware", "Firmware"),
            ("transport", "Transport"),
            ("address", "Address"),
            ("slot", "Slot"),
        )
        scalars = {attr: data[key] for attr, key in required}
        parsed_namespaces = [
            Namespace.from_dict(entry) for entry in data.get("Namespaces", [])
        ]
        return cls(
            **scalars,
            namespaces=parsed_namespaces,
            paths=data.get("Paths", []),
        )


@dataclass
class Subsystem:
    """Model of one NVME subsystem and the controllers it contains."""

    subsystem: str
    subsystem_nqn: str
    controllers: list[Controller]
    namespaces: list[Any]

    @classmethod
    def from_dict(cls: type[Subsystem], data: dict[str, Any]) -> Subsystem:
        """Build a Subsystem from its dictionary form.

        "Controllers" and "Namespaces" may be absent and default to an empty
        list; the namespaces are stored as a copy of the incoming list.
        """
        name = data["Subsystem"]
        nqn = data["SubsystemNQN"]
        controllers = [
            Controller.from_dict(entry) for entry in data.get("Controllers", [])
        ]
        namespaces = [*data.get("Namespaces", [])]
        return cls(
            subsystem=name,
            subsystem_nqn=nqn,
            controllers=controllers,
            namespaces=namespaces,
        )


@dataclass
class Device:
    """Model of one entry of the NVME device list."""

    host_nqn: str
    host_id: str
    subsystems: list[Subsystem]

    @classmethod
    def from_dict(cls: type[Device], data: dict[str, Any]) -> Device:
        """Build a Device from its dictionary form.

        "Subsystems" may be absent and defaults to an empty list.
        """
        nqn, host_id = data["HostNQN"], data["HostID"]
        subsystems = list(map(Subsystem.from_dict, data.get("Subsystems", [])))
        return cls(host_nqn=nqn, host_id=host_id, subsystems=subsystems)
async def _run_nvme_json(cmd: str, action: str) -> dict[str, Any]:
    """Run an nvme-cli command in a shell and return its decoded JSON output.

    Args:
        cmd: Full command line to execute.
        action: Short name of the command, used in error messages.

    Raises:
        HostNvmeError: The command exited non-zero or its output is not JSON.
            Both cases are logged as errors.
    """
    proc = await asyncio.create_subprocess_shell(
        cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await proc.communicate()
    if proc.returncode != 0:
        # stderr is only shown to humans, never let odd bytes hide the failure
        raise HostNvmeError(
            f"Failed to run {action}: {stderr.decode(errors='replace').strip()}",
            _LOGGER.error,
        )
    try:
        return json.loads(stdout.decode())
    except (UnicodeDecodeError, json.JSONDecodeError):
        raise HostNvmeError(
            f"Failed to parse {action} output", _LOGGER.error
        ) from None


class NvmeDevice:
    """Interface for NVME Device.

    Currently just provides smart log access using 'nvme smart-log -o json'.
    """

    def __init__(self, path: Path, device: Device):
        """Initialize object.

        Args:
            path: Device node the smart log is read from.
            device: Entry of the nvme list output this object was built from.
        """
        self.id = device.host_id
        self.path = path
        self.device = device

    async def get_smart_log(self) -> NvmeSmartLogData:
        """Run smart log command and return output.

        Raises:
            HostNvmeError: The command failed or returned data that cannot be
                turned into a smart log model.
        """
        raw = await _run_nvme_json(
            f"nvme smart-log {self.path.as_posix()} -o json", "nvme smart-log"
        )
        try:
            return NvmeSmartLogData.from_dict(raw)
        except (AttributeError, KeyError, TypeError) as err:
            # Valid JSON in an unexpected shape must surface as a host error
            # instead of leaking a raw exception to the caller
            raise HostNvmeError(
                f"Unexpected nvme smart-log output: {err!r}", _LOGGER.error
            ) from err


class NvmeManager:
    """NVME Manager for machine.

    Lists available NVME devices (if any) and provides management capabilities using nvme-cli.
    """

    def __init__(self) -> None:
        """Initialize object."""
        # Known devices keyed by the ID of their nvme list entry
        self.devices: dict[str, NvmeDevice] = {}

    async def _list_nvme_devices(self) -> dict[str, Any]:
        """List all NVME devices on system.

        Raises:
            HostNvmeError: 'nvme list' failed or returned invalid JSON.
        """
        return await _run_nvme_json("nvme list -o json", "nvme list")

    def load(self) -> Awaitable[None]:
        """Load info on NVME devices at startup."""
        return self.update()

    async def update(self) -> None:
        """Refresh info on NVME devices.

        The device map is replaced only after the whole listing was processed;
        if anything fails the previously known devices are kept.

        Raises:
            HostNvmeError: Listing the devices failed or the listing has an
                unexpected structure.
        """
        raw = await self._list_nvme_devices()
        try:
            listing = NvmeList.from_dict(raw)
        except (AttributeError, KeyError, TypeError) as err:
            raise HostNvmeError(
                f"Unexpected nvme list output: {err!r}", _LOGGER.error
            ) from err

        found: dict[str, NvmeDevice] = {}
        for dev in listing.devices:
            # NOTE(review): only the first namespace of the first controller of
            # the first subsystem is considered - confirm this is intended for
            # machines with more than one NVME drive
            if (
                dev.subsystems
                and dev.subsystems[0].controllers
                and dev.subsystems[0].controllers[0].namespaces
                and dev.subsystems[0].controllers[0].namespaces[0].name_space
            ):
                namespace = dev.subsystems[0].controllers[0].namespaces[0]
                found[dev.host_id] = NvmeDevice(
                    Path("/dev", namespace.name_space), dev
                )
            else:
                _LOGGER.info(
                    "Unusable NVME device returned in list with NQN %s and ID %s",
                    dev.host_nqn,
                    dev.host_id,
                )
        self.devices = found

    def get_by_path(self, path: Path) -> NvmeDevice | None:
        """Get NVME device by path if it exists."""
        return next(
            (dev for dev in self.devices.values() if dev.path == path), None
        )
device_path=self.sys_dbus.agent.datadisk.current_device, object_path="", device_object_path="", ) diff --git a/tests/api/test_host.py b/tests/api/test_host.py index 0c00bc21c..b829c691e 100644 --- a/tests/api/test_host.py +++ b/tests/api/test_host.py @@ -1,7 +1,9 @@ """Test Host API.""" from collections.abc import AsyncGenerator +from typing import cast from unittest.mock import ANY, MagicMock, patch +from urllib.parse import quote from aiohttp.test_utils import TestClient import pytest @@ -12,6 +14,8 @@ from supervisor.homeassistant.api import APIState from supervisor.host.const import LogFormat, LogFormatter from supervisor.host.control import SystemControl +from tests.common import load_binary_fixture +from tests.dbus_service_mocks.agent_datadisk import DataDisk as DataDiskService from tests.dbus_service_mocks.base import DBusServiceMock from tests.dbus_service_mocks.systemd import Systemd as SystemdService @@ -413,3 +417,136 @@ async def test_force_shutdown_during_migration( with patch.object(SystemControl, "shutdown") as shutdown: await api_client.post("/host/shutdown", json={"force": True}) shutdown.assert_called_once() + + +@pytest.mark.parametrize( + ("device_id", "smart_log_fixture"), + [ + ("00000000-0000-0000-0000-000000000000", "nvme-smart-log"), + ("00000000-0000-0000-0000-000000000000", "nvme-smart-log-warning-detail"), + (quote("/dev/nvme0n1", safe=""), "nvme-smart-log"), + (quote("/dev/nvme0n1", safe=""), "nvme-smart-log-warning-detail"), + ], +) +async def test_nvme_device_status( + api_client: TestClient, coresys: CoreSys, device_id: str, smart_log_fixture: str +): + """Test getting smart log information on nvme device.""" + with patch( + "supervisor.host.nvme.manager.asyncio.create_subprocess_shell" + ) as shell_mock: + shell_mock.return_value.returncode = 0 + shell_mock.return_value.communicate.side_effect = [ + (load_binary_fixture("nvme-list.json"), b""), + (load_binary_fixture(f"{smart_log_fixture}.json"), b""), + ] + await 
coresys.host.nvme.load() + + resp = await api_client.get(f"/host/nvme/{device_id}/status") + assert resp.status == 200 + body = await resp.json() + assert body["data"]["critical_warning"] == 0 + assert body["data"]["available_spare"] == 100 + assert body["data"]["data_units_read"] == 44707691 + assert body["data"]["data_units_written"] == 54117388 + assert body["data"]["percent_used"] == 1 + assert body["data"]["temperature_kelvin"] == 312 + assert body["data"]["host_read_commands"] == 428871098 + assert body["data"]["host_write_commands"] == 900245782 + assert body["data"]["controller_busy_minutes"] == 2678 + assert body["data"]["power_cycles"] == 652 + assert body["data"]["power_on_hours"] == 3192 + assert body["data"]["unsafe_shutdowns"] == 107 + assert body["data"]["media_errors"] == 0 + assert body["data"]["number_error_log_entries"] == 1069 + assert body["data"]["warning_temp_minutes"] == 0 + assert body["data"]["critical_composite_temp_minutes"] == 0 + + +@pytest.mark.usefixtures("os_available") +async def test_nvme_datadisk_status( + api_client: TestClient, + coresys: CoreSys, + all_dbus_services: dict[str, DBusServiceMock | dict[str, DBusServiceMock]], +): + """Test getting smart log information when datadisk is an nvme device.""" + cast( + DataDiskService, all_dbus_services["agent_datadisk"] + ).current_device = "/dev/nvme0n1" + await coresys.dbus.agent.datadisk.update() + await coresys.os.datadisk.load() + with patch( + "supervisor.host.nvme.manager.asyncio.create_subprocess_shell" + ) as shell_mock: + shell_mock.return_value.returncode = 0 + shell_mock.return_value.communicate.side_effect = [ + (load_binary_fixture("nvme-list.json"), b""), + (load_binary_fixture("nvme-smart-log.json"), b""), + ] + await coresys.host.nvme.load() + + resp = await api_client.get("/host/nvme/status") + assert resp.status == 200 + body = await resp.json() + assert body["data"]["critical_warning"] == 0 + + +async def test_nvme_datadisk_no_os(api_client: TestClient, coresys: 
CoreSys): + """Test failure getting smart log information for datadisk when not using OS.""" + with patch( + "supervisor.host.nvme.manager.asyncio.create_subprocess_shell" + ) as shell_mock: + shell_mock.return_value.returncode = 0 + shell_mock.return_value.communicate.side_effect = [ + (load_binary_fixture("nvme-list.json"), b""), + (load_binary_fixture("nvme-smart-log.json"), b""), + ] + await coresys.host.nvme.load() + + resp = await api_client.get("/host/nvme/status") + assert resp.status == 400 + body = await resp.json() + assert ( + body["message"] + == "Not using Home Assistant Operating System, an ID for the NVME device is required" + ) + + +@pytest.mark.usefixtures("os_available") +async def test_nvme_datadisk_not_nvme(api_client: TestClient, coresys: CoreSys): + """Test failure getting smart log information for datadisk when it is not nvme.""" + with patch( + "supervisor.host.nvme.manager.asyncio.create_subprocess_shell" + ) as shell_mock: + shell_mock.return_value.returncode = 0 + shell_mock.return_value.communicate.side_effect = [ + (load_binary_fixture("nvme-list.json"), b""), + (load_binary_fixture("nvme-smart-log.json"), b""), + ] + await coresys.host.nvme.load() + + resp = await api_client.get("/host/nvme/status") + assert resp.status == 400 + body = await resp.json() + assert ( + body["message"] + == "Data Disk is not an NVME device, an ID for the NVME device is required" + ) + + +async def test_nvme_device_status_404(api_client: TestClient, coresys: CoreSys): + """Test failure getting smart log information for non-existent nvme device.""" + with patch( + "supervisor.host.nvme.manager.asyncio.create_subprocess_shell" + ) as shell_mock: + shell_mock.return_value.returncode = 0 + shell_mock.return_value.communicate.side_effect = [ + (load_binary_fixture("nvme-list.json"), b""), + (load_binary_fixture("nvme-smart-log.json"), b""), + ] + await coresys.host.nvme.load() + + resp = await api_client.get("/host/nvme/does-not-exist/status") + assert 
resp.status == 404 + body = await resp.json() + assert body["message"] == "NVME device does-not-exist does not exist" diff --git a/tests/dbus_service_mocks/agent_datadisk.py b/tests/dbus_service_mocks/agent_datadisk.py index 2830e8fd2..75dc896b5 100644 --- a/tests/dbus_service_mocks/agent_datadisk.py +++ b/tests/dbus_service_mocks/agent_datadisk.py @@ -20,11 +20,12 @@ class DataDisk(DBusServiceMock): object_path = "/io/hass/os/DataDisk" interface = "io.hass.os.DataDisk" + current_device = "/dev/mmcblk1" @dbus_property(access=PropertyAccess.READ) def CurrentDevice(self) -> "s": """Get Current Device.""" - return "/dev/mmcblk1" + return self.current_device @dbus_method() def ChangeDevice(self, arg_0: "s") -> "b": diff --git a/tests/fixtures/nvme-list.json b/tests/fixtures/nvme-list.json new file mode 100644 index 000000000..eb520584b --- /dev/null +++ b/tests/fixtures/nvme-list.json @@ -0,0 +1,39 @@ +{ + "Devices": [ + { + "HostNQN": "nqn.2014-08.org.nvmexpress:uuid:00000000-0000-0000-0000-000000000000", + "HostID": "00000000-0000-0000-0000-000000000000", + "Subsystems": [ + { + "Subsystem": "nvme-subsys0", + "SubsystemNQN": "nqn.2014.08.org.nvmexpress:144d144dSERIAL Samsung SSD 970 EVO Plus 2TB", + "Controllers": [ + { + "Controller": "nvme0", + "Cntlid": "4", + "SerialNumber": "SERIAL", + "ModelNumber": "Samsung SSD 970 EVO Plus 2TB", + "Firmware": "2B2QEXM7", + "Transport": "pcie", + "Address": "0001:01:00.0", + "Slot": "", + "Namespaces": [ + { + "NameSpace": "nvme0n1", + "Generic": "ng0n1", + "NSID": 1, + "UsedBytes": 38262640640, + "MaximumLBA": 3907029168, + "PhysicalSize": 2000398934016, + "SectorSize": 512 + } + ], + "Paths": [] + } + ], + "Namespaces": [] + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/fixtures/nvme-smart-log-warning-detail.json b/tests/fixtures/nvme-smart-log-warning-detail.json new file mode 100644 index 000000000..9d895a821 --- /dev/null +++ b/tests/fixtures/nvme-smart-log-warning-detail.json @@ -0,0 +1,34 @@ +{ + 
"critical_warning": { + "value": 0, + "available_spare": 0, + "temp_threshold": 0, + "reliability_degraded": 0, + "ro": 0, + "vmbu_failed": 0, + "pmr_ro": 0 + }, + "temperature": 312, + "avail_spare": 100, + "spare_thresh": 10, + "percent_used": 1, + "endurance_grp_critical_warning_summary": 0, + "data_units_read": 44707691, + "data_units_written": 54117388, + "host_read_commands": 428871098, + "host_write_commands": 900245782, + "controller_busy_time": 2678, + "power_cycles": 652, + "power_on_hours": 3192, + "unsafe_shutdowns": 107, + "media_errors": 0, + "num_err_log_entries": 1069, + "warning_temp_time": 0, + "critical_comp_time": 0, + "temperature_sensor_1": 312, + "temperature_sensor_2": 307, + "thm_temp1_trans_count": 0, + "thm_temp2_trans_count": 0, + "thm_temp1_total_time": 0, + "thm_temp2_total_time": 0 +} \ No newline at end of file diff --git a/tests/fixtures/nvme-smart-log.json b/tests/fixtures/nvme-smart-log.json new file mode 100644 index 000000000..ceec138c7 --- /dev/null +++ b/tests/fixtures/nvme-smart-log.json @@ -0,0 +1,26 @@ +{ + "critical_warning": 0, + "temperature": 312, + "avail_spare": 100, + "spare_thresh": 10, + "percent_used": 1, + "endurance_grp_critical_warning_summary": 0, + "data_units_read": 44707691, + "data_units_written": 54117388, + "host_read_commands": 428871098, + "host_write_commands": 900245782, + "controller_busy_time": 2678, + "power_cycles": 652, + "power_on_hours": 3192, + "unsafe_shutdowns": 107, + "media_errors": 0, + "num_err_log_entries": 1069, + "warning_temp_time": 0, + "critical_comp_time": 0, + "temperature_sensor_1": 312, + "temperature_sensor_2": 307, + "thm_temp1_trans_count": 0, + "thm_temp2_trans_count": 0, + "thm_temp1_total_time": 0, + "thm_temp2_total_time": 0 +} \ No newline at end of file