diff --git a/homeassistant/components/recorder/core.py b/homeassistant/components/recorder/core.py index ad05cad3d54..07591c468b8 100644 --- a/homeassistant/components/recorder/core.py +++ b/homeassistant/components/recorder/core.py @@ -119,6 +119,7 @@ from .tasks import ( WaitTask, ) from .util import ( + async_create_backup_failure_issue, build_mysqldb_conv, dburl_to_path, end_incomplete_runs, @@ -1006,9 +1007,11 @@ class Recorder(threading.Thread): def _async_set_database_locked(task: DatabaseLockTask) -> None: task.database_locked.set() + local_start_time = dt_util.now() + hass = self.hass with write_lock_db_sqlite(self): # Notify that lock is being held, wait until database can be used again. - self.hass.add_job(_async_set_database_locked, task) + hass.add_job(_async_set_database_locked, task) while not task.database_unlock.wait(timeout=DB_LOCK_QUEUE_CHECK_TIMEOUT): if self._reached_max_backlog_percentage(90): _LOGGER.warning( @@ -1020,6 +1023,9 @@ class Recorder(threading.Thread): self.backlog, ) task.queue_overflow = True + hass.add_job( + async_create_backup_failure_issue, self.hass, local_start_time + ) break _LOGGER.info( "Database queue backlog reached %d entries during backup", diff --git a/homeassistant/components/recorder/strings.json b/homeassistant/components/recorder/strings.json index eb162628727..74b248354d7 100644 --- a/homeassistant/components/recorder/strings.json +++ b/homeassistant/components/recorder/strings.json @@ -12,6 +12,10 @@ "maria_db_range_index_regression": { "title": "Update MariaDB to {min_version} or later resolve a significant performance issue", "description": "Older versions of MariaDB suffer from a significant performance regression when retrieving history data or purging the database. Update to MariaDB version {min_version} or later and restart Home Assistant. If you are using the MariaDB core add-on, make sure to update it to the latest version." 
+ }, + "backup_failed_out_of_resources": { + "title": "Database backup failed due to lack of resources", + "description": "The database backup started at {start_time} failed due to lack of resources. The backup cannot be trusted and must be restarted. This can happen if the database is too large or if the system is under heavy load. Consider upgrading the system hardware or reducing the size of the database by decreasing the number of history days to keep or creating a filter." } }, "services": { diff --git a/homeassistant/components/recorder/util.py b/homeassistant/components/recorder/util.py index 4a1bf940b24..f684160f86f 100644 --- a/homeassistant/components/recorder/util.py +++ b/homeassistant/components/recorder/util.py @@ -470,6 +470,24 @@ def _async_create_mariadb_range_index_regression_issue( ) +@callback +def async_create_backup_failure_issue( + hass: HomeAssistant, + local_start_time: datetime, +) -> None: + """Create an issue when the backup fails because we run out of resources.""" + ir.async_create_issue( + hass, + DOMAIN, + "backup_failed_out_of_resources", + is_fixable=False, + severity=ir.IssueSeverity.CRITICAL, + learn_more_url="https://www.home-assistant.io/integrations/recorder", + translation_key="backup_failed_out_of_resources", + translation_placeholders={"start_time": local_start_time.strftime("%H:%M:%S")}, + ) + + def setup_connection_for_dialect( instance: Recorder, dialect_name: str, diff --git a/tests/components/recorder/test_init.py b/tests/components/recorder/test_init.py index f8aa219fdb4..78af9a64257 100644 --- a/tests/components/recorder/test_init.py +++ b/tests/components/recorder/test_init.py @@ -73,6 +73,7 @@ from homeassistant.const import ( ) from homeassistant.core import Context, CoreState, Event, HomeAssistant, callback from homeassistant.helpers import entity_registry as er, recorder as recorder_helper +from homeassistant.helpers.issue_registry import async_get as async_get_issue_registry from homeassistant.setup import 
async_setup_component, setup_component from homeassistant.util import dt as dt_util from homeassistant.util.json import json_loads @@ -1832,6 +1833,15 @@ async def test_database_lock_and_overflow( assert "Database queue backlog reached more than" in caplog.text assert not instance.unlock_database() + registry = async_get_issue_registry(hass) + issue = registry.async_get_issue(DOMAIN, "backup_failed_out_of_resources") + assert issue is not None + assert "start_time" in issue.translation_placeholders + start_time = issue.translation_placeholders["start_time"] + assert start_time is not None + # Should be in H:M:S format + assert start_time.count(":") == 2 + async def test_database_lock_and_overflow_checks_available_memory( async_setup_recorder_instance: RecorderInstanceGenerator, @@ -1910,6 +1920,15 @@ async def test_database_lock_and_overflow_checks_available_memory( db_events = await instance.async_add_executor_job(_get_db_events) assert len(db_events) >= 2 + registry = async_get_issue_registry(hass) + issue = registry.async_get_issue(DOMAIN, "backup_failed_out_of_resources") + assert issue is not None + assert "start_time" in issue.translation_placeholders + start_time = issue.translation_placeholders["start_time"] + assert start_time is not None + # Should be in H:M:S format + assert start_time.count(":") == 2 + async def test_database_lock_timeout( recorder_mock: Recorder, hass: HomeAssistant, recorder_db_url: str