Recover and restart the recorder if the sqlite database encounters corruption while running (#46612)

home-assistant/core, commit 9b69549f73 (parent 5b0b01d727)
homeassistant/components/recorder/__init__.py

@@ -5,6 +5,7 @@ import concurrent.futures
 from datetime import datetime
 import logging
 import queue
+import sqlite3
 import threading
 import time
 from typing import Any, Callable, List, Optional
@@ -37,7 +38,12 @@ import homeassistant.util.dt as dt_util
 from . import migration, purge
 from .const import CONF_DB_INTEGRITY_CHECK, DATA_INSTANCE, DOMAIN, SQLITE_URL_PREFIX
 from .models import Base, Events, RecorderRuns, States
-from .util import session_scope, validate_or_move_away_sqlite_database
+from .util import (
+    dburl_to_path,
+    move_away_broken_database,
+    session_scope,
+    validate_or_move_away_sqlite_database,
+)

 _LOGGER = logging.getLogger(__name__)

@@ -247,7 +253,7 @@ class Recorder(threading.Thread):
         self._pending_expunge = []
         self.event_session = None
         self.get_session = None
-        self._completed_database_setup = False
+        self._completed_database_setup = None

     @callback
     def async_initialize(self):
@@ -278,39 +284,8 @@ class Recorder(threading.Thread):

     def run(self):
         """Start processing events to save."""
-        tries = 1
-        connected = False
-
-        while not connected and tries <= self.db_max_retries:
-            if tries != 1:
-                time.sleep(self.db_retry_wait)
-            try:
-                self._setup_connection()
-                migration.migrate_schema(self)
-                self._setup_run()
-                connected = True
-                _LOGGER.debug("Connected to recorder database")
-            except Exception as err:  # pylint: disable=broad-except
-                _LOGGER.error(
-                    "Error during connection setup: %s (retrying in %s seconds)",
-                    err,
-                    self.db_retry_wait,
-                )
-                tries += 1
-
-        if not connected:
-
-            @callback
-            def connection_failed():
-                """Connect failed tasks."""
-                self.async_db_ready.set_result(False)
-                persistent_notification.async_create(
-                    self.hass,
-                    "The recorder could not start, please check the log",
-                    "Recorder",
-                )
-
-            self.hass.add_job(connection_failed)
+        if not self._setup_recorder():
             return

         shutdown_task = object()
@@ -346,15 +321,11 @@ class Recorder(threading.Thread):
         self.hass.add_job(register)
         result = hass_started.result()

-        self.event_session = self.get_session()
-        self.event_session.expire_on_commit = False
-
         # If shutdown happened before Home Assistant finished starting
         if result is shutdown_task:
             # Make sure we cleanly close the run if
             # we restart before startup finishes
-            self._close_run()
-            self._close_connection()
+            self._shutdown()
             return

         # Start periodic purge
@@ -370,175 +341,180 @@ class Recorder(threading.Thread):
             async_purge, hour=4, minute=12, second=0
         )

+        _LOGGER.debug("Recorder processing the queue")
         # Use a session for the event read loop
         # with a commit every time the event time
         # has changed. This reduces the disk io.
         while True:
             event = self.queue.get()

             if event is None:
-                self._close_run()
-                self._close_connection()
+                self._shutdown()
                 return
-            if isinstance(event, PurgeTask):
-                # Schedule a new purge task if this one didn't finish
-                if not purge.purge_old_data(self, event.keep_days, event.repack):
-                    self.queue.put(PurgeTask(event.keep_days, event.repack))
-                continue
-            if isinstance(event, WaitTask):
-                self._queue_watch.set()
-                continue
-            if event.event_type == EVENT_TIME_CHANGED:
-                self._keepalive_count += 1
-                if self._keepalive_count >= KEEPALIVE_TIME:
-                    self._keepalive_count = 0
-                    self._send_keep_alive()
-                if self.commit_interval:
-                    self._timechanges_seen += 1
-                    if self._timechanges_seen >= self.commit_interval:
-                        self._timechanges_seen = 0
-                        self._commit_event_session_or_retry()
-                continue
+
+            self._process_one_event(event)
+
+    def _setup_recorder(self) -> bool:
+        """Create schema and connect to the database."""
+        tries = 1
+
+        while tries <= self.db_max_retries:
             try:
-                if event.event_type == EVENT_STATE_CHANGED:
-                    dbevent = Events.from_event(event, event_data="{}")
-                else:
-                    dbevent = Events.from_event(event)
-                dbevent.created = event.time_fired
-                self.event_session.add(dbevent)
-            except (TypeError, ValueError):
-                _LOGGER.warning("Event is not JSON serializable: %s", event)
+                self._setup_connection()
+                migration.migrate_schema(self)
+                self._setup_run()
             except Exception as err:  # pylint: disable=broad-except
-                # Must catch the exception to prevent the loop from collapsing
-                _LOGGER.exception("Error adding event: %s", err)
+                _LOGGER.error(
+                    "Error during connection setup to %s: %s (retrying in %s seconds)",
+                    self.db_url,
+                    err,
+                    self.db_retry_wait,
+                )
+            else:
+                _LOGGER.debug("Connected to recorder database")
+                self._open_event_session()
+                return True

-            if dbevent and event.event_type == EVENT_STATE_CHANGED:
-                try:
-                    dbstate = States.from_event(event)
-                    has_new_state = event.data.get("new_state")
-                    if dbstate.entity_id in self._old_states:
-                        old_state = self._old_states.pop(dbstate.entity_id)
-                        if old_state.state_id:
-                            dbstate.old_state_id = old_state.state_id
-                        else:
-                            dbstate.old_state = old_state
-                    if not has_new_state:
-                        dbstate.state = None
-                    dbstate.event = dbevent
-                    dbstate.created = event.time_fired
-                    self.event_session.add(dbstate)
-                    if has_new_state:
-                        self._old_states[dbstate.entity_id] = dbstate
-                        self._pending_expunge.append(dbstate)
-                except (TypeError, ValueError):
-                    _LOGGER.warning(
-                        "State is not JSON serializable: %s",
-                        event.data.get("new_state"),
-                    )
-                except Exception as err:  # pylint: disable=broad-except
-                    # Must catch the exception to prevent the loop from collapsing
-                    _LOGGER.exception("Error adding state change: %s", err)
+            tries += 1
+            time.sleep(self.db_retry_wait)

-            # If they do not have a commit interval
-            # than we commit right away
-            if not self.commit_interval:
-                self._commit_event_session_or_retry()
+        @callback
+        def connection_failed():
+            """Connect failed tasks."""
+            self.async_db_ready.set_result(False)
+            persistent_notification.async_create(
+                self.hass,
+                "The recorder could not start, please check the log",
+                "Recorder",
+            )
+
+        self.hass.add_job(connection_failed)
+        return False
+
+    def _process_one_event(self, event):
+        """Process one event."""
+        if isinstance(event, PurgeTask):
+            # Schedule a new purge task if this one didn't finish
+            if not purge.purge_old_data(self, event.keep_days, event.repack):
+                self.queue.put(PurgeTask(event.keep_days, event.repack))
+            return
+        if isinstance(event, WaitTask):
+            self._queue_watch.set()
+            return
+        if event.event_type == EVENT_TIME_CHANGED:
+            self._keepalive_count += 1
+            if self._keepalive_count >= KEEPALIVE_TIME:
+                self._keepalive_count = 0
+                self._send_keep_alive()
+            if self.commit_interval:
+                self._timechanges_seen += 1
+                if self._timechanges_seen >= self.commit_interval:
+                    self._timechanges_seen = 0
+                    self._commit_event_session_or_recover()
+            return

-    def _send_keep_alive(self):
         try:
-            _LOGGER.debug("Sending keepalive")
-            self.event_session.connection().scalar(select([1]))
+            if event.event_type == EVENT_STATE_CHANGED:
+                dbevent = Events.from_event(event, event_data="{}")
+            else:
+                dbevent = Events.from_event(event)
+            dbevent.created = event.time_fired
+            self.event_session.add(dbevent)
+        except (TypeError, ValueError):
+            _LOGGER.warning("Event is not JSON serializable: %s", event)
             return
         except Exception as err:  # pylint: disable=broad-except
             # Must catch the exception to prevent the loop from collapsing
-            _LOGGER.error(
-                "Error in database connectivity during keepalive: %s",
-                err,
-            )
-            self._reopen_event_session()
+            _LOGGER.exception("Error adding event: %s", err)
+            return
+
+        if event.event_type == EVENT_STATE_CHANGED:
+            try:
+                dbstate = States.from_event(event)
+                has_new_state = event.data.get("new_state")
+                if dbstate.entity_id in self._old_states:
+                    old_state = self._old_states.pop(dbstate.entity_id)
+                    if old_state.state_id:
+                        dbstate.old_state_id = old_state.state_id
+                    else:
+                        dbstate.old_state = old_state
+                if not has_new_state:
+                    dbstate.state = None
+                dbstate.event = dbevent
+                dbstate.created = event.time_fired
+                self.event_session.add(dbstate)
+                if has_new_state:
+                    self._old_states[dbstate.entity_id] = dbstate
+                    self._pending_expunge.append(dbstate)
+            except (TypeError, ValueError):
+                _LOGGER.warning(
+                    "State is not JSON serializable: %s",
+                    event.data.get("new_state"),
+                )
+            except Exception as err:  # pylint: disable=broad-except
+                # Must catch the exception to prevent the loop from collapsing
+                _LOGGER.exception("Error adding state change: %s", err)
+
+        # If they do not have a commit interval
+        # than we commit right away
+        if not self.commit_interval:
+            self._commit_event_session_or_recover()
+
+    def _commit_event_session_or_recover(self):
+        """Commit changes to the database and recover if the database fails when possible."""
+        try:
+            self._commit_event_session_or_retry()
+            return
+        except exc.DatabaseError as err:
+            if isinstance(err.__cause__, sqlite3.DatabaseError):
+                _LOGGER.exception(
+                    "Unrecoverable sqlite3 database corruption detected: %s", err
+                )
+                self._handle_sqlite_corruption()
+                return
+            _LOGGER.exception("Unexpected error saving events: %s", err)
+        except Exception as err:  # pylint: disable=broad-except
+            # Must catch the exception to prevent the loop from collapsing
+            _LOGGER.exception("Unexpected error saving events: %s", err)

+        self._reopen_event_session()
+        return
+
     def _commit_event_session_or_retry(self):
         tries = 1
         while tries <= self.db_max_retries:
-            if tries != 1:
-                time.sleep(self.db_retry_wait)
-
             try:
                 self._commit_event_session()
                 return
             except (exc.InternalError, exc.OperationalError) as err:
                 if err.connection_invalidated:
-                    _LOGGER.error(
-                        "Database connection invalidated: %s. "
-                        "(retrying in %s seconds)",
-                        err,
-                        self.db_retry_wait,
-                    )
+                    message = "Database connection invalidated"
                 else:
-                    _LOGGER.error(
-                        "Error in database connectivity during commit: %s. "
-                        "(retrying in %s seconds)",
-                        err,
-                        self.db_retry_wait,
-                    )
+                    message = "Error in database connectivity during commit"
+                _LOGGER.error(
+                    "%s: Error executing query: %s. (retrying in %s seconds)",
+                    message,
+                    err,
+                    self.db_retry_wait,
+                )
+                if tries == self.db_max_retries:
+                    raise
+
                 tries += 1
-            except Exception as err:  # pylint: disable=broad-except
-                # Must catch the exception to prevent the loop from collapsing
-                _LOGGER.exception("Error saving events: %s", err)
-                return
-
-        _LOGGER.error(
-            "Error in database update. Could not save " "after %d tries. Giving up",
-            tries,
-        )
-        self._reopen_event_session()
-
-    def _reopen_event_session(self):
-        try:
-            self.event_session.rollback()
-        except Exception as err:  # pylint: disable=broad-except
-            # Must catch the exception to prevent the loop from collapsing
-            _LOGGER.exception("Error while rolling back event session: %s", err)
-
-        try:
-            self.event_session.close()
-        except Exception as err:  # pylint: disable=broad-except
-            # Must catch the exception to prevent the loop from collapsing
-            _LOGGER.exception("Error while closing event session: %s", err)
-
-        try:
-            self.event_session = self.get_session()
-            self.event_session.expire_on_commit = False
-        except Exception as err:  # pylint: disable=broad-except
-            # Must catch the exception to prevent the loop from collapsing
-            _LOGGER.exception("Error while creating new event session: %s", err)
+                time.sleep(self.db_retry_wait)

     def _commit_event_session(self):
         self._commits_without_expire += 1

-        try:
-            if self._pending_expunge:
-                self.event_session.flush()
-                for dbstate in self._pending_expunge:
-                    # Expunge the state so its not expired
-                    # until we use it later for dbstate.old_state
-                    if dbstate in self.event_session:
-                        self.event_session.expunge(dbstate)
-                self._pending_expunge = []
-            self.event_session.commit()
-        except exc.IntegrityError as err:
-            _LOGGER.error(
-                "Integrity error executing query (database likely deleted out from under us): %s",
-                err,
-            )
-            self.event_session.rollback()
-            self._old_states = {}
-            raise
-        except Exception as err:
-            _LOGGER.error("Error executing query: %s", err)
-            self.event_session.rollback()
-            raise
+        if self._pending_expunge:
+            self.event_session.flush()
+            for dbstate in self._pending_expunge:
+                # Expunge the state so its not expired
+                # until we use it later for dbstate.old_state
+                if dbstate in self.event_session:
+                    self.event_session.expunge(dbstate)
+            self._pending_expunge = []
+        self.event_session.commit()

         # Expire is an expensive operation (frequently more expensive
         # than the flush and commit itself) so we only
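
The recovery entry point above leans on SQLAlchemy's exception chaining: when a DBAPI call fails, SQLAlchemy re-raises it wrapped in exc.DatabaseError with the original driver exception attached as __cause__ (also available as err.orig). A minimal standalone sketch of the same check, assuming ./corrupt.db is an already-damaged file:

import sqlite3

from sqlalchemy import create_engine, text
from sqlalchemy.exc import DatabaseError

engine = create_engine("sqlite:///corrupt.db")  # assumption: file already corrupted

try:
    with engine.connect() as conn:
        conn.execute(text("SELECT 1 FROM states"))
except DatabaseError as err:
    # True when the failure came from the sqlite3 driver itself,
    # e.g. "database disk image is malformed" or "file is not a database"
    print(isinstance(err.__cause__, sqlite3.DatabaseError))

Only sqlite3 failures are treated as corruption here; the same exc.DatabaseError raised by a MySQL or PostgreSQL driver falls through to the generic rollback-and-reopen path.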
@@ -547,6 +523,47 @@ class Recorder(threading.Thread):
         self._commits_without_expire = 0
         self.event_session.expire_all()

+    def _handle_sqlite_corruption(self):
+        """Handle the sqlite3 database being corrupt."""
+        self._close_connection()
+        move_away_broken_database(dburl_to_path(self.db_url))
+        self._setup_recorder()
+
+    def _reopen_event_session(self):
+        """Rollback the event session and reopen it after a failure."""
+        self._old_states = {}
+
+        try:
+            self.event_session.rollback()
+            self.event_session.close()
+        except Exception as err:  # pylint: disable=broad-except
+            # Must catch the exception to prevent the loop from collapsing
+            _LOGGER.exception(
+                "Error while rolling back and closing the event session: %s", err
+            )
+
+        self._open_event_session()
+
+    def _open_event_session(self):
+        """Open the event session."""
+        try:
+            self.event_session = self.get_session()
+            self.event_session.expire_on_commit = False
+        except Exception as err:  # pylint: disable=broad-except
+            _LOGGER.exception("Error while creating new event session: %s", err)
+
+    def _send_keep_alive(self):
+        try:
+            _LOGGER.debug("Sending keepalive")
+            self.event_session.connection().scalar(select([1]))
+            return
+        except Exception as err:  # pylint: disable=broad-except
+            _LOGGER.error(
+                "Error in database connectivity during keepalive: %s",
+                err,
+            )
+            self._reopen_event_session()
+
     @callback
     def event_listener(self, event):
         """Listen for new events and put them in the process queue."""
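
_handle_sqlite_corruption composes three steps: dispose the connection pool, quarantine the bad file, and rerun the normal startup path against an empty database. The same detect-quarantine-rebuild sequence can be sketched outside Home Assistant with the stdlib driver (all names here are hypothetical):

import os
import sqlite3
import time

SCHEMA = "CREATE TABLE IF NOT EXISTS states (state TEXT)"

def insert_with_recovery(path: str, state: str) -> None:
    """Hypothetical helper mirroring the recorder's recovery sequence."""
    try:
        with sqlite3.connect(path) as conn:
            conn.execute(SCHEMA)
            conn.execute("INSERT INTO states VALUES (?)", (state,))
    except sqlite3.DatabaseError:
        # Quarantine the broken file so a fresh database can be created,
        # then rebuild the schema and retry once.
        os.rename(path, f"{path}.corrupt.{time.time()}")
        with sqlite3.connect(path) as conn:
            conn.execute(SCHEMA)
            conn.execute("INSERT INTO states VALUES (?)", (state,))

Note that sqlite3's context manager commits on success but does not close the connection; that is acceptable for a throwaway sketch, not for the long-lived session the recorder manages.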
@@ -571,6 +588,7 @@ class Recorder(threading.Thread):
     def _setup_connection(self):
         """Ensure database is ready to fly."""
         kwargs = {}
+        self._completed_database_setup = False

         def setup_recorder_connection(dbapi_connection, connection_record):
             """Dbapi specific connection settings."""
@@ -603,9 +621,7 @@ class Recorder(threading.Thread):
         else:
             kwargs["echo"] = False

-        if self.db_url != SQLITE_URL_PREFIX and self.db_url.startswith(
-            SQLITE_URL_PREFIX
-        ):
+        if self._using_file_sqlite:
             with self.hass.timeout.freeze(DOMAIN):
                 #
                 # Here we run an sqlite3 quick_check. In the majority
@@ -628,6 +644,13 @@ class Recorder(threading.Thread):
         Base.metadata.create_all(self.engine)
         self.get_session = scoped_session(sessionmaker(bind=self.engine))

+    @property
+    def _using_file_sqlite(self):
+        """Short version to check if we are using sqlite3 as a file."""
+        return self.db_url != SQLITE_URL_PREFIX and self.db_url.startswith(
+            SQLITE_URL_PREFIX
+        )
+
     def _close_connection(self):
         """Close the connection."""
         self.engine.dispose()
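
For context, SQLITE_URL_PREFIX is "sqlite://" in the recorder's const module, so the new property is true only for file-backed sqlite URLs; the bare prefix (an in-memory database) and other dialects are excluded. A quick self-check of the same expression:

SQLITE_URL_PREFIX = "sqlite://"  # value from homeassistant.components.recorder.const

def using_file_sqlite(db_url: str) -> bool:
    # True only when a path follows the prefix; the bare prefix is in-memory.
    return db_url != SQLITE_URL_PREFIX and db_url.startswith(SQLITE_URL_PREFIX)

assert using_file_sqlite("sqlite:///config/home-assistant_v2.db")
assert not using_file_sqlite("sqlite://")  # in-memory database
assert not using_file_sqlite("mysql://user@host/homeassistant")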
@@ -652,12 +675,18 @@ class Recorder(threading.Thread):
             session.flush()
             session.expunge(self.run_info)

-    def _close_run(self):
+    def _shutdown(self):
         """Save end time for current run."""
         if self.event_session is not None:
             self.run_info.end = dt_util.utcnow()
             self.event_session.add(self.run_info)
-            self._commit_event_session_or_retry()
-            self.event_session.close()
+            try:
+                self._commit_event_session_or_retry()
+                self.event_session.close()
+            except Exception as err:  # pylint: disable=broad-except
+                _LOGGER.exception(
+                    "Error saving the event session during shutdown: %s", err
+                )

         self.run_info = None
+        self._close_connection()
homeassistant/components/recorder/util.py

@@ -112,19 +112,24 @@ def execute(qry, to_native=False, validate_entity_ids=True):

 def validate_or_move_away_sqlite_database(dburl: str, db_integrity_check: bool) -> bool:
     """Ensure that the database is valid or move it away."""
-    dbpath = dburl[len(SQLITE_URL_PREFIX) :]
+    dbpath = dburl_to_path(dburl)

     if not os.path.exists(dbpath):
         # Database does not exist yet, this is OK
         return True

     if not validate_sqlite_database(dbpath, db_integrity_check):
-        _move_away_broken_database(dbpath)
+        move_away_broken_database(dbpath)
         return False

     return True


+def dburl_to_path(dburl):
+    """Convert the db url into a filesystem path."""
+    return dburl[len(SQLITE_URL_PREFIX) :]
+
+
 def last_run_was_recently_clean(cursor):
     """Verify the last recorder run was recently clean."""
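
dburl_to_path is a plain prefix slice, which is why it only makes sense for URLs that already passed the _using_file_sqlite check; a worked example:

SQLITE_URL_PREFIX = "sqlite://"

def dburl_to_path(dburl):
    """Convert the db url into a filesystem path."""
    return dburl[len(SQLITE_URL_PREFIX) :]

# "sqlite://" is 9 characters, so the third slash of the URL survives
# as the leading "/" of the filesystem path:
assert dburl_to_path("sqlite:///config/home-assistant_v2.db") == "/config/home-assistant_v2.db"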
@@ -208,7 +213,7 @@ def run_checks_on_open_db(dbpath, cursor, db_integrity_check):
         cursor.execute("PRAGMA QUICK_CHECK")


-def _move_away_broken_database(dbfile: str) -> None:
+def move_away_broken_database(dbfile: str) -> None:
     """Move away a broken sqlite3 database."""

     isotime = dt_util.utcnow().isoformat()
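
The hunk shows only the helper's rename and docstring; a hedged sketch of the move-away idea (the glob pattern and suffix format are illustrative, not necessarily the commit's exact body) is to rename the database along with any sqlite sidecar files using a timestamped suffix, freeing the original path for a fresh file:

import glob
import os
from datetime import datetime, timezone

def move_away_broken_database_sketch(dbfile: str) -> None:
    """Illustrative: quarantine test.db plus test.db-wal and test.db-shm."""
    isotime = datetime.now(timezone.utc).isoformat()
    for path in glob.iglob(dbfile + "*"):
        os.rename(path, f"{path}.corrupt.{isotime}")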
tests/components/recorder/common.py

@@ -10,14 +10,27 @@ from tests.common import fire_time_changed

 def wait_recording_done(hass):
     """Block till recording is done."""
+    hass.block_till_done()
     trigger_db_commit(hass)
     hass.block_till_done()
     hass.data[recorder.DATA_INSTANCE].block_till_done()
     hass.block_till_done()


+async def async_wait_recording_done(hass):
+    """Block till recording is done."""
+    await hass.loop.run_in_executor(None, wait_recording_done, hass)
+
+
 def trigger_db_commit(hass):
     """Force the recorder to commit."""
     for _ in range(recorder.DEFAULT_COMMIT_INTERVAL):
         # We only commit on time change
         fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=1))
+
+
+def corrupt_db_file(test_db_file):
+    """Corrupt an sqlite3 database file."""
+    with open(test_db_file, "w+") as fhandle:
+        fhandle.seek(200)
+        fhandle.write("I am a corrupt db" * 100)
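
A note on why corrupt_db_file is reliable: mode "w+" truncates the file to zero bytes on open, so the 100-byte sqlite header (which begins with the magic string "SQLite format 3\0") is destroyed before any garbage is written at offset 200. One way to confirm the result with the stdlib driver, mirroring the PRAGMA the recorder itself runs:

import sqlite3

def is_corrupt(path: str) -> bool:
    """Sketch: True if sqlite3 rejects the file or its quick_check fails."""
    try:
        conn = sqlite3.connect(path)
        conn.execute("PRAGMA quick_check")
        conn.close()
        return False
    except sqlite3.DatabaseError:  # e.g. "file is not a database"
        return True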
tests/components/recorder/test_init.py

@@ -6,6 +6,7 @@ from unittest.mock import patch
 from sqlalchemy.exc import OperationalError

 from homeassistant.components.recorder import (
+    CONF_DB_URL,
     CONFIG_SCHEMA,
     DOMAIN,
     Recorder,
@@ -13,7 +14,7 @@ from homeassistant.components.recorder import (
     run_information_from_instance,
     run_information_with_session,
 )
-from homeassistant.components.recorder.const import DATA_INSTANCE
+from homeassistant.components.recorder.const import DATA_INSTANCE, SQLITE_URL_PREFIX
 from homeassistant.components.recorder.models import Events, RecorderRuns, States
 from homeassistant.components.recorder.util import session_scope
 from homeassistant.const import (
@@ -26,7 +27,7 @@ from homeassistant.core import Context, CoreState, callback
 from homeassistant.setup import async_setup_component
 from homeassistant.util import dt as dt_util

-from .common import wait_recording_done
+from .common import async_wait_recording_done, corrupt_db_file, wait_recording_done

 from tests.common import (
     async_init_recorder_component,
@@ -519,3 +520,52 @@ def test_run_information(hass_recorder):

 class CannotSerializeMe:
     """A class that the JSONEncoder cannot serialize."""
+
+
+async def test_database_corruption_while_running(hass, tmpdir, caplog):
+    """Test we can recover from sqlite3 db corruption."""
+
+    def _create_tmpdir_for_test_db():
+        return tmpdir.mkdir("sqlite").join("test.db")
+
+    test_db_file = await hass.async_add_executor_job(_create_tmpdir_for_test_db)
+    dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}"
+
+    assert await async_setup_component(hass, DOMAIN, {DOMAIN: {CONF_DB_URL: dburl}})
+    await hass.async_block_till_done()
+    caplog.clear()
+
+    hass.states.async_set("test.lost", "on", {})
+
+    await async_wait_recording_done(hass)
+    await hass.async_add_executor_job(corrupt_db_file, test_db_file)
+    await async_wait_recording_done(hass)
+
+    # This state will not be recorded because
+    # the database corruption will be discovered
+    # and we will have to rollback to recover
+    hass.states.async_set("test.one", "off", {})
+    await async_wait_recording_done(hass)
+
+    assert "Unrecoverable sqlite3 database corruption detected" in caplog.text
+    assert "The system will rename the corrupt database file" in caplog.text
+    assert "Connected to recorder database" in caplog.text
+
+    # This state should go into the new database
+    hass.states.async_set("test.two", "on", {})
+    await async_wait_recording_done(hass)
+
+    def _get_last_state():
+        with session_scope(hass=hass) as session:
+            db_states = list(session.query(States))
+            assert len(db_states) == 1
+            assert db_states[0].event_id > 0
+            return db_states[0].to_native()
+
+    state = await hass.async_add_executor_job(_get_last_state)
+    assert state.entity_id == "test.two"
+    assert state.state == "on"
+
+    hass.bus.async_fire(EVENT_HOMEASSISTANT_STOP)
+    await hass.async_block_till_done()
+    hass.stop()
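
The failure this test induces can be reproduced standalone: corrupt the file while a connection is still open, and a subsequent write typically fails with sqlite3.DatabaseError, which SQLAlchemy surfaces as exc.DatabaseError with the driver error as __cause__ (exactly which statement fails can vary with sqlite's page cache). A stdlib-only sketch:

import sqlite3

conn = sqlite3.connect("test.db")
conn.execute("CREATE TABLE states (state TEXT)")
conn.commit()

with open("test.db", "w+") as fhandle:  # same trick as corrupt_db_file
    fhandle.seek(200)
    fhandle.write("I am a corrupt db" * 100)

try:
    conn.execute("INSERT INTO states VALUES ('on')")
    conn.commit()
except sqlite3.DatabaseError as err:
    print(err)  # typically "database disk image is malformed"
conn.close()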
tests/components/recorder/test_util.py

@@ -10,7 +10,7 @@ from homeassistant.components.recorder import util
 from homeassistant.components.recorder.const import DATA_INSTANCE, SQLITE_URL_PREFIX
 from homeassistant.util import dt as dt_util

-from .common import wait_recording_done
+from .common import corrupt_db_file, wait_recording_done

 from tests.common import get_test_home_assistant, init_recorder_component
@@ -90,7 +90,7 @@ def test_validate_or_move_away_sqlite_database_with_integrity_check(
         util.validate_or_move_away_sqlite_database(dburl, db_integrity_check) is False
     )

-    _corrupt_db_file(test_db_file)
+    corrupt_db_file(test_db_file)

     assert util.validate_sqlite_database(dburl, db_integrity_check) is False

@@ -127,7 +127,7 @@ def test_validate_or_move_away_sqlite_database_without_integrity_check(
         util.validate_or_move_away_sqlite_database(dburl, db_integrity_check) is False
     )

-    _corrupt_db_file(test_db_file)
+    corrupt_db_file(test_db_file)

     assert util.validate_sqlite_database(dburl, db_integrity_check) is False

@@ -150,7 +150,7 @@ def test_last_run_was_recently_clean(hass_recorder):

     assert util.last_run_was_recently_clean(cursor) is False

-    hass.data[DATA_INSTANCE]._close_run()
+    hass.data[DATA_INSTANCE]._shutdown()
     wait_recording_done(hass)

     assert util.last_run_was_recently_clean(cursor) is True
@@ -244,10 +244,3 @@ def test_combined_checks(hass_recorder, caplog):
     caplog.clear()
     with pytest.raises(sqlite3.DatabaseError):
         util.run_checks_on_open_db("fake_db_path", cursor, True)
-
-
-def _corrupt_db_file(test_db_file):
-    """Corrupt an sqlite3 database file."""
-    f = open(test_db_file, "a")
-    f.write("I am a corrupt db")
-    f.close()