mirror of
https://github.com/home-assistant/core.git
synced 2025-04-24 01:08:12 +00:00
Auto repair incorrect collation on MySQL schema (#92270)
* Auto repair incorrect collation on MySQL schema As we do more union queries in 2023.5.x if there is a mismatch between collations on tables, they will fail with an error that is hard for the user to figure out how to fix `Error executing query: (MySQLdb.OperationalError) (1271, "Illegal mix of collations for operation UNION")` This was reported in the #beta channel and by PM from others so the problem is not isolated to a single user https://discord.com/channels/330944238910963714/427516175237382144/1100908739910963272 * test with ascii since older maraidb versions may not work otherwise * Revert "test with ascii since older maraidb versions may not work otherwise" This reverts commit 787fda1aefcd8418a28a8a8f430e7e7232218ef8.t * older version need to check collation_server because the collation is not reflected if its the default
This commit is contained in:
parent
3a5a9a90b2
commit
1a82b353e0
@ -8,6 +8,7 @@ from ..schema import (
|
||||
correct_db_schema_precision,
|
||||
correct_db_schema_utf8,
|
||||
validate_db_schema_precision,
|
||||
validate_table_schema_has_correct_collation,
|
||||
validate_table_schema_supports_utf8,
|
||||
)
|
||||
|
||||
@ -17,9 +18,12 @@ if TYPE_CHECKING:
|
||||
|
||||
def validate_db_schema(instance: Recorder) -> set[str]:
|
||||
"""Do some basic checks for common schema errors caused by manual migration."""
|
||||
return validate_table_schema_supports_utf8(
|
||||
schema_errors = validate_table_schema_supports_utf8(
|
||||
instance, EventData, (EventData.shared_data,)
|
||||
) | validate_db_schema_precision(instance, Events)
|
||||
for table in (Events, EventData):
|
||||
schema_errors |= validate_table_schema_has_correct_collation(instance, table)
|
||||
return schema_errors
|
||||
|
||||
|
||||
def correct_db_schema(
|
||||
@ -27,5 +31,6 @@ def correct_db_schema(
|
||||
schema_errors: set[str],
|
||||
) -> None:
|
||||
"""Correct issues detected by validate_db_schema."""
|
||||
correct_db_schema_utf8(instance, EventData, schema_errors)
|
||||
for table in (Events, EventData):
|
||||
correct_db_schema_utf8(instance, table, schema_errors)
|
||||
correct_db_schema_precision(instance, Events, schema_errors)
|
||||
|
@ -5,6 +5,7 @@ from collections.abc import Iterable, Mapping
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import MetaData
|
||||
from sqlalchemy.exc import OperationalError
|
||||
from sqlalchemy.orm import DeclarativeBase
|
||||
from sqlalchemy.orm.attributes import InstrumentedAttribute
|
||||
@ -60,6 +61,60 @@ def validate_table_schema_supports_utf8(
|
||||
return schema_errors
|
||||
|
||||
|
||||
def validate_table_schema_has_correct_collation(
|
||||
instance: Recorder,
|
||||
table_object: type[DeclarativeBase],
|
||||
) -> set[str]:
|
||||
"""Verify the table has the correct collation."""
|
||||
schema_errors: set[str] = set()
|
||||
# Lack of full utf8 support is only an issue for MySQL / MariaDB
|
||||
if instance.dialect_name != SupportedDialect.MYSQL:
|
||||
return schema_errors
|
||||
|
||||
try:
|
||||
schema_errors = _validate_table_schema_has_correct_collation(
|
||||
instance, table_object
|
||||
)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
_LOGGER.exception("Error when validating DB schema: %s", exc)
|
||||
|
||||
_log_schema_errors(table_object, schema_errors)
|
||||
return schema_errors
|
||||
|
||||
|
||||
def _validate_table_schema_has_correct_collation(
|
||||
instance: Recorder,
|
||||
table_object: type[DeclarativeBase],
|
||||
) -> set[str]:
|
||||
"""Ensure the table has the correct collation to avoid union errors with mixed collations."""
|
||||
schema_errors: set[str] = set()
|
||||
# Mark the session as read_only to ensure that the test data is not committed
|
||||
# to the database and we always rollback when the scope is exited
|
||||
with session_scope(session=instance.get_session(), read_only=True) as session:
|
||||
table = table_object.__tablename__
|
||||
metadata_obj = MetaData()
|
||||
connection = session.connection()
|
||||
metadata_obj.reflect(bind=connection)
|
||||
dialect_kwargs = metadata_obj.tables[table].dialect_kwargs
|
||||
# Check if the table has a collation set, if its not set than its
|
||||
# using the server default collation for the database
|
||||
|
||||
collate = (
|
||||
dialect_kwargs.get("mysql_collate")
|
||||
or dialect_kwargs.get(
|
||||
"mariadb_collate"
|
||||
) # pylint: disable-next=protected-access
|
||||
or connection.dialect._fetch_setting(connection, "collation_server") # type: ignore[attr-defined]
|
||||
)
|
||||
if collate and collate != "utf8mb4_unicode_ci":
|
||||
_LOGGER.debug(
|
||||
"Database %s collation is not utf8mb4_unicode_ci",
|
||||
table,
|
||||
)
|
||||
schema_errors.add(f"{table}.utf8mb4_unicode_ci")
|
||||
return schema_errors
|
||||
|
||||
|
||||
def _validate_table_schema_supports_utf8(
|
||||
instance: Recorder,
|
||||
table_object: type[DeclarativeBase],
|
||||
@ -184,7 +239,10 @@ def correct_db_schema_utf8(
|
||||
) -> None:
|
||||
"""Correct utf8 issues detected by validate_db_schema."""
|
||||
table_name = table_object.__tablename__
|
||||
if f"{table_name}.4-byte UTF-8" in schema_errors:
|
||||
if (
|
||||
f"{table_name}.4-byte UTF-8" in schema_errors
|
||||
or f"{table_name}.utf8mb4_unicode_ci" in schema_errors
|
||||
):
|
||||
from ..migration import ( # pylint: disable=import-outside-toplevel
|
||||
_correct_table_character_set_and_collation,
|
||||
)
|
||||
|
@ -8,6 +8,7 @@ from ..schema import (
|
||||
correct_db_schema_precision,
|
||||
correct_db_schema_utf8,
|
||||
validate_db_schema_precision,
|
||||
validate_table_schema_has_correct_collation,
|
||||
validate_table_schema_supports_utf8,
|
||||
)
|
||||
|
||||
@ -26,6 +27,8 @@ def validate_db_schema(instance: Recorder) -> set[str]:
|
||||
for table, columns in TABLE_UTF8_COLUMNS.items():
|
||||
schema_errors |= validate_table_schema_supports_utf8(instance, table, columns)
|
||||
schema_errors |= validate_db_schema_precision(instance, States)
|
||||
for table in (States, StateAttributes):
|
||||
schema_errors |= validate_table_schema_has_correct_collation(instance, table)
|
||||
return schema_errors
|
||||
|
||||
|
||||
|
@ -9,6 +9,7 @@ from ..schema import (
|
||||
correct_db_schema_precision,
|
||||
correct_db_schema_utf8,
|
||||
validate_db_schema_precision,
|
||||
validate_table_schema_has_correct_collation,
|
||||
validate_table_schema_supports_utf8,
|
||||
)
|
||||
|
||||
@ -26,6 +27,7 @@ def validate_db_schema(instance: Recorder) -> set[str]:
|
||||
)
|
||||
for table in (Statistics, StatisticsShortTerm):
|
||||
schema_errors |= validate_db_schema_precision(instance, table)
|
||||
schema_errors |= validate_table_schema_has_correct_collation(instance, table)
|
||||
if schema_errors:
|
||||
_LOGGER.debug(
|
||||
"Detected statistics schema errors: %s", ", ".join(sorted(schema_errors))
|
||||
@ -41,3 +43,4 @@ def correct_db_schema(
|
||||
correct_db_schema_utf8(instance, StatisticsMeta, schema_errors)
|
||||
for table in (Statistics, StatisticsShortTerm):
|
||||
correct_db_schema_precision(instance, table, schema_errors)
|
||||
correct_db_schema_utf8(instance, table, schema_errors)
|
||||
|
@ -74,3 +74,32 @@ async def test_validate_db_schema_fix_utf8_issue_event_data(
|
||||
"Updating character set and collation of table event_data to utf8mb4"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enable_schema_validation", [True])
|
||||
async def test_validate_db_schema_fix_collation_issue(
|
||||
async_setup_recorder_instance: RecorderInstanceGenerator,
|
||||
hass: HomeAssistant,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Test validating DB schema with MySQL.
|
||||
|
||||
Note: The test uses SQLite, the purpose is only to exercise the code.
|
||||
"""
|
||||
with patch(
|
||||
"homeassistant.components.recorder.core.Recorder.dialect_name", "mysql"
|
||||
), patch(
|
||||
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
|
||||
return_value={"events.utf8mb4_unicode_ci"},
|
||||
):
|
||||
await async_setup_recorder_instance(hass)
|
||||
await async_wait_recording_done(hass)
|
||||
|
||||
assert "Schema validation failed" not in caplog.text
|
||||
assert (
|
||||
"Database is about to correct DB schema errors: events.utf8mb4_unicode_ci"
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table events to utf8mb4" in caplog.text
|
||||
)
|
||||
|
@ -104,3 +104,32 @@ async def test_validate_db_schema_fix_utf8_issue_state_attributes(
|
||||
"Updating character set and collation of table state_attributes to utf8mb4"
|
||||
in caplog.text
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enable_schema_validation", [True])
|
||||
async def test_validate_db_schema_fix_collation_issue(
|
||||
async_setup_recorder_instance: RecorderInstanceGenerator,
|
||||
hass: HomeAssistant,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Test validating DB schema with MySQL.
|
||||
|
||||
Note: The test uses SQLite, the purpose is only to exercise the code.
|
||||
"""
|
||||
with patch(
|
||||
"homeassistant.components.recorder.core.Recorder.dialect_name", "mysql"
|
||||
), patch(
|
||||
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
|
||||
return_value={"states.utf8mb4_unicode_ci"},
|
||||
):
|
||||
await async_setup_recorder_instance(hass)
|
||||
await async_wait_recording_done(hass)
|
||||
|
||||
assert "Schema validation failed" not in caplog.text
|
||||
assert (
|
||||
"Database is about to correct DB schema errors: states.utf8mb4_unicode_ci"
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table states to utf8mb4" in caplog.text
|
||||
)
|
||||
|
@ -83,3 +83,33 @@ async def test_validate_db_schema_fix_float_issue(
|
||||
"sum DOUBLE PRECISION",
|
||||
]
|
||||
modify_columns_mock.assert_called_once_with(ANY, ANY, table, modification)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("enable_schema_validation", [True])
|
||||
async def test_validate_db_schema_fix_collation_issue(
|
||||
async_setup_recorder_instance: RecorderInstanceGenerator,
|
||||
hass: HomeAssistant,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Test validating DB schema with MySQL.
|
||||
|
||||
Note: The test uses SQLite, the purpose is only to exercise the code.
|
||||
"""
|
||||
with patch(
|
||||
"homeassistant.components.recorder.core.Recorder.dialect_name", "mysql"
|
||||
), patch(
|
||||
"homeassistant.components.recorder.auto_repairs.schema._validate_table_schema_has_correct_collation",
|
||||
return_value={"statistics.utf8mb4_unicode_ci"},
|
||||
):
|
||||
await async_setup_recorder_instance(hass)
|
||||
await async_wait_recording_done(hass)
|
||||
|
||||
assert "Schema validation failed" not in caplog.text
|
||||
assert (
|
||||
"Database is about to correct DB schema errors: statistics.utf8mb4_unicode_ci"
|
||||
in caplog.text
|
||||
)
|
||||
assert (
|
||||
"Updating character set and collation of table statistics to utf8mb4"
|
||||
in caplog.text
|
||||
)
|
||||
|
@ -10,6 +10,7 @@ from homeassistant.components.recorder.auto_repairs.schema import (
|
||||
correct_db_schema_precision,
|
||||
correct_db_schema_utf8,
|
||||
validate_db_schema_precision,
|
||||
validate_table_schema_has_correct_collation,
|
||||
validate_table_schema_supports_utf8,
|
||||
)
|
||||
from homeassistant.components.recorder.db_schema import States
|
||||
@ -106,6 +107,69 @@ async def test_validate_db_schema_fix_utf8_issue_with_broken_schema(
|
||||
assert schema_errors == set()
|
||||
|
||||
|
||||
async def test_validate_db_schema_fix_incorrect_collation(
|
||||
async_setup_recorder_instance: RecorderInstanceGenerator,
|
||||
hass: HomeAssistant,
|
||||
recorder_db_url: str,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Test validating DB schema with MySQL when the collation is incorrect."""
|
||||
if not recorder_db_url.startswith("mysql://"):
|
||||
# This problem only happens on MySQL
|
||||
return
|
||||
await async_setup_recorder_instance(hass)
|
||||
await async_wait_recording_done(hass)
|
||||
instance = get_instance(hass)
|
||||
session_maker = instance.get_session
|
||||
|
||||
def _break_states_schema():
|
||||
with session_scope(session=session_maker()) as session:
|
||||
session.execute(
|
||||
text(
|
||||
"ALTER TABLE states CHARACTER SET utf8mb3 COLLATE utf8_general_ci, "
|
||||
"LOCK=EXCLUSIVE;"
|
||||
)
|
||||
)
|
||||
|
||||
await instance.async_add_executor_job(_break_states_schema)
|
||||
schema_errors = await instance.async_add_executor_job(
|
||||
validate_table_schema_has_correct_collation, instance, States
|
||||
)
|
||||
assert schema_errors == {"states.utf8mb4_unicode_ci"}
|
||||
|
||||
# Now repair the schema
|
||||
await instance.async_add_executor_job(
|
||||
correct_db_schema_utf8, instance, States, schema_errors
|
||||
)
|
||||
|
||||
# Now validate the schema again
|
||||
schema_errors = await instance.async_add_executor_job(
|
||||
validate_table_schema_has_correct_collation, instance, States
|
||||
)
|
||||
assert schema_errors == set()
|
||||
|
||||
|
||||
async def test_validate_db_schema_precision_correct_collation(
|
||||
async_setup_recorder_instance: RecorderInstanceGenerator,
|
||||
hass: HomeAssistant,
|
||||
recorder_db_url: str,
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Test validating DB schema when the schema is correct with the correct collation."""
|
||||
if not recorder_db_url.startswith("mysql://"):
|
||||
# This problem only happens on MySQL
|
||||
return
|
||||
await async_setup_recorder_instance(hass)
|
||||
await async_wait_recording_done(hass)
|
||||
instance = get_instance(hass)
|
||||
schema_errors = await instance.async_add_executor_job(
|
||||
validate_table_schema_has_correct_collation,
|
||||
instance,
|
||||
States,
|
||||
)
|
||||
assert schema_errors == set()
|
||||
|
||||
|
||||
async def test_validate_db_schema_fix_utf8_issue_with_broken_schema_unrepairable(
|
||||
async_setup_recorder_instance: RecorderInstanceGenerator,
|
||||
hass: HomeAssistant,
|
||||
|
Loading…
x
Reference in New Issue
Block a user