Fix statistics_at_time query not using index (#88460)

* Fix statistics_at_time query not using index

fixes #82411

* fix refactoring error

* fix query so sqlalc does not get confused

* split it

* write as subquery

* reduce

* cleanup

* reduce

* Revert "reduce"

This reverts commit 43b4b55778207d87e3f24543a876ae73e4a7107a.
This commit is contained in:
J. Nick Koston 2023-02-19 20:07:31 -06:00 committed by GitHub
parent eac9ad8437
commit 0b311c8c7f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -16,7 +16,7 @@ import re
from statistics import mean
from typing import TYPE_CHECKING, Any, Literal, cast
from sqlalchemy import bindparam, func, lambda_stmt, select, text
from sqlalchemy import and_, bindparam, func, lambda_stmt, select, text
from sqlalchemy.engine import Engine
from sqlalchemy.engine.row import Row
from sqlalchemy.exc import OperationalError, SQLAlchemyError, StatementError
@ -1976,6 +1976,24 @@ def get_latest_short_term_statistics(
)
def _get_most_recent_statistics_subquery(
metadata_ids: set[int], table: type[StatisticsBase], start_time_ts: float
) -> Subquery:
"""Generate the subquery to find the most recent statistic row."""
return (
select(
# https://github.com/sqlalchemy/sqlalchemy/issues/9189
# pylint: disable-next=not-callable
func.max(table.start_ts).label("max_start_ts"),
table.metadata_id.label("max_metadata_id"),
)
.filter(table.start_ts < start_time_ts)
.filter(table.metadata_id.in_(metadata_ids))
.group_by(table.metadata_id)
.subquery()
)
def _statistics_at_time(
session: Session,
metadata_ids: set[int],
@ -1999,22 +2017,17 @@ def _statistics_at_time(
columns = columns.add_columns(table.sum)
start_time_ts = start_time.timestamp()
stmt = lambda_stmt(lambda: columns)
most_recent_statistic_ids = (
# https://github.com/sqlalchemy/sqlalchemy/issues/9189
# pylint: disable-next=not-callable
lambda_stmt(lambda: select(func.max(table.id).label("max_id")))
.filter(table.start_ts < start_time_ts)
.filter(table.metadata_id.in_(metadata_ids))
.group_by(table.metadata_id)
.subquery()
most_recent_statistic_ids = _get_most_recent_statistics_subquery(
metadata_ids, table, start_time_ts
)
stmt += lambda q: q.join(
stmt = lambda_stmt(lambda: columns).join(
most_recent_statistic_ids,
table.id == most_recent_statistic_ids.c.max_id,
and_(
table.start_ts == most_recent_statistic_ids.c.max_start_ts,
table.metadata_id == most_recent_statistic_ids.c.max_metadata_id,
),
)
return cast(Sequence[Row], execute_stmt_lambda_element(session, stmt))