Compare commits

...

13 Commits

Author        SHA1        Message                                             Date
Robert Resch  7d2818d882  Improve printing                                    2025-03-28 23:06:28 +01:00
Robert Resch  16c56d9f6b  Improve script                                      2025-03-28 22:40:12 +01:00
Robert Resch  98f32c204b  Fix script call                                     2025-03-28 21:21:52 +01:00
Robert Resch  0c7a2759c3  fix                                                 2025-03-28 20:53:13 +01:00
Robert Resch  b869546c12  create dedicated python script to combine reports   2025-03-28 20:20:30 +01:00
Robert Resch  b7b18d2f51  Print not measured files at the end                 2025-03-28 19:26:34 +01:00
Robert Resch  b447f62bda  Small improvements                                  2025-03-27 18:38:14 +01:00
Robert Resch  ffc0dae6a4  Fix file name                                       2025-03-27 18:09:07 +01:00
Robert Resch  269a21584b  Fix cleanup                                         2025-03-27 17:10:59 +01:00
Robert Resch  219a8cdf40  Improve report plugin                               2025-03-27 16:53:50 +01:00
Robert Resch  9cc61d46cf  typo                                                2025-03-27 16:22:37 +01:00
Robert Resch  b42f1395fa  Fix split tests                                     2025-03-27 16:17:47 +01:00
Robert Resch  7b0e4871da  Split test by execution time                        2025-03-27 15:20:05 +01:00
5 changed files with 331 additions and 76 deletions

.github/workflows/ci.yaml

@@ -876,15 +876,6 @@ jobs:
- mypy
name: Split tests for full run
steps:
- name: Install additional OS dependencies
run: |
sudo rm /etc/apt/sources.list.d/microsoft-prod.list
sudo apt-get update
sudo apt-get -y install \
bluez \
ffmpeg \
libturbojpeg \
libgammu-dev
- name: Check out code from GitHub
uses: actions/checkout@v4.2.2
- name: Set up Python ${{ env.DEFAULT_PYTHON }}
@@ -893,6 +884,18 @@ jobs:
with:
python-version: ${{ env.DEFAULT_PYTHON }}
check-latest: true
- name: Generate partial pytest execution time restore key
id: generate-pytest-execution-time-report-key
run: |
echo "key=pytest-execution-time-report-$(date -u '+%Y-%m-%dT%H:%M:%s')" >> $GITHUB_OUTPUT
- name: Restore pytest execution time cache
uses: actions/cache/restore@v4.2.3
with:
path: pytest-execution-time-report.json
key: >-
${{ runner.os }}-${{ steps.generate-pytest-execution-time-report-key.outputs.key }}
restore-keys: |
${{ runner.os }}-pytest-execution-time-report-
- name: Restore base Python virtual environment
id: cache-venv
uses: actions/cache/restore@v4.2.3
@@ -905,7 +908,8 @@ jobs:
- name: Run split_tests.py
run: |
. venv/bin/activate
python -m script.split_tests ${{ needs.info.outputs.test_group_count }} tests
python -m script.split_tests ${{ needs.info.outputs.test_group_count }} \
tests pytest-execution-time-report.json
- name: Upload pytest_buckets
uses: actions/upload-artifact@v4.6.2
with:
@@ -1002,6 +1006,7 @@ jobs:
${cov_params[@]} \
-o console_output_style=count \
-p no:sugar \
--execution-time-report-name pytest-execution-time-report-${{ matrix.python-version }}-${{ matrix.group }}.json \
--exclude-warning-annotations \
$(sed -n "${{ matrix.group }},1p" pytest_buckets.txt) \
2>&1 | tee pytest-${{ matrix.python-version }}-${{ matrix.group }}.txt
@@ -1010,7 +1015,9 @@ jobs:
uses: actions/upload-artifact@v4.6.2
with:
name: pytest-${{ github.run_number }}-${{ matrix.python-version }}-${{ matrix.group }}
path: pytest-*.txt
path: |
pytest-*.txt
pytest-*.json
overwrite: true
- name: Upload coverage artifact
if: needs.info.outputs.skip_coverage != 'true'
@@ -1025,12 +1032,60 @@ jobs:
with:
name: test-results-full-${{ matrix.python-version }}-${{ matrix.group }}
path: junit.xml
- name: Remove pytest_buckets
run: rm pytest_buckets.txt
- name: Check dirty
run: |
./script/check_dirty
pytest-combine-test-execution-time:
runs-on: ubuntu-24.04
needs:
- info
- pytest-full
name: Combine test execution times
steps:
- name: Check out code from GitHub
uses: actions/checkout@v4.2.2
- name: Set up Python ${{ env.DEFAULT_PYTHON }}
id: python
uses: actions/setup-python@v5.5.0
with:
python-version: ${{ env.DEFAULT_PYTHON }}
check-latest: true
- name: Restore base Python virtual environment
id: cache-venv
uses: actions/cache/restore@v4.2.3
with:
path: venv
fail-on-cache-miss: true
key: >-
${{ runner.os }}-${{ steps.python.outputs.python-version }}-${{
needs.info.outputs.python_cache_key }}
- name: Generate partial pytest execution time restore key
id: generate-pytest-execution-time-report-key
run: |
echo "key=pytest-execution-time-report-$(date -u '+%Y-%m-%dT%H:%M:%s')" >> $GITHUB_OUTPUT
- name: Download pytest execution time artifacts
uses: actions/download-artifact@v4.2.1
with:
pattern: pytest-${{ github.run_number }}-${{ env.DEFAULT_PYTHON }}-*
merge-multiple: true
- name: Combine files into one
run: |
. venv/bin/activate
python -m script.merge_pytest_execution_time_reports "pytest-execution-time-report-${{ env.DEFAULT_PYTHON }}-*.json"
- name: Upload combined pytest execution time artifact
uses: actions/upload-artifact@v4.6.2
with:
name: pytest-execution-time-report-${{ github.run_number }}
path: pytest-execution-time-report.json
- name: Save pytest execution time cache
uses: actions/cache/save@v4.2.3
with:
path: pytest-execution-time-report.json
key: >-
${{ runner.os }}-${{
steps.generate-pytest-execution-time-report-key.outputs.key }}
pytest-mariadb:
runs-on: ubuntu-24.04
services:
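
How the new pieces fit together: each pytest-full group writes its own pytest-execution-time-report-<python>-<group>.json via the new --execution-time-report-name option, the pytest-combine-test-execution-time job merges those into a single pytest-execution-time-report.json and saves it to the actions cache, and the split job of the next run restores that file and feeds it to script.split_tests. Because actions/cache entries are immutable per key, the "Generate partial pytest execution time restore key" steps embed a UTC timestamp so every run saves under a fresh key, while restore-keys falls back to the newest entry matching the bare prefix. A minimal sketch of that save-unique/restore-latest pattern (illustrative only; these function names are hypothetical, not part of the workflow):

from datetime import datetime, timezone

PREFIX = "pytest-execution-time-report-"


def generate_key() -> str:
    """Mirror the workflow's `date -u '+%Y-%m-%dT%H:%M:%S'` key suffix."""
    return PREFIX + datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")


def restore_latest(saved_keys: list[str]) -> str | None:
    """Prefix match like restore-keys; ISO timestamps sort lexicographically."""
    matches = sorted(key for key in saved_keys if key.startswith(PREFIX))
    return matches[-1] if matches else None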

.gitignore

@@ -137,4 +137,7 @@ tmp_cache
.ropeproject
# Will be created from script/split_tests.py
pytest_buckets.txt
# Contains test execution times used for splitting tests
pytest-execution-time-report*.json

script/merge_pytest_execution_time_reports.py

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""Helper script to merge all pytest execution time reports into one file."""

from __future__ import annotations

import argparse
import pathlib

from homeassistant.helpers.json import save_json
from homeassistant.util.json import load_json_object


def merge_json_files(pattern: str, output_file: str) -> None:
    """Merge JSON files matching the pattern into a single JSON file."""
    # Needs to be in sync with PytestExecutionTimeReport in conftest.py
    result: dict[str, float] = {}
    for file in pathlib.Path().glob(pattern):
        print(f"Processing {file}")
        data = load_json_object(file)
        if not isinstance(data, dict):
            print(f"Skipping {file} due to invalid data format.")
            continue
        for key, value in data.items():
            if not isinstance(value, (int, float)):
                print(
                    f"Skipping {key} in {file} due to invalid value type: {type(value)}."
                )
                continue
            if key in result:
                result[key] += value
            else:
                result[key] = value

    # Write the merged data to the output file
    save_json(output_file, result)


def main() -> None:
    """Execute script."""
    parser = argparse.ArgumentParser(
        description="Merge all pytest execution time reports into one file."
    )
    parser.add_argument(
        "pattern",
        help="Glob pattern to match JSON pytest execution time report files",
        type=str,
    )
    parser.add_argument(
        "output_file",
        help="Path to the output file",
        type=str,
        nargs="?",
        default="pytest-execution-time-report.json",
    )
    arguments = parser.parse_args()

    merge_json_files(arguments.pattern, arguments.output_file)


if __name__ == "__main__":
    main()
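
The merge itself is just a per-key sum. A worked example with hypothetical report contents (paths and durations are made up, chosen to be exact in binary floating point):

report_group_1 = {"tests/components/demo/test_light.py": 12.5}
report_group_2 = {
    "tests/components/demo/test_light.py": 3.25,
    "tests/components/demo/test_switch.py": 7.75,
}

merged: dict[str, float] = {}
for path, duration in [*report_group_1.items(), *report_group_2.items()]:
    merged[path] = merged.get(path, 0.0) + duration

assert merged == {
    "tests/components/demo/test_light.py": 15.75,
    "tests/components/demo/test_switch.py": 7.75,
}

In a full run every test file lands in exactly one group, so the sum normally just unions the per-group reports; summing also keeps the result sensible if a file ever shows up in more than one report.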

script/split_tests.py

@@ -5,11 +5,11 @@ from __future__ import annotations
import argparse
from dataclasses import dataclass, field
from math import ceil
from datetime import timedelta
from pathlib import Path
import subprocess
import sys
from typing import Final
from typing import Final, cast
from homeassistant.util.json import load_json_object
class Bucket:
@@ -19,13 +19,15 @@ class Bucket:
self,
):
"""Initialize bucket."""
self.total_tests = 0
self.approx_execution_time = timedelta(seconds=0)
self.not_measured_files = 0
self._paths: list[str] = []
def add(self, part: TestFolder | TestFile) -> None:
"""Add tests to bucket."""
part.add_to_bucket()
self.total_tests += part.total_tests
self.approx_execution_time += part.approx_execution_time
self.not_measured_files += part.not_measured_files
self._paths.append(str(part.path))
def get_paths_line(self) -> str:
@@ -33,64 +35,132 @@ class Bucket:
return " ".join(self._paths) + "\n"
def add_not_measured_files(
test: TestFolder | TestFile, not_measured_files: set[TestFile]
) -> None:
"""Add not measured files to test folder."""
if test.not_measured_files > 0:
if isinstance(test, TestFolder):
for child in test.children.values():
add_not_measured_files(child, not_measured_files)
else:
not_measured_files.add(test)
def sort_by_not_measured(bucket: Bucket) -> tuple[int, float]:
"""Sort by not measured files."""
return (bucket.not_measured_files, bucket.approx_execution_time.total_seconds())
def sort_by_execution_time(bucket: Bucket) -> tuple[float, int]:
"""Sort by execution time."""
return (bucket.approx_execution_time.total_seconds(), bucket.not_measured_files)
class BucketHolder:
"""Class to hold buckets."""
def __init__(self, tests_per_bucket: int, bucket_count: int) -> None:
def __init__(self, bucket_count: int) -> None:
"""Initialize bucket holder."""
self._tests_per_bucket = tests_per_bucket
self._bucket_count = bucket_count
self._buckets: list[Bucket] = [Bucket() for _ in range(bucket_count)]
def split_tests(self, test_folder: TestFolder) -> None:
"""Split tests into buckets."""
digits = len(str(test_folder.total_tests))
avg_execution_time = test_folder.approx_execution_time / self._bucket_count
avg_not_measured_files = test_folder.not_measured_files / self._bucket_count
sorted_tests = sorted(
test_folder.get_all_flatten(), reverse=True, key=lambda x: x.total_tests
test_folder.get_all_flatten(),
key=lambda x: (
-x.approx_execution_time,
-x.count_children() if isinstance(x, TestFolder) else 0,
x.not_measured_files,
),
)
not_measured_tests = set()
for tests in sorted_tests:
if tests.added_to_bucket:
# Already added to bucket
continue
print(f"{tests.total_tests:>{digits}} tests in {tests.path}")
smallest_bucket = min(self._buckets, key=lambda x: x.total_tests)
print(f"~{tests.approx_execution_time} execution time for {tests.path}")
is_file = isinstance(tests, TestFile)
sort_key = sort_by_execution_time
if tests.not_measured_files and tests.approx_execution_time == timedelta(seconds=0):
# If tests are not measured, sort by not measured files
sort_key = sort_by_not_measured
smallest_bucket = min(self._buckets, key=sort_key)
if (
smallest_bucket.total_tests + tests.total_tests < self._tests_per_bucket
(smallest_bucket.approx_execution_time + tests.approx_execution_time)
< avg_execution_time
and (smallest_bucket.not_measured_files + tests.not_measured_files)
< avg_not_measured_files
) or is_file:
smallest_bucket.add(tests)
add_not_measured_files(
tests,
not_measured_tests,
)
# Ensure all files from the same folder are in the same bucket
# to ensure that syrupy correctly identifies unused snapshots
if is_file:
added_tests = []
for other_test in tests.parent.children.values():
if other_test is tests or isinstance(other_test, TestFolder):
continue
print(
f"{other_test.total_tests:>{digits}} tests in {other_test.path} (same bucket)"
)
smallest_bucket.add(other_test)
added_tests.append(other_test)
add_not_measured_files(
other_test,
not_measured_tests,
)
if added_tests:
print(
f"Added {len(added_tests)} tests to the same bucket so syrupy can identify unused snapshots"
)
print(
" - "
+ "\n - ".join(
str(test.path) for test in sorted(added_tests)
)
)
# verify that all tests are added to a bucket
if not test_folder.added_to_bucket:
raise ValueError("Not all tests are added to a bucket")
if not_measured_tests:
print(f"Found {len(not_measured_tests)} not measured test files: ")
for test in sorted(not_measured_tests, key=lambda x: x.path):
print(f" - {test.path}")
def create_output_file(self) -> None:
"""Create output file."""
with Path("pytest_buckets.txt").open("w") as file:
for idx, bucket in enumerate(self._buckets):
print(f"Bucket {idx + 1} has {bucket.total_tests} tests")
print(
f"Bucket {idx + 1} execution time should be ~{str_without_milliseconds(bucket.approx_execution_time)}"
f" with {bucket.not_measured_files} not measured files"
)
file.write(bucket.get_paths_line())
def str_without_milliseconds(td: timedelta) -> str:
"""Return str without milliseconds."""
return str(td).split(".")[0]
@dataclass
class TestFile:
"""Class represents a single test file and the number of tests it has."""
total_tests: int
path: Path
parent: TestFolder
# 0 means not measured
approx_execution_time: timedelta
added_to_bucket: bool = field(default=False, init=False)
parent: TestFolder | None = field(default=None, init=False)
def add_to_bucket(self) -> None:
"""Add test file to bucket."""
@@ -98,9 +168,18 @@ class TestFile:
raise ValueError("Already added to bucket")
self.added_to_bucket = True
@property
def not_measured_files(self) -> int:
"""Return files not measured."""
return 1 if self.approx_execution_time.total_seconds() == 0 else 0
def __gt__(self, other: TestFile) -> bool:
"""Return if greater than."""
return self.total_tests > other.total_tests
return self.approx_execution_time > other.approx_execution_time
def __hash__(self) -> int:
"""Return hash."""
return hash(self.path)
class TestFolder:
@@ -112,15 +191,31 @@ class TestFolder:
self.children: dict[Path, TestFolder | TestFile] = {}
@property
def total_tests(self) -> int:
"""Return total tests."""
return sum([test.total_tests for test in self.children.values()])
def approx_execution_time(self) -> timedelta:
"""Return approximate execution time."""
time = timedelta(seconds=0)
for test in self.children.values():
time += test.approx_execution_time
return time
@property
def not_measured_files(self) -> int:
"""Return files not measured."""
return sum([test.not_measured_files for test in self.children.values()])
@property
def added_to_bucket(self) -> bool:
"""Return if added to bucket."""
return all(test.added_to_bucket for test in self.children.values())
def count_children(self) -> int:
"""Return the number of children."""
return len(self.children) + sum(
child.count_children()
for child in self.children.values()
if isinstance(child, TestFolder)
)
def add_to_bucket(self) -> None:
"""Add test file to bucket."""
if self.added_to_bucket:
@@ -130,11 +225,18 @@ class TestFolder:
def __repr__(self) -> str:
"""Return representation."""
return (
f"TestFolder(total_tests={self.total_tests}, children={len(self.children)})"
return f"TestFolder(approx_execution_time={self.approx_execution_time}, children={len(self.children)})"
def add_test_file(
self, path: Path, execution_time: float, skip_file_if_present: bool
) -> None:
"""Add test file to folder."""
self._add_test_file(
TestFile(path, self, timedelta(seconds=execution_time)),
skip_file_if_present,
)
def add_test_file(self, file: TestFile) -> None:
def _add_test_file(self, file: TestFile, skip_file_if_present: bool) -> None:
"""Add test file to folder."""
path = file.path
file.parent = self
@@ -143,6 +245,10 @@ class TestFolder:
raise ValueError("Path is not a child of this folder")
if len(relative_path.parts) == 1:
if path in self.children:
if skip_file_if_present:
return
raise ValueError(f"File already exists: {path}")
self.children[path] = file
return
@@ -151,7 +257,7 @@ class TestFolder:
self.children[child_path] = child = TestFolder(child_path)
elif not isinstance(child, TestFolder):
raise ValueError("Child is not a folder")
child.add_test_file(file)
child._add_test_file(file, skip_file_if_present)
def get_all_flatten(self) -> list[TestFolder | TestFile]:
"""Return self and all children as flatten list."""
@@ -164,35 +270,21 @@ class TestFolder:
return result
def collect_tests(path: Path) -> TestFolder:
"""Collect all tests."""
result = subprocess.run(
["pytest", "--collect-only", "-qq", "-p", "no:warnings", path],
check=False,
capture_output=True,
text=True,
)
def process_execution_time_file(
execution_time_file: Path, test_folder: TestFolder
) -> None:
"""Process the execution time file."""
for file, execution_time in load_json_object(execution_time_file).items():
test_folder.add_test_file(Path(file), cast(float, execution_time), False)
if result.returncode != 0:
print("Failed to collect tests:")
print(result.stderr)
print(result.stdout)
sys.exit(1)
folder = TestFolder(path)
for line in result.stdout.splitlines():
if not line.strip():
continue
file_path, _, total_tests = line.partition(": ")
if not path or not total_tests:
print(f"Unexpected line: {line}")
sys.exit(1)
file = TestFile(int(total_tests), Path(file_path))
folder.add_test_file(file)
return folder
def add_missing_test_files(folder: Path, test_folder: TestFolder) -> None:
"""Scan test folder for missing files."""
for path in folder.iterdir():
if path.is_dir():
add_missing_test_files(path, test_folder)
elif path.name.startswith("test_") and path.suffix == ".py":
test_folder.add_test_file(path, 0.0, True)
def main() -> None:
@@ -213,24 +305,31 @@ def main() -> None:
type=check_greater_0,
)
parser.add_argument(
"path",
"test_folder",
help="Path to the test files to split into buckets",
type=Path,
)
parser.add_argument(
"execution_time_file",
help="Path to the file containing the execution time of each test",
type=Path,
)
arguments = parser.parse_args()
print("Collecting tests...")
tests = collect_tests(arguments.path)
tests_per_bucket = ceil(tests.total_tests / arguments.bucket_count)
tests = TestFolder(arguments.test_folder)
bucket_holder = BucketHolder(tests_per_bucket, arguments.bucket_count)
if arguments.execution_time_file.exists():
print(f"Using execution time file: {arguments.execution_time_file}")
process_execution_time_file(arguments.execution_time_file, tests)
print("Scanning test files...")
add_missing_test_files(arguments.test_folder, tests)
bucket_holder = BucketHolder(arguments.bucket_count)
print("Splitting tests...")
bucket_holder.split_tests(tests)
print(f"Total tests: {tests.total_tests}")
print(f"Estimated tests per bucket: {tests_per_bucket}")
bucket_holder.create_output_file()
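
At its core the rewritten splitter is greedy longest-processing-time-first bin packing: flatten the test tree, sort by descending approximate execution time, and hand each item to the currently cheapest bucket. The real implementation layers more on top (keeping folders together, balancing not-measured files across buckets, and pulling sibling files into the same bucket for syrupy), but the core idea fits in a few lines. A minimal sketch with a hypothetical function name and made-up data:

from datetime import timedelta


def split_greedy(times: dict[str, timedelta], bucket_count: int) -> list[list[str]]:
    """Assign the biggest remaining item to the least-loaded bucket."""
    totals = [timedelta(0) for _ in range(bucket_count)]
    buckets: list[list[str]] = [[] for _ in range(bucket_count)]
    for path, duration in sorted(times.items(), key=lambda kv: kv[1], reverse=True):
        idx = min(range(bucket_count), key=lambda i: totals[i])
        totals[idx] += duration
        buckets[idx].append(path)
    return buckets


print(split_greedy(
    {
        "tests/a.py": timedelta(seconds=90),
        "tests/b.py": timedelta(seconds=60),
        "tests/c.py": timedelta(seconds=45),
    },
    2,
))  # [['tests/a.py'], ['tests/b.py', 'tests/c.py']]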

tests/conftest.py

@@ -44,6 +44,7 @@ from syrupy.session import SnapshotSession
from homeassistant import block_async_io
from homeassistant.exceptions import ServiceNotFound
from homeassistant.helpers.json import save_json
# Setup patching of recorder functions before any other Home Assistant imports
from . import patch_recorder
@@ -51,6 +52,8 @@ from . import patch_recorder
# Setup patching of dt_util time functions before any other Home Assistant imports
from . import patch_time # noqa: F401, isort:skip
from _pytest.terminal import TerminalReporter
from homeassistant import components, core as ha, loader, runner
from homeassistant.auth.const import GROUP_ID_ADMIN, GROUP_ID_READ_ONLY
from homeassistant.auth.models import Credentials
@@ -123,6 +126,7 @@ if TYPE_CHECKING:
pytest.register_assert_rewrite("tests.common")
from .common import ( # noqa: E402, isort:skip
CLIENT_ID,
INSTANCES,
@@ -153,6 +157,38 @@ asyncio.set_event_loop_policy = lambda policy: None
def pytest_addoption(parser: pytest.Parser) -> None:
"""Register custom pytest options."""
parser.addoption("--dburl", action="store", default="sqlite://")
parser.addoption(
"--execution-time-report-name",
action="store",
default="pytest-execution-time-report.json",
)
class PytestExecutionTimeReport:
"""Pytest plugin to generate a JSON report with the execution time of each test."""
def pytest_terminal_summary(
self,
terminalreporter: TerminalReporter,
exitstatus: pytest.ExitCode,
config: pytest.Config,
) -> None:
"""Generate a JSON report with the execution time of each test."""
if config.option.collectonly:
return
data: dict[str, float] = {}
for replist in terminalreporter.stats.values():
for rep in replist:
if isinstance(rep, pytest.TestReport):
location = rep.location[0]
if location not in data:
data[location] = rep.duration
else:
data[location] += rep.duration
time_report_filename = config.option.execution_time_report_name
save_json(time_report_filename, data)
def pytest_configure(config: pytest.Config) -> None:
@@ -167,6 +203,7 @@ def pytest_configure(config: pytest.Config) -> None:
# Temporary workaround until it is finalised inside syrupy
# See https://github.com/syrupy-project/syrupy/pull/901
SnapshotSession.finish = override_syrupy_finish
config.pluginmanager.register(PytestExecutionTimeReport())
def pytest_runtest_setup() -> None:
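
The plugin's aggregation works because pytest emits one TestReport per phase (setup, call, teardown) of every test, each carrying a duration, and rep.location[0] is the test's file path, so summing every report keyed by that path yields total wall time per test file. A standalone sketch with fake report objects (FakeReport and the numbers are made up):

from collections import defaultdict
from typing import NamedTuple


class FakeReport(NamedTuple):
    """Stand-in for pytest.TestReport: location tuple plus phase duration."""

    location: tuple[str, int | None, str]
    duration: float


reports = [
    FakeReport(("tests/test_a.py", 1, "test_one"), 0.25),  # setup
    FakeReport(("tests/test_a.py", 1, "test_one"), 1.25),  # call
    FakeReport(("tests/test_b.py", 5, "test_two"), 0.50),  # call
]

data: defaultdict[str, float] = defaultdict(float)
for rep in reports:
    data[rep.location[0]] += rep.duration

print(dict(data))  # {'tests/test_a.py': 1.5, 'tests/test_b.py': 0.5}

The --execution-time-report-name option exists so each CI group can write a uniquely named report file, keeping the per-group artifacts from colliding before they are merged.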