Compare commits

...

13 Commits

Author        SHA1        Message                                             Date
Robert Resch  7d2818d882  Improve printing                                    2025-03-28 23:06:28 +01:00
Robert Resch  16c56d9f6b  Improve script                                      2025-03-28 22:40:12 +01:00
Robert Resch  98f32c204b  Fix script call                                     2025-03-28 21:21:52 +01:00
Robert Resch  0c7a2759c3  fix                                                 2025-03-28 20:53:13 +01:00
Robert Resch  b869546c12  create dedicated python script to combine reports   2025-03-28 20:20:30 +01:00
Robert Resch  b7b18d2f51  Print not measured files at the end                 2025-03-28 19:26:34 +01:00
Robert Resch  b447f62bda  Small improvements                                  2025-03-27 18:38:14 +01:00
Robert Resch  ffc0dae6a4  Fix file name                                       2025-03-27 18:09:07 +01:00
Robert Resch  269a21584b  Fix cleanup                                         2025-03-27 17:10:59 +01:00
Robert Resch  219a8cdf40  Improve report plugin                               2025-03-27 16:53:50 +01:00
Robert Resch  9cc61d46cf  typo                                                2025-03-27 16:22:37 +01:00
Robert Resch  b42f1395fa  Fix split tests                                     2025-03-27 16:17:47 +01:00
Robert Resch  7b0e4871da  Split test by execution time                        2025-03-27 15:20:05 +01:00
5 changed files with 331 additions and 76 deletions

.github/workflows/ci.yaml

@@ -876,15 +876,6 @@ jobs:
- mypy
name: Split tests for full run
steps:
- name: Install additional OS dependencies
run: |
sudo rm /etc/apt/sources.list.d/microsoft-prod.list
sudo apt-get update
sudo apt-get -y install \
bluez \
ffmpeg \
libturbojpeg \
libgammu-dev
- name: Check out code from GitHub
uses: actions/checkout@v4.2.2
- name: Set up Python ${{ env.DEFAULT_PYTHON }}
@@ -893,6 +884,18 @@ jobs:
with:
python-version: ${{ env.DEFAULT_PYTHON }}
check-latest: true
- name: Generate partial pytest execution time restore key
id: generate-pytest-execution-time-report-key
run: |
echo "key=pytest-execution-time-report-$(date -u '+%Y-%m-%dT%H:%M:%s')" >> $GITHUB_OUTPUT
- name: Restore pytest execution time cache
uses: actions/cache/restore@v4.2.3
with:
path: pytest-execution-time-report.json
key: >-
${{ runner.os }}-${{ steps.generate-pytest-execution-time-report-key.outputs.key }}
restore-keys: |
${{ runner.os }}-pytest-execution-time-report-
- name: Restore base Python virtual environment
id: cache-venv
uses: actions/cache/restore@v4.2.3
@@ -905,7 +908,8 @@ jobs:
- name: Run split_tests.py
run: |
. venv/bin/activate
python -m script.split_tests ${{ needs.info.outputs.test_group_count }} tests
python -m script.split_tests ${{ needs.info.outputs.test_group_count }} \
tests pytest-execution-time-report.json
- name: Upload pytest_buckets
uses: actions/upload-artifact@v4.6.2
with:
@@ -1002,6 +1006,7 @@ jobs:
${cov_params[@]} \
-o console_output_style=count \
-p no:sugar \
--execution-time-report-name pytest-execution-time-report-${{ matrix.python-version }}-${{ matrix.group }}.json \
--exclude-warning-annotations \
$(sed -n "${{ matrix.group }},1p" pytest_buckets.txt) \
2>&1 | tee pytest-${{ matrix.python-version }}-${{ matrix.group }}.txt
@@ -1010,7 +1015,9 @@ jobs:
uses: actions/upload-artifact@v4.6.2
with:
name: pytest-${{ github.run_number }}-${{ matrix.python-version }}-${{ matrix.group }}
path: pytest-*.txt
path: |
pytest-*.txt
pytest-*.json
overwrite: true
- name: Upload coverage artifact
if: needs.info.outputs.skip_coverage != 'true'
@@ -1025,12 +1032,60 @@ jobs:
with:
name: test-results-full-${{ matrix.python-version }}-${{ matrix.group }}
path: junit.xml
- name: Remove pytest_buckets
run: rm pytest_buckets.txt
- name: Check dirty
run: |
./script/check_dirty
pytest-combine-test-execution-time:
runs-on: ubuntu-24.04
needs:
- info
- pytest-full
name: Combine test execution times
steps:
- name: Check out code from GitHub
uses: actions/checkout@v4.2.2
- name: Set up Python ${{ env.DEFAULT_PYTHON }}
id: python
uses: actions/setup-python@v5.5.0
with:
python-version: ${{ env.DEFAULT_PYTHON }}
check-latest: true
- name: Restore base Python virtual environment
id: cache-venv
uses: actions/cache/restore@v4.2.3
with:
path: venv
fail-on-cache-miss: true
key: >-
${{ runner.os }}-${{ steps.python.outputs.python-version }}-${{
needs.info.outputs.python_cache_key }}
- name: Generate partial pytest execution time restore key
id: generate-pytest-execution-time-report-key
run: |
echo "key=pytest-execution-time-report-$(date -u '+%Y-%m-%dT%H:%M:%s')" >> $GITHUB_OUTPUT
- name: Download pytest execution time artifacts
uses: actions/download-artifact@v4.2.1
with:
pattern: pytest-${{ github.run_number }}-${{ env.DEFAULT_PYTHON }}-*
merge-multiple: true
- name: Combine files into one
run: |
. venv/bin/activate
python -m script.merge_pytest_execution_time_reports "pytest-execution-time-report-${{ env.DEFAULT_PYTHON }}-*.json"
- name: Upload combined pytest execution time artifact
uses: actions/upload-artifact@v4.6.2
with:
name: pytest-execution-time-report-${{ github.run_number }}
path: pytest-execution-time-report.json
- name: Save pytest execution time cache
uses: actions/cache/save@v4.2.3
with:
path: pytest-execution-time-report.json
key: >-
${{ runner.os }}-${{
steps.generate-pytest-execution-time-report-key.outputs.key }}
pytest-mariadb:
runs-on: ubuntu-24.04
services:
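
How the new pieces fit together: each pytest-full group writes its own pytest-execution-time-report-<python>-<group>.json via the new --execution-time-report-name option, the pytest-combine-test-execution-time job merges those into a single pytest-execution-time-report.json and saves it to the actions cache, and the split job of the next run restores that file and feeds it to script.split_tests. Because actions/cache entries are immutable per key, the "Generate partial pytest execution time restore key" steps embed a UTC timestamp so every run saves under a fresh key, while restore-keys falls back to the newest entry matching the bare prefix. A minimal sketch of that save-unique/restore-latest pattern (illustrative only; these function names are hypothetical, not part of the workflow):

from datetime import datetime, timezone

PREFIX = "pytest-execution-time-report-"


def generate_key() -> str:
    """Mirror the workflow's `date -u '+%Y-%m-%dT%H:%M:%S'` key suffix."""
    return PREFIX + datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")


def restore_latest(saved_keys: list[str]) -> str | None:
    """Prefix match like restore-keys; ISO timestamps sort lexicographically."""
    matches = sorted(key for key in saved_keys if key.startswith(PREFIX))
    return matches[-1] if matches else None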

.gitignore

@@ -137,4 +137,7 @@ tmp_cache
.ropeproject
# Will be created from script/split_tests.py
pytest_buckets.txt
# Contains test execution times used for splitting tests
pytest-execution-time-report*.json

script/merge_pytest_execution_time_reports.py

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
"""Helper script to merge all pytest execution time reports into one file."""

from __future__ import annotations

import argparse
import pathlib

from homeassistant.helpers.json import save_json
from homeassistant.util.json import load_json_object


def merge_json_files(pattern: str, output_file: str) -> None:
    """Merge JSON files matching the pattern into a single JSON file."""
    # Needs to be in sync with PytestExecutionTimeReport in conftest.py
    result: dict[str, float] = {}
    for file in pathlib.Path().glob(pattern):
        print(f"Processing {file}")
        data = load_json_object(file)
        if not isinstance(data, dict):
            print(f"Skipping {file} due to invalid data format.")
            continue
        for key, value in data.items():
            if not isinstance(value, (int, float)):
                print(
                    f"Skipping {key} in {file} due to invalid value type: {type(value)}."
                )
                continue
            if key in result:
                result[key] += value
            else:
                result[key] = value

    # Write the merged data to the output file
    save_json(output_file, result)


def main() -> None:
    """Execute script."""
    parser = argparse.ArgumentParser(
        description="Merge all pytest execution time reports into one file."
    )
    parser.add_argument(
        "pattern",
        help="Glob pattern to match JSON pytest execution time report files",
        type=str,
    )
    parser.add_argument(
        "output_file",
        help="Path to the output file",
        type=str,
        nargs="?",
        default="pytest-execution-time-report.json",
    )
    arguments = parser.parse_args()

    merge_json_files(arguments.pattern, arguments.output_file)


if __name__ == "__main__":
    main()
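
The merge itself is just a per-key sum. A worked example with hypothetical report contents (paths and durations are made up, chosen to be exact in binary floating point):

report_group_1 = {"tests/components/demo/test_light.py": 12.5}
report_group_2 = {
    "tests/components/demo/test_light.py": 3.25,
    "tests/components/demo/test_switch.py": 7.75,
}

merged: dict[str, float] = {}
for path, duration in [*report_group_1.items(), *report_group_2.items()]:
    merged[path] = merged.get(path, 0.0) + duration

assert merged == {
    "tests/components/demo/test_light.py": 15.75,
    "tests/components/demo/test_switch.py": 7.75,
}

In a full run every test file lands in exactly one group, so the sum normally just unions the per-group reports; summing also keeps the result sensible if a file ever shows up in more than one report.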

script/split_tests.py

@@ -5,11 +5,11 @@ from __future__ import annotations
import argparse
from dataclasses import dataclass, field
from math import ceil
from datetime import timedelta
from pathlib import Path
import subprocess
import sys
from typing import Final
from typing import Final, cast
from homeassistant.util.json import load_json_object
class Bucket:
@@ -19,13 +19,15 @@ class Bucket:
self,
):
"""Initialize bucket."""
self.total_tests = 0
self.approx_execution_time = timedelta(seconds=0)
self.not_measured_files = 0
self._paths: list[str] = []
def add(self, part: TestFolder | TestFile) -> None:
"""Add tests to bucket."""
part.add_to_bucket()
self.total_tests += part.total_tests
self.approx_execution_time += part.approx_execution_time
self.not_measured_files += part.not_measured_files
self._paths.append(str(part.path))
def get_paths_line(self) -> str:
@@ -33,64 +35,132 @@ class Bucket:
return " ".join(self._paths) + "\n"
def add_not_measured_files(
test: TestFolder | TestFile, not_measured_files: set[TestFile]
) -> None:
"""Add not measured files to test folder."""
if test.not_measured_files > 0:
if isinstance(test, TestFolder):
for child in test.children.values():
add_not_measured_files(child, not_measured_files)
else:
not_measured_files.add(test)
def sort_by_not_measured(bucket: Bucket) -> tuple[int, float]:
"""Sort by not measured files."""
return (bucket.not_measured_files, bucket.approx_execution_time.total_seconds())
def sort_by_execution_time(bucket: Bucket) -> tuple[float, int]:
"""Sort by execution time."""
return (bucket.approx_execution_time.total_seconds(), bucket.not_measured_files)
class BucketHolder:
"""Class to hold buckets."""
def __init__(self, tests_per_bucket: int, bucket_count: int) -> None:
def __init__(self, bucket_count: int) -> None:
"""Initialize bucket holder."""
self._tests_per_bucket = tests_per_bucket
self._bucket_count = bucket_count
self._buckets: list[Bucket] = [Bucket() for _ in range(bucket_count)]
def split_tests(self, test_folder: TestFolder) -> None:
"""Split tests into buckets."""
digits = len(str(test_folder.total_tests))
avg_execution_time = test_folder.approx_execution_time / self._bucket_count
avg_not_measured_files = test_folder.not_measured_files / self._bucket_count
sorted_tests = sorted(
test_folder.get_all_flatten(), reverse=True, key=lambda x: x.total_tests
test_folder.get_all_flatten(),
key=lambda x: (
-x.approx_execution_time,
-x.count_children() if isinstance(x, TestFolder) else 0,
x.not_measured_files,
),
)
not_measured_tests = set()
for tests in sorted_tests:
if tests.added_to_bucket:
# Already added to bucket
continue
print(f"{tests.total_tests:>{digits}} tests in {tests.path}")
smallest_bucket = min(self._buckets, key=lambda x: x.total_tests)
print(f"~{tests.approx_execution_time} execution time for {tests.path}")
is_file = isinstance(tests, TestFile)
sort_key = sort_by_execution_time
if tests.not_measured_files and tests.approx_execution_time == timedelta(seconds=0):
# If tests are not measured, sort by not measured files
sort_key = sort_by_not_measured
smallest_bucket = min(self._buckets, key=sort_key)
if (
smallest_bucket.total_tests + tests.total_tests < self._tests_per_bucket
(smallest_bucket.approx_execution_time + tests.approx_execution_time)
< avg_execution_time
and (smallest_bucket.not_measured_files + tests.not_measured_files)
< avg_not_measured_files
) or is_file:
smallest_bucket.add(tests)
add_not_measured_files(
tests,
not_measured_tests,
)
# Ensure all files from the same folder are in the same bucket
# to ensure that syrupy correctly identifies unused snapshots
if is_file:
added_tests = []
for other_test in tests.parent.children.values():
if other_test is tests or isinstance(other_test, TestFolder):
continue
print(
f"{other_test.total_tests:>{digits}} tests in {other_test.path} (same bucket)"
)
smallest_bucket.add(other_test)
added_tests.append(other_test)
add_not_measured_files(
other_test,
not_measured_tests,
)
if added_tests:
print(
f"Added {len(added_tests)} tests to the same bucket so syrupy can identify unused snapshots"
)
print(
" - "
+ "\n - ".join(
str(test.path) for test in sorted(added_tests)
)
)
# verify that all tests are added to a bucket
if not test_folder.added_to_bucket:
raise ValueError("Not all tests are added to a bucket")
if not_measured_tests:
print(f"Found {len(not_measured_tests)} not measured test files: ")
for test in sorted(not_measured_tests, key=lambda x: x.path):
print(f" - {test.path}")
def create_output_file(self) -> None:
"""Create output file."""
with Path("pytest_buckets.txt").open("w") as file:
for idx, bucket in enumerate(self._buckets):
print(f"Bucket {idx + 1} has {bucket.total_tests} tests")
print(
f"Bucket {idx + 1} execution time should be ~{str_without_milliseconds(bucket.approx_execution_time)}"
f" with {bucket.not_measured_files} not measured files"
)
file.write(bucket.get_paths_line())
def str_without_milliseconds(td: timedelta) -> str:
"""Return str without milliseconds."""
return str(td).split(".")[0]
@dataclass
class TestFile:
"""Class represents a single test file and the number of tests it has."""
total_tests: int
path: Path
parent: TestFolder
# 0 means not measured
approx_execution_time: timedelta
added_to_bucket: bool = field(default=False, init=False)
parent: TestFolder | None = field(default=None, init=False)
def add_to_bucket(self) -> None:
"""Add test file to bucket."""
@@ -98,9 +168,18 @@ class TestFile:
raise ValueError("Already added to bucket")
self.added_to_bucket = True
@property
def not_measured_files(self) -> int:
"""Return files not measured."""
return 1 if self.approx_execution_time.total_seconds() == 0 else 0
def __gt__(self, other: TestFile) -> bool:
"""Return if greater than."""
return self.total_tests > other.total_tests
return self.approx_execution_time > other.approx_execution_time
def __hash__(self) -> int:
"""Return hash."""
return hash(self.path)
class TestFolder:
@@ -112,15 +191,31 @@ class TestFolder:
self.children: dict[Path, TestFolder | TestFile] = {}
@property
def total_tests(self) -> int:
"""Return total tests."""
return sum([test.total_tests for test in self.children.values()])
def approx_execution_time(self) -> timedelta:
"""Return approximate execution time."""
time = timedelta(seconds=0)
for test in self.children.values():
time += test.approx_execution_time
return time
@property
def not_measured_files(self) -> int:
"""Return files not measured."""
return sum([test.not_measured_files for test in self.children.values()])
@property
def added_to_bucket(self) -> bool:
"""Return if added to bucket."""
return all(test.added_to_bucket for test in self.children.values())
def count_children(self) -> int:
"""Return the number of children."""
return len(self.children) + sum(
child.count_children()
for child in self.children.values()
if isinstance(child, TestFolder)
)
def add_to_bucket(self) -> None:
"""Add test file to bucket."""
if self.added_to_bucket:
@@ -130,11 +225,18 @@ class TestFolder:
def __repr__(self) -> str:
"""Return representation."""
return (
f"TestFolder(total_tests={self.total_tests}, children={len(self.children)})"
return f"TestFolder(approx_execution_time={self.approx_execution_time}, children={len(self.children)})"
def add_test_file(
self, path: Path, execution_time: float, skip_file_if_present: bool
) -> None:
"""Add test file to folder."""
self._add_test_file(
TestFile(path, self, timedelta(seconds=execution_time)),
skip_file_if_present,
)
def add_test_file(self, file: TestFile) -> None:
def _add_test_file(self, file: TestFile, skip_file_if_present: bool) -> None:
"""Add test file to folder."""
path = file.path
file.parent = self
@@ -143,6 +245,10 @@ class TestFolder:
raise ValueError("Path is not a child of this folder")
if len(relative_path.parts) == 1:
if path in self.children:
if skip_file_if_present:
return
raise ValueError(f"File already exists: {path}")
self.children[path] = file
return
@@ -151,7 +257,7 @@ class TestFolder:
self.children[child_path] = child = TestFolder(child_path)
elif not isinstance(child, TestFolder):
raise ValueError("Child is not a folder")
child.add_test_file(file)
child._add_test_file(file, skip_file_if_present)
def get_all_flatten(self) -> list[TestFolder | TestFile]:
"""Return self and all children as flatten list."""
@@ -164,35 +270,21 @@ class TestFolder:
return result
def collect_tests(path: Path) -> TestFolder:
"""Collect all tests."""
result = subprocess.run(
["pytest", "--collect-only", "-qq", "-p", "no:warnings", path],
check=False,
capture_output=True,
text=True,
)
def process_execution_time_file(
execution_time_file: Path, test_folder: TestFolder
) -> None:
"""Process the execution time file."""
for file, execution_time in load_json_object(execution_time_file).items():
test_folder.add_test_file(Path(file), cast(float, execution_time), False)
if result.returncode != 0:
print("Failed to collect tests:")
print(result.stderr)
print(result.stdout)
sys.exit(1)
folder = TestFolder(path)
for line in result.stdout.splitlines():
if not line.strip():
continue
file_path, _, total_tests = line.partition(": ")
if not path or not total_tests:
print(f"Unexpected line: {line}")
sys.exit(1)
file = TestFile(int(total_tests), Path(file_path))
folder.add_test_file(file)
return folder
def add_missing_test_files(folder: Path, test_folder: TestFolder) -> None:
"""Scan test folder for missing files."""
for path in folder.iterdir():
if path.is_dir():
add_missing_test_files(path, test_folder)
elif path.name.startswith("test_") and path.suffix == ".py":
test_folder.add_test_file(path, 0.0, True)
def main() -> None:
@@ -213,24 +305,31 @@ def main() -> None:
type=check_greater_0,
)
parser.add_argument(
"path",
"test_folder",
help="Path to the test files to split into buckets",
type=Path,
)
parser.add_argument(
"execution_time_file",
help="Path to the file containing the execution time of each test",
type=Path,
)
arguments = parser.parse_args()
print("Collecting tests...")
tests = collect_tests(arguments.path)
tests_per_bucket = ceil(tests.total_tests / arguments.bucket_count)
tests = TestFolder(arguments.test_folder)
bucket_holder = BucketHolder(tests_per_bucket, arguments.bucket_count)
if arguments.execution_time_file.exists():
print(f"Using execution time file: {arguments.execution_time_file}")
process_execution_time_file(arguments.execution_time_file, tests)
print("Scanning test files...")
add_missing_test_files(arguments.test_folder, tests)
bucket_holder = BucketHolder(arguments.bucket_count)
print("Splitting tests...")
bucket_holder.split_tests(tests)
print(f"Total tests: {tests.total_tests}")
print(f"Estimated tests per bucket: {tests_per_bucket}")
bucket_holder.create_output_file()
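
At its core the rewritten splitter is greedy longest-processing-time-first bin packing: flatten the test tree, sort by descending approximate execution time, and hand each item to the currently cheapest bucket. The real implementation layers more on top (keeping folders together, balancing not-measured files across buckets, and pulling sibling files into the same bucket for syrupy), but the core idea fits in a few lines. A minimal sketch with a hypothetical function name and made-up data:

from datetime import timedelta


def split_greedy(times: dict[str, timedelta], bucket_count: int) -> list[list[str]]:
    """Assign the biggest remaining item to the least-loaded bucket."""
    totals = [timedelta(0) for _ in range(bucket_count)]
    buckets: list[list[str]] = [[] for _ in range(bucket_count)]
    for path, duration in sorted(times.items(), key=lambda kv: kv[1], reverse=True):
        idx = min(range(bucket_count), key=lambda i: totals[i])
        totals[idx] += duration
        buckets[idx].append(path)
    return buckets


print(split_greedy(
    {
        "tests/a.py": timedelta(seconds=90),
        "tests/b.py": timedelta(seconds=60),
        "tests/c.py": timedelta(seconds=45),
    },
    2,
))  # [['tests/a.py'], ['tests/b.py', 'tests/c.py']]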

tests/conftest.py

@@ -44,6 +44,7 @@ from syrupy.session import SnapshotSession
from homeassistant import block_async_io
from homeassistant.exceptions import ServiceNotFound
from homeassistant.helpers.json import save_json
# Setup patching of recorder functions before any other Home Assistant imports
from . import patch_recorder
@@ -51,6 +52,8 @@ from . import patch_recorder
# Setup patching of dt_util time functions before any other Home Assistant imports
from . import patch_time # noqa: F401, isort:skip
from _pytest.terminal import TerminalReporter
from homeassistant import components, core as ha, loader, runner
from homeassistant.auth.const import GROUP_ID_ADMIN, GROUP_ID_READ_ONLY
from homeassistant.auth.models import Credentials
@@ -123,6 +126,7 @@ if TYPE_CHECKING:
pytest.register_assert_rewrite("tests.common")
from .common import ( # noqa: E402, isort:skip
CLIENT_ID,
INSTANCES,
@@ -153,6 +157,38 @@ asyncio.set_event_loop_policy = lambda policy: None
def pytest_addoption(parser: pytest.Parser) -> None:
"""Register custom pytest options."""
parser.addoption("--dburl", action="store", default="sqlite://")
parser.addoption(
"--execution-time-report-name",
action="store",
default="pytest-execution-time-report.json",
)
class PytestExecutionTimeReport:
"""Pytest plugin to generate a JSON report with the execution time of each test."""
def pytest_terminal_summary(
self,
terminalreporter: TerminalReporter,
exitstatus: pytest.ExitCode,
config: pytest.Config,
) -> None:
"""Generate a JSON report with the execution time of each test."""
if config.option.collectonly:
return
data: dict[str, float] = {}
for replist in terminalreporter.stats.values():
for rep in replist:
if isinstance(rep, pytest.TestReport):
location = rep.location[0]
if location not in data:
data[location] = rep.duration
else:
data[location] += rep.duration
time_report_filename = config.option.execution_time_report_name
save_json(time_report_filename, data)
def pytest_configure(config: pytest.Config) -> None:
@@ -167,6 +203,7 @@ def pytest_configure(config: pytest.Config) -> None:
# Temporary workaround until it is finalised inside syrupy
# See https://github.com/syrupy-project/syrupy/pull/901
SnapshotSession.finish = override_syrupy_finish
config.pluginmanager.register(PytestExecutionTimeReport())
def pytest_runtest_setup() -> None:
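
The plugin's aggregation works because pytest emits one TestReport per phase (setup, call, teardown) of every test, each carrying a duration, and rep.location[0] is the test's file path, so summing every report keyed by that path yields total wall time per test file. A standalone sketch with fake report objects (FakeReport and the numbers are made up):

from collections import defaultdict
from typing import NamedTuple


class FakeReport(NamedTuple):
    """Stand-in for pytest.TestReport: location tuple plus phase duration."""

    location: tuple[str, int | None, str]
    duration: float


reports = [
    FakeReport(("tests/test_a.py", 1, "test_one"), 0.25),  # setup
    FakeReport(("tests/test_a.py", 1, "test_one"), 1.25),  # call
    FakeReport(("tests/test_b.py", 5, "test_two"), 0.50),  # call
]

data: defaultdict[str, float] = defaultdict(float)
for rep in reports:
    data[rep.location[0]] += rep.duration

print(dict(data))  # {'tests/test_a.py': 1.5, 'tests/test_b.py': 0.5}

The --execution-time-report-name option exists so each CI group can write a uniquely named report file, keeping the per-group artifacts from colliding before they are merged.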