Improve script

This commit is contained in:
Robert Resch 2025-03-28 22:40:12 +01:00
parent 98f32c204b
commit 16c56d9f6b
No known key found for this signature in database
GPG Key ID: 9D9D9DCB43120143

View File

@ -5,6 +5,7 @@ from __future__ import annotations
import argparse import argparse
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import timedelta
from pathlib import Path from pathlib import Path
from typing import Final, cast from typing import Final, cast
@ -18,7 +19,7 @@ class Bucket:
self, self,
): ):
"""Initialize bucket.""" """Initialize bucket."""
self.approx_execution_time = 0.0 self.approx_execution_time = timedelta(seconds=0)
self.not_measured_files = 0 self.not_measured_files = 0
self._paths: list[str] = [] self._paths: list[str] = []
@ -46,6 +47,16 @@ def add_not_measured_files(
not_measured_files.add(test) not_measured_files.add(test)
def sort_by_not_measured(bucket: Bucket) -> tuple[int, float]:
    """Build a bucket ordering key led by the count of not measured files.

    The bucket's approximate execution time, expressed in seconds, is the
    second element and therefore only breaks ties between equal counts.
    """
    unmeasured_count = bucket.not_measured_files
    elapsed_seconds = bucket.approx_execution_time.total_seconds()
    return unmeasured_count, elapsed_seconds
def sort_by_execution_time(bucket: Bucket) -> tuple[float, int]:
    """Build a bucket ordering key led by approximate execution time.

    The time is expressed in seconds; the count of not measured files is
    the second element and only breaks ties between equal times.
    """
    elapsed_seconds = bucket.approx_execution_time.total_seconds()
    unmeasured_count = bucket.not_measured_files
    return elapsed_seconds, unmeasured_count
class BucketHolder: class BucketHolder:
"""Class to hold buckets.""" """Class to hold buckets."""
@ -58,7 +69,6 @@ class BucketHolder:
"""Split tests into buckets.""" """Split tests into buckets."""
avg_execution_time = test_folder.approx_execution_time / self._bucket_count avg_execution_time = test_folder.approx_execution_time / self._bucket_count
avg_not_measured_files = test_folder.not_measured_files / self._bucket_count avg_not_measured_files = test_folder.not_measured_files / self._bucket_count
digits = len(str(round(test_folder.approx_execution_time, 0)))
sorted_tests = sorted( sorted_tests = sorted(
test_folder.get_all_flatten(), test_folder.get_all_flatten(),
key=lambda x: ( key=lambda x: (
@ -68,54 +78,45 @@ class BucketHolder:
), ),
) )
not_measured_tests = set() not_measured_tests = set()
bucket_sort_keys = (
lambda x: (x.not_measured_files, x.approx_execution_time),
lambda x: (x.approx_execution_time, x.not_measured_files),
)
for tests in sorted_tests: for tests in sorted_tests:
if tests.added_to_bucket: if tests.added_to_bucket:
# Already added to bucket # Already added to bucket
continue continue
print( print(f"~{tests.approx_execution_time} execution time for {tests.path}")
f"~{round(tests.approx_execution_time, 2):>{digits}}s execution time for {tests.path}"
)
is_file = isinstance(tests, TestFile) is_file = isinstance(tests, TestFile)
for sort_key in bucket_sort_keys: sort_key = sort_by_execution_time
smallest_bucket = min(self._buckets, key=sort_key) if tests.not_measured_files and tests.approx_execution_time == 0:
# If tests are not measured, sort by not measured files
sort_key = sort_by_not_measured
if ( smallest_bucket = min(self._buckets, key=sort_key)
( if (
smallest_bucket.approx_execution_time (smallest_bucket.approx_execution_time + tests.approx_execution_time)
+ tests.approx_execution_time < avg_execution_time
) and (smallest_bucket.not_measured_files + tests.not_measured_files)
< avg_execution_time < avg_not_measured_files
and (smallest_bucket.not_measured_files + tests.not_measured_files) ) or is_file:
< avg_not_measured_files smallest_bucket.add(tests)
) or is_file: add_not_measured_files(
smallest_bucket.add(tests) tests,
add_not_measured_files( not_measured_tests,
tests, )
not_measured_tests, # Ensure all files from the same folder are in the same bucket
) # to ensure that syrupy correctly identifies unused snapshots
# Ensure all files from the same folder are in the same bucket if is_file:
# to ensure that syrupy correctly identifies unused snapshots for other_test in tests.parent.children.values():
if is_file: if other_test is tests or isinstance(other_test, TestFolder):
for other_test in tests.parent.children.values(): continue
if other_test is tests or isinstance( print(
other_test, TestFolder f"Adding {other_test.path} tests to same bucket due syrupy"
): )
continue smallest_bucket.add(other_test)
print( add_not_measured_files(
f"Adding {other_test.path} tests to same bucket due syrupy" other_test,
) not_measured_tests,
smallest_bucket.add(other_test) )
add_not_measured_files(
tests,
not_measured_tests,
)
break
# verify that all tests are added to a bucket # verify that all tests are added to a bucket
if not test_folder.added_to_bucket: if not test_folder.added_to_bucket:
@ -131,12 +132,17 @@ class BucketHolder:
with Path("pytest_buckets.txt").open("w") as file: with Path("pytest_buckets.txt").open("w") as file:
for idx, bucket in enumerate(self._buckets): for idx, bucket in enumerate(self._buckets):
print( print(
f"Bucket {idx + 1} execution time should be ~{bucket.approx_execution_time}s" f"Bucket {idx + 1} execution time should be ~{str_without_milliseconds(bucket.approx_execution_time)}"
f" with {bucket.not_measured_files} not measured files" f" with {bucket.not_measured_files} not measured files"
) )
file.write(bucket.get_paths_line()) file.write(bucket.get_paths_line())
def str_without_milliseconds(td: timedelta) -> str:
    """Format a timedelta as text, dropping any fractional-second part.

    Everything from the first "." of ``str(td)`` onwards is discarded, so a
    value with no fractional seconds is returned exactly as ``str(td)``.
    """
    whole_part, _dot, _fraction = str(td).partition(".")
    return whole_part
@dataclass @dataclass
class TestFile: class TestFile:
"""Class represents a single test file and the number of tests it has.""" """Class represents a single test file and the number of tests it has."""
@ -144,7 +150,7 @@ class TestFile:
path: Path path: Path
parent: TestFolder parent: TestFolder
# 0 means not measured # 0 means not measured
approx_execution_time: float = 0.0 approx_execution_time: timedelta
added_to_bucket: bool = field(default=False, init=False) added_to_bucket: bool = field(default=False, init=False)
def add_to_bucket(self) -> None: def add_to_bucket(self) -> None:
@ -156,7 +162,7 @@ class TestFile:
@property @property
def not_measured_files(self) -> int: def not_measured_files(self) -> int:
"""Return files not measured.""" """Return files not measured."""
return 1 if self.approx_execution_time == 0 else 0 return 1 if self.approx_execution_time.total_seconds() == 0 else 0
def __gt__(self, other: TestFile) -> bool: def __gt__(self, other: TestFile) -> bool:
"""Return if greater than.""" """Return if greater than."""
@ -176,9 +182,12 @@ class TestFolder:
self.children: dict[Path, TestFolder | TestFile] = {} self.children: dict[Path, TestFolder | TestFile] = {}
@property @property
def approx_execution_time(self) -> float: def approx_execution_time(self) -> timedelta:
"""Return approximate execution time.""" """Return approximate execution time."""
return sum([test.approx_execution_time for test in self.children.values()]) time = timedelta(seconds=0)
for test in self.children.values():
time += test.approx_execution_time
return time
@property @property
def not_measured_files(self) -> int: def not_measured_files(self) -> int:
@ -213,7 +222,10 @@ class TestFolder:
self, path: Path, execution_time: float, skip_file_if_present: bool self, path: Path, execution_time: float, skip_file_if_present: bool
) -> None: ) -> None:
"""Add test file to folder.""" """Add test file to folder."""
self._add_test_file(TestFile(path, self, execution_time), skip_file_if_present) self._add_test_file(
TestFile(path, self, timedelta(seconds=execution_time)),
skip_file_if_present,
)
def _add_test_file(self, file: TestFile, skip_file_if_present: bool) -> None: def _add_test_file(self, file: TestFile, skip_file_if_present: bool) -> None:
"""Add test file to folder.""" """Add test file to folder."""