Speed up clang-tidy CI by 80%+ with incremental checking (#9396)

J. Nick Koston 2025-07-09 11:00:44 -10:00 committed by GitHub
parent 0ffc446315
commit 6616567b05
11 changed files with 1774 additions and 53 deletions

.clang-tidy.hash Normal file

@@ -0,0 +1 @@
a3cdfc378d28b53b416a1d5bf0ab9077ee18867f0d39436ea8013cf5a4ead87a

.github/workflows/ci-clang-tidy-hash.yml Normal file

@@ -0,0 +1,76 @@
name: Clang-tidy Hash CI
on:
pull_request:
paths:
- ".clang-tidy"
- "platformio.ini"
- "requirements_dev.txt"
- ".clang-tidy.hash"
- "script/clang_tidy_hash.py"
- ".github/workflows/ci-clang-tidy-hash.yml"
permissions:
contents: read
pull-requests: write
jobs:
verify-hash:
name: Verify clang-tidy hash
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4.2.2
- name: Set up Python
uses: actions/setup-python@v5.6.0
with:
python-version: "3.11"
- name: Verify hash
run: |
python script/clang_tidy_hash.py --verify
- if: failure()
name: Show hash details
run: |
python script/clang_tidy_hash.py
echo "## Job Failed" | tee -a $GITHUB_STEP_SUMMARY
echo "You have modified clang-tidy configuration but have not updated the hash." | tee -a $GITHUB_STEP_SUMMARY
echo "Please run 'script/clang_tidy_hash.py --update' and commit the changes." | tee -a $GITHUB_STEP_SUMMARY
- if: failure()
name: Request changes
uses: actions/github-script@v7.0.1
with:
script: |
await github.rest.pulls.createReview({
pull_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
event: 'REQUEST_CHANGES',
body: 'You have modified clang-tidy configuration but have not updated the hash.\nPlease run `script/clang_tidy_hash.py --update` and commit the changes.'
})
- if: success()
name: Dismiss review
uses: actions/github-script@v7.0.1
with:
script: |
let reviews = await github.rest.pulls.listReviews({
pull_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo
});
for (let review of reviews.data) {
if (review.user.login === 'github-actions[bot]' && review.state === 'CHANGES_REQUESTED') {
await github.rest.pulls.dismissReview({
pull_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
review_id: review.id,
message: 'Clang-tidy hash now matches configuration.'
});
}
}

.github/workflows/ci.yml

@@ -297,6 +297,8 @@ jobs:
- pylint
- pytest
- pyupgrade
env:
GH_TOKEN: ${{ github.token }}
strategy:
fail-fast: false
max-parallel: 2
@@ -335,6 +337,7 @@ jobs:
steps:
- name: Check out code from GitHub
uses: actions/checkout@v4.2.2
- name: Restore Python
uses: ./.github/actions/restore-python
with:
@@ -355,6 +358,20 @@ jobs:
path: ~/.platformio
key: platformio-${{ matrix.pio_cache_key }}
- name: Cache platformio libdeps
if: github.ref == 'refs/heads/dev'
uses: actions/cache@v4.2.3
with:
path: .pio/libdeps
key: pio-libdeps-${{ matrix.pio_cache_key }}-${{ hashFiles('platformio.ini') }}
- name: Cache platformio libdeps
if: github.ref != 'refs/heads/dev'
uses: actions/cache/restore@v4.2.3
with:
path: .pio/libdeps
key: pio-libdeps-${{ matrix.pio_cache_key }}-${{ hashFiles('platformio.ini') }}
- name: Register problem matchers
run: |
echo "::add-matcher::.github/workflows/matchers/gcc.json"
@@ -367,10 +384,28 @@ jobs:
mkdir -p .temp
pio run --list-targets -e esp32-idf-tidy
- name: Check if full clang-tidy scan needed
id: check_full_scan
run: |
. venv/bin/activate
if python script/clang_tidy_hash.py --check; then
echo "full_scan=true" >> $GITHUB_OUTPUT
echo "reason=hash_changed" >> $GITHUB_OUTPUT
else
echo "full_scan=false" >> $GITHUB_OUTPUT
echo "reason=normal" >> $GITHUB_OUTPUT
fi
- name: Run clang-tidy
run: |
. venv/bin/activate
script/clang-tidy --all-headers --fix ${{ matrix.options }} ${{ matrix.ignore_errors && '|| true' || '' }}
if [ "${{ steps.check_full_scan.outputs.full_scan }}" = "true" ]; then
echo "Running FULL clang-tidy scan (hash changed)"
script/clang-tidy --all-headers --fix ${{ matrix.options }} ${{ matrix.ignore_errors && '|| true' || '' }}
else
echo "Running clang-tidy on changed files only"
script/clang-tidy --all-headers --fix --changed ${{ matrix.options }} ${{ matrix.ignore_errors && '|| true' || '' }}
fi
env:
# Also cache libdeps, store them in a ~/.platformio subfolder
PLATFORMIO_LIBDEPS_DIR: ~/.platformio/libdeps
@@ -385,23 +420,14 @@ jobs:
needs:
- common
if: github.event_name == 'pull_request'
env:
GH_TOKEN: ${{ github.token }}
outputs:
components: ${{ steps.list-components.outputs.components }}
count: ${{ steps.list-components.outputs.count }}
steps:
- name: Check out code from GitHub
uses: actions/checkout@v4.2.2
with:
# Fetch enough history so `git merge-base refs/remotes/origin/dev HEAD` works.
fetch-depth: 500
- name: Get target branch
id: target-branch
run: |
echo "branch=${{ github.event.pull_request.base.ref }}" >> $GITHUB_OUTPUT
- name: Fetch ${{ steps.target-branch.outputs.branch }} branch
run: |
git -c protocol.version=2 fetch --no-tags --prune --no-recurse-submodules --depth=1 origin +refs/heads/${{ steps.target-branch.outputs.branch }}:refs/remotes/origin/${{ steps.target-branch.outputs.branch }}
git merge-base refs/remotes/origin/${{ steps.target-branch.outputs.branch }} HEAD
- name: Restore Python
uses: ./.github/actions/restore-python
with:
@@ -411,7 +437,7 @@ jobs:
id: list-components
run: |
. venv/bin/activate
components=$(script/list-components.py --changed --branch ${{ steps.target-branch.outputs.branch }})
components=$(script/list-components.py --changed)
output_components=$(echo "$components" | jq -R -s -c 'split("\n")[:-1] | map(select(length > 0))')
count=$(echo "$output_components" | jq length)
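
A note on the "Check if full clang-tidy scan needed" step above: script/clang_tidy_hash.py --check inverts the usual shell convention and exits 0 when a full scan is needed (hash changed or missing) and 1 otherwise, which is why the "if python script/clang_tidy_hash.py --check; then" branch is the one that sets full_scan=true. A minimal sketch of that contract (hash values are hypothetical):

def check_exit_code(current_hash: str, stored_hash: str | None) -> int:
    # Mirrors the --check mode of script/clang_tidy_hash.py added later in this
    # commit: status 0 means "full scan needed", 1 means "hash unchanged,
    # a changed-files-only scan is enough".
    return 0 if current_hash != stored_hash else 1


# Hypothetical hash values, for illustration only.
assert check_exit_code("abc123", "abc123") == 1  # config unchanged -> incremental scan
assert check_exit_code("abc123", "def456") == 0  # config changed -> full scan
assert check_exit_code("abc123", None) == 0      # no stored hash yet -> full scan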

.pre-commit-config.yaml

@@ -48,3 +48,10 @@ repos:
entry: python3 script/run-in-env.py pylint
language: system
types: [python]
- id: clang-tidy-hash
name: Update clang-tidy hash
entry: python script/clang_tidy_hash.py --update-if-changed
language: python
files: ^(\.clang-tidy|platformio\.ini|requirements_dev\.txt)$
pass_filenames: false
additional_dependencies: []
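
For reference, the files regex above is anchored on both ends, so only the three repository-root configuration files trigger the hook; in particular, .clang-tidy.hash (which the hook itself writes) does not re-trigger it. A quick, self-contained check of that pattern (example paths only):

import re

pattern = re.compile(r"^(\.clang-tidy|platformio\.ini|requirements_dev\.txt)$")
candidates = [
    ".clang-tidy",                               # matches
    "platformio.ini",                            # matches
    "requirements_dev.txt",                      # matches
    ".clang-tidy.hash",                          # no match: blocked by the trailing $
    "esphome/components/wifi/platformio.ini",    # no match: blocked by the leading ^
]
for path in candidates:
    print(f"{path}: {bool(pattern.search(path))}")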

script/ci-custom.py

@@ -270,7 +270,7 @@ def lint_newline(fname):
return "File contains Windows newline. Please set your editor to Unix newline mode."
@lint_content_check(exclude=["*.svg"])
@lint_content_check(exclude=["*.svg", ".clang-tidy.hash"])
def lint_end_newline(fname, content):
if content and not content.endswith("\n"):
return "File does not end with a newline, please add an empty line at the end of the file."

script/clang-tidy

@@ -22,6 +22,7 @@ from helpers import (
git_ls_files,
load_idedata,
print_error_for_file,
print_file_list,
root_path,
temp_header_file,
)
@@ -218,13 +219,14 @@ def main():
)
args = parser.parse_args()
idedata = load_idedata(args.environment)
options = clang_options(idedata)
files = []
for path in git_ls_files(["*.cpp"]):
files.append(os.path.relpath(path, os.getcwd()))
# Print initial file count if it's large
if len(files) > 50:
print(f"Found {len(files)} total files to process")
if args.files:
# Match against files specified on command-line
file_name_re = re.compile("|".join(args.files))
@@ -240,10 +242,28 @@ def main():
if args.split_num:
files = split_list(files, args.split_num)[args.split_at - 1]
print(f"Split {args.split_at}/{args.split_num}: checking {len(files)} files")
# Print file count before adding header file
print(f"\nTotal files to check: {len(files)}")
# Early exit if no files to check
if not files:
print("No files to check - exiting early")
return 0
# Only build header file if we have actual files to check
if args.all_headers and args.split_at in (None, 1):
build_all_include()
files.insert(0, temp_header_file)
print(f"Added all-include header file, new total: {len(files)}")
# Print final file list before loading idedata
print_file_list(files, "Final files to process:")
# Load idedata and options only if we have files to check
idedata = load_idedata(args.environment)
options = clang_options(idedata)
tmpdir = None
if args.fix:

script/clang_tidy_hash.py Executable file

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
"""Calculate and manage hash for clang-tidy configuration."""
from __future__ import annotations
import argparse
import hashlib
from pathlib import Path
import re
import sys
# Add the script directory to path to import helpers
script_dir = Path(__file__).parent
sys.path.insert(0, str(script_dir))
def read_file_lines(path: Path) -> list[str]:
"""Read lines from a file."""
with open(path) as f:
return f.readlines()
def parse_requirement_line(line: str) -> tuple[str, str] | None:
"""Parse a requirement line and return (package, original_line) or None.
Handles formats like:
- package==1.2.3
- package==1.2.3 # comment
- package>=1.2.3,<2.0.0
"""
original_line = line.strip()
# Extract the part before any comment for parsing
parse_line = line
if "#" in parse_line:
parse_line = parse_line[: parse_line.index("#")]
parse_line = parse_line.strip()
if not parse_line:
return None
# Use regex to extract package name
# This matches package names followed by version operators
match = re.match(r"^([a-zA-Z0-9_-]+)(==|>=|<=|>|<|!=|~=)(.+)$", parse_line)
if match:
return (match.group(1), original_line) # Return package name and original line
return None
def get_clang_tidy_version_from_requirements() -> str:
"""Get clang-tidy version from requirements_dev.txt"""
requirements_path = Path(__file__).parent.parent / "requirements_dev.txt"
lines = read_file_lines(requirements_path)
for line in lines:
parsed = parse_requirement_line(line)
if parsed and parsed[0] == "clang-tidy":
# Return the original line (preserves comments)
return parsed[1]
return "clang-tidy version not found"
def extract_platformio_flags() -> str:
"""Extract clang-tidy related flags from platformio.ini"""
flags: list[str] = []
in_clangtidy_section = False
platformio_path = Path(__file__).parent.parent / "platformio.ini"
lines = read_file_lines(platformio_path)
for line in lines:
line = line.strip()
if line.startswith("[flags:clangtidy]"):
in_clangtidy_section = True
continue
elif line.startswith("[") and in_clangtidy_section:
break
elif in_clangtidy_section and line and not line.startswith("#"):
flags.append(line)
return "\n".join(sorted(flags))
def read_file_bytes(path: Path) -> bytes:
"""Read bytes from a file."""
with open(path, "rb") as f:
return f.read()
def calculate_clang_tidy_hash() -> str:
"""Calculate hash of clang-tidy configuration and version"""
hasher = hashlib.sha256()
# Hash .clang-tidy file
clang_tidy_path = Path(__file__).parent.parent / ".clang-tidy"
content = read_file_bytes(clang_tidy_path)
hasher.update(content)
# Hash clang-tidy version from requirements_dev.txt
version = get_clang_tidy_version_from_requirements()
hasher.update(version.encode())
# Hash relevant platformio.ini sections
pio_flags = extract_platformio_flags()
hasher.update(pio_flags.encode())
return hasher.hexdigest()
def read_stored_hash() -> str | None:
"""Read the stored hash from file"""
hash_file = Path(__file__).parent.parent / ".clang-tidy.hash"
if hash_file.exists():
lines = read_file_lines(hash_file)
return lines[0].strip() if lines else None
return None
def write_file_content(path: Path, content: str) -> None:
"""Write content to a file."""
with open(path, "w") as f:
f.write(content)
def write_hash(hash_value: str) -> None:
"""Write hash to file"""
hash_file = Path(__file__).parent.parent / ".clang-tidy.hash"
write_file_content(hash_file, hash_value)
def main() -> None:
parser = argparse.ArgumentParser(description="Manage clang-tidy configuration hash")
parser.add_argument(
"--check",
action="store_true",
help="Check if full scan needed (exit 0 if needed)",
)
parser.add_argument("--update", action="store_true", help="Update the hash file")
parser.add_argument(
"--update-if-changed",
action="store_true",
help="Update hash only if configuration changed (for pre-commit)",
)
parser.add_argument(
"--verify", action="store_true", help="Verify hash matches (for CI)"
)
args = parser.parse_args()
current_hash = calculate_clang_tidy_hash()
stored_hash = read_stored_hash()
if args.check:
# Exit 0 if full scan needed (hash changed or no hash file)
sys.exit(0 if current_hash != stored_hash else 1)
elif args.update:
write_hash(current_hash)
print(f"Hash updated: {current_hash}")
elif args.update_if_changed:
if current_hash != stored_hash:
write_hash(current_hash)
print(f"Clang-tidy hash updated: {current_hash}")
# Exit 0 so pre-commit can stage the file
sys.exit(0)
else:
print("Clang-tidy hash unchanged")
sys.exit(0)
elif args.verify:
if current_hash != stored_hash:
print("ERROR: Clang-tidy configuration has changed but hash not updated!")
print(f"Expected: {current_hash}")
print(f"Found: {stored_hash}")
print("\nPlease run: script/clang_tidy_hash.py --update")
sys.exit(1)
print("Hash verification passed")
else:
print(f"Current hash: {current_hash}")
print(f"Stored hash: {stored_hash}")
print(f"Match: {current_hash == stored_hash}")
if __name__ == "__main__":
main()
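
To make the hashing scheme concrete: calculate_clang_tidy_hash() feeds three inputs into a single SHA-256 digest in a fixed order, so the value stored in .clang-tidy.hash changes whenever any of them does. A standalone sketch using made-up inputs (the same sample values as the unit tests below):

import hashlib

hasher = hashlib.sha256()
hasher.update(b"Checks: '-*,readability-*'\n")  # raw bytes of .clang-tidy
hasher.update("clang-tidy==18.1.5".encode())    # clang-tidy line from requirements_dev.txt
hasher.update("build_flags = -Wall".encode())   # sorted [flags:clangtidy] lines from platformio.ini
print(hasher.hexdigest())                       # the digest that ends up in .clang-tidy.hash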

script/helpers.py

@@ -1,8 +1,12 @@
from __future__ import annotations
import json
import os
import os.path
from pathlib import Path
import re
import subprocess
import time
import colorama
@@ -12,13 +16,13 @@ temp_folder = os.path.join(root_path, ".temp")
temp_header_file = os.path.join(temp_folder, "all-include.cpp")
def styled(color, msg, reset=True):
def styled(color: str | tuple[str, ...], msg: str, reset: bool = True) -> str:
prefix = "".join(color) if isinstance(color, tuple) else color
suffix = colorama.Style.RESET_ALL if reset else ""
return prefix + msg + suffix
def print_error_for_file(file, body):
def print_error_for_file(file: str, body: str | None) -> None:
print(
styled(colorama.Fore.GREEN, "### File ")
+ styled((colorama.Fore.GREEN, colorama.Style.BRIGHT), file)
@@ -29,17 +33,22 @@ def print_error_for_file(file, body):
print()
def build_all_include():
def build_all_include() -> None:
# Build a cpp file that includes all header files in this repo.
# Otherwise header-only integrations would not be tested by clang-tidy
headers = []
for path in walk_files(basepath):
filetypes = (".h",)
ext = os.path.splitext(path)[1]
if ext in filetypes:
path = os.path.relpath(path, root_path)
include_p = path.replace(os.path.sep, "/")
headers.append(f'#include "{include_p}"')
# Use git ls-files to find all .h files in the esphome directory
# This is much faster than walking the filesystem
cmd = ["git", "ls-files", "esphome/**/*.h"]
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
# Process git output - git already returns paths relative to repo root
headers = [
f'#include "{include_p}"'
for line in proc.stdout.strip().split("\n")
if (include_p := line.replace(os.path.sep, "/"))
]
headers.sort()
headers.append("")
content = "\n".join(headers)
@@ -48,29 +57,86 @@ def build_all_include():
p.write_text(content, encoding="utf-8")
def walk_files(path):
for root, _, files in os.walk(path):
for name in files:
yield os.path.join(root, name)
def get_output(*args):
def get_output(*args: str) -> str:
with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
output, _ = proc.communicate()
return output.decode("utf-8")
def get_err(*args):
def get_err(*args: str) -> str:
with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as proc:
_, err = proc.communicate()
return err.decode("utf-8")
def splitlines_no_ends(string):
def splitlines_no_ends(string: str) -> list[str]:
return [s.strip() for s in string.splitlines()]
def changed_files(branch="dev"):
def _get_pr_number_from_github_env() -> str | None:
"""Extract PR number from GitHub environment variables.
Returns:
PR number as string, or None if not found
"""
# First try parsing GITHUB_REF (fastest)
github_ref = os.environ.get("GITHUB_REF", "")
if "/pull/" in github_ref:
return github_ref.split("/pull/")[1].split("/")[0]
# Fallback to GitHub event file
github_event_path = os.environ.get("GITHUB_EVENT_PATH")
if github_event_path and os.path.exists(github_event_path):
with open(github_event_path) as f:
event_data = json.load(f)
pr_data = event_data.get("pull_request", {})
if pr_number := pr_data.get("number"):
return str(pr_number)
return None
def _get_changed_files_github_actions() -> list[str] | None:
"""Get changed files in GitHub Actions environment.
Returns:
List of changed files, or None if should fall back to git method
"""
event_name = os.environ.get("GITHUB_EVENT_NAME")
# For pull requests
if event_name == "pull_request":
pr_number = _get_pr_number_from_github_env()
if pr_number:
# Use GitHub CLI to get changed files directly
cmd = ["gh", "pr", "diff", pr_number, "--name-only"]
return _get_changed_files_from_command(cmd)
# For pushes (including squash-and-merge)
elif event_name == "push":
# For push events, we want to check what changed in this commit
try:
# Get the changed files in the last commit
return _get_changed_files_from_command(
["git", "diff", "HEAD~1..HEAD", "--name-only"]
)
except: # noqa: E722
# Fall back to the original method if this fails
pass
return None
def changed_files(branch: str | None = None) -> list[str]:
# In GitHub Actions, we can use the API to get changed files more efficiently
if os.environ.get("GITHUB_ACTIONS") == "true":
github_files = _get_changed_files_github_actions()
if github_files is not None:
return github_files
# Original implementation for local development
if branch is None:
branch = "dev"
check_remotes = ["upstream", "origin"]
check_remotes.extend(splitlines_no_ends(get_output("git", "remote")))
for remote in check_remotes:
@@ -83,25 +149,159 @@ def changed_files(branch="dev"):
pass
else:
raise ValueError("Git not configured")
command = ["git", "diff", merge_base, "--name-only"]
changed = splitlines_no_ends(get_output(*command))
changed = [os.path.relpath(f, os.getcwd()) for f in changed]
changed.sort()
return changed
return _get_changed_files_from_command(["git", "diff", merge_base, "--name-only"])
def filter_changed(files):
def _get_changed_files_from_command(command: list[str]) -> list[str]:
"""Run a git command to get changed files and return them as a list."""
proc = subprocess.run(command, capture_output=True, text=True, check=False)
if proc.returncode != 0:
raise Exception(f"Command failed: {' '.join(command)}\nstderr: {proc.stderr}")
changed_files = splitlines_no_ends(proc.stdout)
changed_files = [os.path.relpath(f, os.getcwd()) for f in changed_files if f]
changed_files.sort()
return changed_files
def get_changed_components() -> list[str] | None:
"""Get list of changed components using list-components.py script.
This function:
1. First checks if any core files (esphome/core/*) changed - if so, returns None
2. Otherwise delegates to ./script/list-components.py --changed which:
- Analyzes all changed files
- Determines which components are affected (including dependencies)
- Returns a list of component names that need to be checked
Returns:
- None: Core files changed, need full scan
- Empty list: No components changed (only non-component files changed)
- List of strings: Names of components that need checking (e.g., ["wifi", "mqtt"])
"""
# Check if any core files changed first
changed = changed_files()
files = [f for f in files if f in changed]
print("Changed files:")
if not files:
print(" No changed files!")
for c in files:
print(f" {c}")
if any(f.startswith("esphome/core/") for f in changed):
print("Core files changed - will run full clang-tidy scan")
return None
# Use list-components.py to get changed components
script_path = os.path.join(root_path, "script", "list-components.py")
cmd = [script_path, "--changed"]
try:
result = subprocess.run(
cmd, capture_output=True, text=True, check=True, close_fds=False
)
components = [c.strip() for c in result.stdout.strip().split("\n") if c.strip()]
return components
except subprocess.CalledProcessError:
# If the script fails, fall back to full scan
print("Could not determine changed components - will run full clang-tidy scan")
return None
def _filter_changed_ci(files: list[str]) -> list[str]:
"""Filter files based on changed components in CI environment.
This function implements intelligent filtering to reduce CI runtime by only
checking files that could be affected by the changes. It handles three scenarios:
1. Core files changed (returns None from get_changed_components):
- Triggered when any file in esphome/core/ is modified
- Action: Check ALL files (full scan)
- Reason: Core files are used throughout the codebase
2. No components changed (returns empty list from get_changed_components):
- Triggered when only non-component files changed (e.g., scripts, configs)
- Action: Check only the specific non-component files that changed
- Example: If only script/clang-tidy changed, only check that file
3. Specific components changed (returns list of component names):
- Component detection done by: ./script/list-components.py --changed
- That script analyzes which components are affected by the changed files
INCLUDING their dependencies
- Action: Check ALL files in each component that list-components.py identifies
- Example: If wifi.cpp changed, list-components.py might return ["wifi", "network"]
if network depends on wifi. We then check ALL files in both
esphome/components/wifi/ and esphome/components/network/
- Reason: Component files often have interdependencies (headers, base classes)
Args:
files: List of all files that clang-tidy would normally check
Returns:
Filtered list of files to check
"""
components = get_changed_components()
if components is None:
# Scenario 1: Core files changed or couldn't determine components
# Action: Return all files for full scan
return files
if not components:
# Scenario 2: No components changed - only non-component files changed
# Action: Check only the specific non-component files that changed
changed = changed_files()
files = [
f for f in files if f in changed and not f.startswith("esphome/components/")
]
if not files:
print("No files changed")
return files
# Scenario 3: Specific components changed
# Action: Check ALL files in each changed component
# Convert component list to set for O(1) lookups
component_set = set(components)
print(f"Changed components: {', '.join(sorted(components))}")
# The 'files' parameter contains ALL files in the codebase that clang-tidy would check.
# We filter this down to only files in the changed components.
# We check ALL files in each changed component (not just the changed files)
# because changes in one file can affect other files in the same component.
filtered_files = []
for f in files:
if f.startswith("esphome/components/"):
# Check if file belongs to any of the changed components
parts = f.split("/")
if len(parts) >= 3 and parts[2] in component_set:
filtered_files.append(f)
return filtered_files
def _filter_changed_local(files: list[str]) -> list[str]:
"""Filter files based on git changes for local development.
Args:
files: List of all files to filter
Returns:
Filtered list of files to check
"""
# For local development, just check changed files directly
changed = changed_files()
return [f for f in files if f in changed]
def filter_changed(files: list[str]) -> list[str]:
"""Filter files to only those that changed or are in changed components.
Args:
files: List of files to filter
"""
# When running from CI, use component-based filtering
if os.environ.get("GITHUB_ACTIONS") == "true":
files = _filter_changed_ci(files)
else:
files = _filter_changed_local(files)
print_file_list(files, "Files to check after filtering:")
return files
def filter_grep(files, value):
def filter_grep(files: list[str], value: str) -> list[str]:
matched = []
for file in files:
with open(file, encoding="utf-8") as handle:
@@ -111,7 +311,7 @@ def filter_grep(files, value):
return matched
def git_ls_files(patterns=None):
def git_ls_files(patterns: list[str] | None = None) -> dict[str, int]:
command = ["git", "ls-files", "-s"]
if patterns is not None:
command.extend(patterns)
@@ -122,6 +322,9 @@ def git_ls_files(patterns=None):
def load_idedata(environment):
start_time = time.time()
print(f"Loading IDE data for environment '{environment}'...")
platformio_ini = Path(root_path) / "platformio.ini"
temp_idedata = Path(temp_folder) / f"idedata-{environment}.json"
changed = False
@@ -142,7 +345,10 @@ def load_idedata(environment):
changed = True
if not changed:
return json.loads(temp_idedata.read_text())
data = json.loads(temp_idedata.read_text())
elapsed = time.time() - start_time
print(f"IDE data loaded from cache in {elapsed:.2f} seconds")
return data
# ensure temp directory exists before running pio, as it writes sdkconfig to it
Path(temp_folder).mkdir(exist_ok=True)
@@ -158,6 +364,9 @@ def load_idedata(environment):
match = re.search(r'{\s*".*}', stdout.decode("utf-8"))
data = json.loads(match.group())
temp_idedata.write_text(json.dumps(data, indent=2) + "\n")
elapsed = time.time() - start_time
print(f"IDE data generated and cached in {elapsed:.2f} seconds")
return data
@@ -196,6 +405,29 @@ def get_binary(name: str, version: str) -> str:
raise
def print_file_list(
files: list[str], title: str = "Files:", max_files: int = 20
) -> None:
"""Print a list of files with optional truncation for large lists.
Args:
files: List of file paths to print
title: Title to print before the list
max_files: Maximum number of files to show before truncating (default: 20)
"""
print(title)
if not files:
print(" No files to check!")
elif len(files) <= max_files:
for f in sorted(files):
print(f" {f}")
else:
sorted_files = sorted(files)
for f in sorted_files[:10]:
print(f" {f}")
print(f" ... and {len(files) - 10} more files")
def get_usable_cpu_count() -> int:
"""Return the number of CPUs that can be used for processes.

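One detail of the component filtering in helpers.py worth spelling out: _filter_changed_ci keys on the third path segment, so esphome/components/wifi/... belongs to component "wifi" while esphome/components/wifi_info/... does not. A small self-contained sketch of that check (file paths and component names are illustrative):

component_set = {"wifi", "api"}
all_files = [
    "esphome/components/wifi/wifi_component.cpp",
    "esphome/components/wifi_info/wifi_info_text_sensor.cpp",  # different component
    "esphome/components/api/api_server.cpp",
    "esphome/core/helpers.cpp",
]

filtered = []
for f in all_files:
    if f.startswith("esphome/components/"):
        parts = f.split("/")
        if len(parts) >= 3 and parts[2] in component_set:
            filtered.append(f)

print(filtered)
# ['esphome/components/wifi/wifi_component.cpp', 'esphome/components/api/api_server.cpp']
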
tests/script/__init__.py Normal file

tests/script/test_clang_tidy_hash.py Normal file

@@ -0,0 +1,359 @@
"""Unit tests for script/clang_tidy_hash.py module."""
import hashlib
from pathlib import Path
import sys
from unittest.mock import Mock, patch
import pytest
# Add the script directory to Python path so we can import clang_tidy_hash
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "script"))
import clang_tidy_hash # noqa: E402
@pytest.mark.parametrize(
("file_content", "expected"),
[
(
"clang-tidy==18.1.5 # via -r requirements_dev.in\n",
"clang-tidy==18.1.5 # via -r requirements_dev.in",
),
(
"other-package==1.0\nclang-tidy==17.0.0\nmore-packages==2.0\n",
"clang-tidy==17.0.0",
),
(
"# comment\nclang-tidy==16.0.0 # some comment\n",
"clang-tidy==16.0.0 # some comment",
),
("no-clang-tidy-here==1.0\n", "clang-tidy version not found"),
],
)
def test_get_clang_tidy_version_from_requirements(
file_content: str, expected: str
) -> None:
"""Test extracting clang-tidy version from various file formats."""
# Mock read_file_lines to return our test content
with patch("clang_tidy_hash.read_file_lines") as mock_read:
mock_read.return_value = file_content.splitlines(keepends=True)
result = clang_tidy_hash.get_clang_tidy_version_from_requirements()
assert result == expected
@pytest.mark.parametrize(
("platformio_content", "expected_flags"),
[
(
"[env:esp32]\n"
"platform = espressif32\n"
"\n"
"[flags:clangtidy]\n"
"build_flags = -Wall\n"
"extra_flags = -Wextra\n"
"\n"
"[env:esp8266]\n",
"build_flags = -Wall\nextra_flags = -Wextra",
),
(
"[flags:clangtidy]\n# Comment line\nbuild_flags = -O2\n\n[next_section]\n",
"build_flags = -O2",
),
(
"[flags:clangtidy]\nflag_c = -std=c99\nflag_b = -Wall\nflag_a = -O2\n",
"flag_a = -O2\nflag_b = -Wall\nflag_c = -std=c99", # Sorted
),
(
"[env:esp32]\nplatform = espressif32\n", # No clangtidy section
"",
),
],
)
def test_extract_platformio_flags(platformio_content: str, expected_flags: str) -> None:
"""Test extracting clang-tidy flags from platformio.ini."""
# Mock read_file_lines to return our test content
with patch("clang_tidy_hash.read_file_lines") as mock_read:
mock_read.return_value = platformio_content.splitlines(keepends=True)
result = clang_tidy_hash.extract_platformio_flags()
assert result == expected_flags
def test_calculate_clang_tidy_hash() -> None:
"""Test calculating hash from all configuration sources."""
clang_tidy_content = b"Checks: '-*,readability-*'\n"
requirements_version = "clang-tidy==18.1.5"
pio_flags = "build_flags = -Wall"
# Expected hash calculation
expected_hasher = hashlib.sha256()
expected_hasher.update(clang_tidy_content)
expected_hasher.update(requirements_version.encode())
expected_hasher.update(pio_flags.encode())
expected_hash = expected_hasher.hexdigest()
# Mock the dependencies
with (
patch("clang_tidy_hash.read_file_bytes", return_value=clang_tidy_content),
patch(
"clang_tidy_hash.get_clang_tidy_version_from_requirements",
return_value=requirements_version,
),
patch("clang_tidy_hash.extract_platformio_flags", return_value=pio_flags),
):
result = clang_tidy_hash.calculate_clang_tidy_hash()
assert result == expected_hash
def test_read_stored_hash_exists(tmp_path: Path) -> None:
"""Test reading hash when file exists."""
stored_hash = "abc123def456"
hash_file = tmp_path / ".clang-tidy.hash"
hash_file.write_text(f"{stored_hash}\n")
with (
patch("clang_tidy_hash.Path") as mock_path_class,
patch("clang_tidy_hash.read_file_lines", return_value=[f"{stored_hash}\n"]),
):
# Mock the path calculation and exists check
mock_hash_file = Mock()
mock_hash_file.exists.return_value = True
mock_path_class.return_value.parent.parent.__truediv__.return_value = (
mock_hash_file
)
result = clang_tidy_hash.read_stored_hash()
assert result == stored_hash
def test_read_stored_hash_not_exists() -> None:
"""Test reading hash when file doesn't exist."""
with patch("clang_tidy_hash.Path") as mock_path_class:
# Mock the path calculation and exists check
mock_hash_file = Mock()
mock_hash_file.exists.return_value = False
mock_path_class.return_value.parent.parent.__truediv__.return_value = (
mock_hash_file
)
result = clang_tidy_hash.read_stored_hash()
assert result is None
def test_write_hash() -> None:
"""Test writing hash to file."""
hash_value = "abc123def456"
with patch("clang_tidy_hash.write_file_content") as mock_write:
clang_tidy_hash.write_hash(hash_value)
# Verify write_file_content was called with correct parameters
mock_write.assert_called_once()
args = mock_write.call_args[0]
assert str(args[0]).endswith(".clang-tidy.hash")
assert args[1] == hash_value
@pytest.mark.parametrize(
("args", "current_hash", "stored_hash", "expected_exit"),
[
(["--check"], "abc123", "abc123", 1), # Hashes match, no scan needed
(["--check"], "abc123", "def456", 0), # Hashes differ, scan needed
(["--check"], "abc123", None, 0), # No stored hash, scan needed
],
)
def test_main_check_mode(
args: list[str], current_hash: str, stored_hash: str | None, expected_exit: int
) -> None:
"""Test main function in check mode."""
with (
patch("sys.argv", ["clang_tidy_hash.py"] + args),
patch("clang_tidy_hash.calculate_clang_tidy_hash", return_value=current_hash),
patch("clang_tidy_hash.read_stored_hash", return_value=stored_hash),
pytest.raises(SystemExit) as exc_info,
):
clang_tidy_hash.main()
assert exc_info.value.code == expected_exit
def test_main_update_mode(capsys: pytest.CaptureFixture[str]) -> None:
"""Test main function in update mode."""
current_hash = "abc123"
with (
patch("sys.argv", ["clang_tidy_hash.py", "--update"]),
patch("clang_tidy_hash.calculate_clang_tidy_hash", return_value=current_hash),
patch("clang_tidy_hash.write_hash") as mock_write,
):
clang_tidy_hash.main()
mock_write.assert_called_once_with(current_hash)
captured = capsys.readouterr()
assert f"Hash updated: {current_hash}" in captured.out
@pytest.mark.parametrize(
("current_hash", "stored_hash"),
[
("abc123", "def456"), # Hash changed, should update
("abc123", None), # No stored hash, should update
],
)
def test_main_update_if_changed_mode_update(
current_hash: str, stored_hash: str | None, capsys: pytest.CaptureFixture[str]
) -> None:
"""Test main function in update-if-changed mode when update is needed."""
with (
patch("sys.argv", ["clang_tidy_hash.py", "--update-if-changed"]),
patch("clang_tidy_hash.calculate_clang_tidy_hash", return_value=current_hash),
patch("clang_tidy_hash.read_stored_hash", return_value=stored_hash),
patch("clang_tidy_hash.write_hash") as mock_write,
pytest.raises(SystemExit) as exc_info,
):
clang_tidy_hash.main()
assert exc_info.value.code == 0
mock_write.assert_called_once_with(current_hash)
captured = capsys.readouterr()
assert "Clang-tidy hash updated" in captured.out
def test_main_update_if_changed_mode_no_update(
capsys: pytest.CaptureFixture[str],
) -> None:
"""Test main function in update-if-changed mode when no update is needed."""
current_hash = "abc123"
stored_hash = "abc123"
with (
patch("sys.argv", ["clang_tidy_hash.py", "--update-if-changed"]),
patch("clang_tidy_hash.calculate_clang_tidy_hash", return_value=current_hash),
patch("clang_tidy_hash.read_stored_hash", return_value=stored_hash),
patch("clang_tidy_hash.write_hash") as mock_write,
pytest.raises(SystemExit) as exc_info,
):
clang_tidy_hash.main()
assert exc_info.value.code == 0
mock_write.assert_not_called()
captured = capsys.readouterr()
assert "Clang-tidy hash unchanged" in captured.out
def test_main_verify_mode_success(capsys: pytest.CaptureFixture[str]) -> None:
"""Test main function in verify mode when verification passes."""
current_hash = "abc123"
stored_hash = "abc123"
with (
patch("sys.argv", ["clang_tidy_hash.py", "--verify"]),
patch("clang_tidy_hash.calculate_clang_tidy_hash", return_value=current_hash),
patch("clang_tidy_hash.read_stored_hash", return_value=stored_hash),
):
clang_tidy_hash.main()
captured = capsys.readouterr()
assert "Hash verification passed" in captured.out
@pytest.mark.parametrize(
("current_hash", "stored_hash"),
[
("abc123", "def456"), # Hashes differ, verification fails
("abc123", None), # No stored hash, verification fails
],
)
def test_main_verify_mode_failure(
current_hash: str, stored_hash: str | None, capsys: pytest.CaptureFixture[str]
) -> None:
"""Test main function in verify mode when verification fails."""
with (
patch("sys.argv", ["clang_tidy_hash.py", "--verify"]),
patch("clang_tidy_hash.calculate_clang_tidy_hash", return_value=current_hash),
patch("clang_tidy_hash.read_stored_hash", return_value=stored_hash),
pytest.raises(SystemExit) as exc_info,
):
clang_tidy_hash.main()
assert exc_info.value.code == 1
captured = capsys.readouterr()
assert "ERROR: Clang-tidy configuration has changed" in captured.out
def test_main_default_mode(capsys: pytest.CaptureFixture[str]) -> None:
"""Test main function in default mode (no arguments)."""
current_hash = "abc123"
stored_hash = "def456"
with (
patch("sys.argv", ["clang_tidy_hash.py"]),
patch("clang_tidy_hash.calculate_clang_tidy_hash", return_value=current_hash),
patch("clang_tidy_hash.read_stored_hash", return_value=stored_hash),
):
clang_tidy_hash.main()
captured = capsys.readouterr()
assert f"Current hash: {current_hash}" in captured.out
assert f"Stored hash: {stored_hash}" in captured.out
assert "Match: False" in captured.out
def test_read_file_lines(tmp_path: Path) -> None:
"""Test read_file_lines helper function."""
test_file = tmp_path / "test.txt"
test_content = "line1\nline2\nline3\n"
test_file.write_text(test_content)
result = clang_tidy_hash.read_file_lines(test_file)
assert result == ["line1\n", "line2\n", "line3\n"]
def test_read_file_bytes(tmp_path: Path) -> None:
"""Test read_file_bytes helper function."""
test_file = tmp_path / "test.bin"
test_content = b"binary content\x00\xff"
test_file.write_bytes(test_content)
result = clang_tidy_hash.read_file_bytes(test_file)
assert result == test_content
def test_write_file_content(tmp_path: Path) -> None:
"""Test write_file_content helper function."""
test_file = tmp_path / "test.txt"
test_content = "test content"
clang_tidy_hash.write_file_content(test_file, test_content)
assert test_file.read_text() == test_content
@pytest.mark.parametrize(
("line", "expected"),
[
("clang-tidy==18.1.5", ("clang-tidy", "clang-tidy==18.1.5")),
(
"clang-tidy==18.1.5 # comment",
("clang-tidy", "clang-tidy==18.1.5 # comment"),
),
("some-package>=1.0,<2.0", ("some-package", "some-package>=1.0,<2.0")),
("pkg_with-dashes==1.0", ("pkg_with-dashes", "pkg_with-dashes==1.0")),
("# just a comment", None),
("", None),
(" ", None),
("invalid line without version", None),
],
)
def test_parse_requirement_line(line: str, expected: tuple[str, str] | None) -> None:
"""Test parsing individual requirement lines."""
result = clang_tidy_hash.parse_requirement_line(line)
assert result == expected

tests/script/test_helpers.py Normal file

@@ -0,0 +1,812 @@
"""Unit tests for script/helpers.py module."""
import json
import os
from pathlib import Path
import subprocess
import sys
from unittest.mock import Mock, patch
import pytest
from pytest import MonkeyPatch
# Add the script directory to Python path so we can import helpers
sys.path.insert(
0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "script"))
)
import helpers # noqa: E402
changed_files = helpers.changed_files
filter_changed = helpers.filter_changed
get_changed_components = helpers.get_changed_components
_get_changed_files_from_command = helpers._get_changed_files_from_command
_get_pr_number_from_github_env = helpers._get_pr_number_from_github_env
_get_changed_files_github_actions = helpers._get_changed_files_github_actions
_filter_changed_ci = helpers._filter_changed_ci
_filter_changed_local = helpers._filter_changed_local
build_all_include = helpers.build_all_include
print_file_list = helpers.print_file_list
@pytest.mark.parametrize(
("github_ref", "expected_pr_number"),
[
("refs/pull/1234/merge", "1234"),
("refs/pull/5678/head", "5678"),
("refs/pull/999/merge", "999"),
("refs/heads/main", None),
("", None),
],
)
def test_get_pr_number_from_github_env_ref(
monkeypatch: MonkeyPatch, github_ref: str, expected_pr_number: str | None
) -> None:
"""Test extracting PR number from GITHUB_REF."""
monkeypatch.setenv("GITHUB_REF", github_ref)
# Make sure GITHUB_EVENT_PATH is not set
monkeypatch.delenv("GITHUB_EVENT_PATH", raising=False)
result = _get_pr_number_from_github_env()
assert result == expected_pr_number
def test_get_pr_number_from_github_env_event_file(
monkeypatch: MonkeyPatch, tmp_path: Path
) -> None:
"""Test extracting PR number from GitHub event file."""
# No PR number in ref
monkeypatch.setenv("GITHUB_REF", "refs/heads/feature-branch")
event_file = tmp_path / "event.json"
event_data = {"pull_request": {"number": 5678}}
event_file.write_text(json.dumps(event_data))
monkeypatch.setenv("GITHUB_EVENT_PATH", str(event_file))
result = _get_pr_number_from_github_env()
assert result == "5678"
def test_get_pr_number_from_github_env_no_pr(
monkeypatch: MonkeyPatch, tmp_path: Path
) -> None:
"""Test when no PR number is available."""
monkeypatch.setenv("GITHUB_REF", "refs/heads/main")
event_file = tmp_path / "event.json"
event_data = {"push": {"head_commit": {"id": "abc123"}}}
event_file.write_text(json.dumps(event_data))
monkeypatch.setenv("GITHUB_EVENT_PATH", str(event_file))
result = _get_pr_number_from_github_env()
assert result is None
@pytest.mark.parametrize(
("github_ref", "expected_pr_number"),
[
("refs/pull/1234/merge", "1234"),
("refs/pull/5678/head", "5678"),
("refs/pull/999/merge", "999"),
],
)
def test_github_actions_pull_request_with_pr_number_in_ref(
monkeypatch: MonkeyPatch, github_ref: str, expected_pr_number: str
) -> None:
"""Test PR detection via GITHUB_REF."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
monkeypatch.setenv("GITHUB_REF", github_ref)
expected_files = ["file1.py", "file2.cpp"]
with patch("helpers._get_changed_files_from_command") as mock_get:
mock_get.return_value = expected_files
result = changed_files()
mock_get.assert_called_once_with(
["gh", "pr", "diff", expected_pr_number, "--name-only"]
)
assert result == expected_files
def test_github_actions_pull_request_with_event_file(
monkeypatch: MonkeyPatch, tmp_path: Path
) -> None:
"""Test PR detection via GitHub event file."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
monkeypatch.setenv("GITHUB_REF", "refs/heads/feature-branch")
event_file = tmp_path / "event.json"
event_data = {"pull_request": {"number": 5678}}
event_file.write_text(json.dumps(event_data))
monkeypatch.setenv("GITHUB_EVENT_PATH", str(event_file))
expected_files = ["file1.py", "file2.cpp"]
with patch("helpers._get_changed_files_from_command") as mock_get:
mock_get.return_value = expected_files
result = changed_files()
mock_get.assert_called_once_with(["gh", "pr", "diff", "5678", "--name-only"])
assert result == expected_files
def test_github_actions_push_event(monkeypatch: MonkeyPatch) -> None:
"""Test push event handling."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
monkeypatch.setenv("GITHUB_EVENT_NAME", "push")
expected_files = ["file1.py", "file2.cpp"]
with patch("helpers._get_changed_files_from_command") as mock_get:
mock_get.return_value = expected_files
result = changed_files()
mock_get.assert_called_once_with(["git", "diff", "HEAD~1..HEAD", "--name-only"])
assert result == expected_files
def test_get_changed_files_github_actions_pull_request(
monkeypatch: MonkeyPatch,
) -> None:
"""Test _get_changed_files_github_actions for pull request event."""
monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
expected_files = ["file1.py", "file2.cpp"]
with (
patch("helpers._get_pr_number_from_github_env", return_value="1234"),
patch("helpers._get_changed_files_from_command") as mock_get,
):
mock_get.return_value = expected_files
result = _get_changed_files_github_actions()
mock_get.assert_called_once_with(["gh", "pr", "diff", "1234", "--name-only"])
assert result == expected_files
def test_get_changed_files_github_actions_pull_request_no_pr_number(
monkeypatch: MonkeyPatch,
) -> None:
"""Test _get_changed_files_github_actions when no PR number is found."""
monkeypatch.setenv("GITHUB_EVENT_NAME", "pull_request")
with patch("helpers._get_pr_number_from_github_env", return_value=None):
result = _get_changed_files_github_actions()
assert result is None
def test_get_changed_files_github_actions_push(monkeypatch: MonkeyPatch) -> None:
"""Test _get_changed_files_github_actions for push event."""
monkeypatch.setenv("GITHUB_EVENT_NAME", "push")
expected_files = ["file1.py", "file2.cpp"]
with patch("helpers._get_changed_files_from_command") as mock_get:
mock_get.return_value = expected_files
result = _get_changed_files_github_actions()
mock_get.assert_called_once_with(["git", "diff", "HEAD~1..HEAD", "--name-only"])
assert result == expected_files
def test_get_changed_files_github_actions_push_fallback(
monkeypatch: MonkeyPatch,
) -> None:
"""Test _get_changed_files_github_actions fallback for push event."""
monkeypatch.setenv("GITHUB_EVENT_NAME", "push")
with patch("helpers._get_changed_files_from_command") as mock_get:
mock_get.side_effect = Exception("Failed")
result = _get_changed_files_github_actions()
assert result is None
def test_get_changed_files_github_actions_other_event(monkeypatch: MonkeyPatch) -> None:
"""Test _get_changed_files_github_actions for other event types."""
monkeypatch.setenv("GITHUB_EVENT_NAME", "workflow_dispatch")
result = _get_changed_files_github_actions()
assert result is None
def test_github_actions_push_event_fallback(monkeypatch: MonkeyPatch) -> None:
"""Test push event fallback to git merge-base."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
monkeypatch.setenv("GITHUB_EVENT_NAME", "push")
expected_files = ["file1.py", "file2.cpp"]
with (
patch("helpers._get_changed_files_from_command") as mock_get,
patch("helpers.get_output") as mock_output,
):
# First call fails, triggering fallback
mock_get.side_effect = [
Exception("Failed"),
expected_files,
]
mock_output.side_effect = [
"origin\nupstream\n", # git remote
"abc123\n", # merge base
]
result = changed_files()
assert mock_get.call_count == 2
assert result == expected_files
@pytest.mark.parametrize(
("branch", "merge_base"),
[
(None, "abc123"), # Default branch (dev)
("release", "def456"),
("beta", "ghi789"),
],
)
def test_local_development_branches(
monkeypatch: MonkeyPatch, branch: str | None, merge_base: str
) -> None:
"""Test local development with different branches."""
monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
expected_files = ["file1.py", "file2.cpp"]
with (
patch("helpers.get_output") as mock_output,
patch("helpers._get_changed_files_from_command") as mock_get,
):
if branch is None:
# For default branch, helpers.get_output is called twice (git remote and merge-base)
mock_output.side_effect = [
"origin\nupstream\n", # git remote
f"{merge_base}\n", # merge base for upstream/dev
]
else:
# For custom branch, may need more calls if trying multiple remotes
mock_output.side_effect = [
"origin\nupstream\n", # git remote
Exception("not found"), # upstream/{branch} may fail
f"{merge_base}\n", # merge base for origin/{branch}
]
mock_get.return_value = expected_files
result = changed_files(branch)
mock_get.assert_called_once_with(["git", "diff", merge_base, "--name-only"])
assert result == expected_files
def test_local_development_no_remotes_configured(monkeypatch: MonkeyPatch) -> None:
"""Test error when no git remotes are configured."""
monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
with patch("helpers.get_output") as mock_output:
# The function calls get_output multiple times:
# 1. First to get list of remotes: git remote
# 2. Then for each remote it tries: git merge-base
# We simulate having some remotes but all merge-base attempts fail
def side_effect_func(*args):
if args == ("git", "remote"):
return "origin\nupstream\n"
else:
# All merge-base attempts fail
raise Exception("Command failed")
mock_output.side_effect = side_effect_func
with pytest.raises(ValueError, match="Git not configured"):
changed_files()
@pytest.mark.parametrize(
("stdout", "expected"),
[
("file1.py\nfile2.cpp\n\n", ["file1.py", "file2.cpp"]),
("\n\n", []),
("single.py\n", ["single.py"]),
(
"path/to/file.cpp\nanother/path.h\n",
["another/path.h", "path/to/file.cpp"],
), # Sorted
],
)
def test_get_changed_files_from_command_successful(
stdout: str, expected: list[str]
) -> None:
"""Test successful command execution with various outputs."""
mock_result = Mock()
mock_result.returncode = 0
mock_result.stdout = stdout
with patch("subprocess.run", return_value=mock_result):
result = _get_changed_files_from_command(["git", "diff"])
# Normalize paths to forward slashes for comparison
# since os.path.relpath returns OS-specific separators
normalized_result = [f.replace(os.sep, "/") for f in result]
assert normalized_result == expected
@pytest.mark.parametrize(
("returncode", "stderr"),
[
(1, "Error: command failed"),
(128, "fatal: not a git repository"),
(2, "Unknown error"),
],
)
def test_get_changed_files_from_command_failed(returncode: int, stderr: str) -> None:
"""Test command failure handling."""
mock_result = Mock()
mock_result.returncode = returncode
mock_result.stderr = stderr
with patch("subprocess.run", return_value=mock_result):
with pytest.raises(Exception) as exc_info:
_get_changed_files_from_command(["git", "diff"])
assert "Command failed" in str(exc_info.value)
assert stderr in str(exc_info.value)
def test_get_changed_files_from_command_relative_paths() -> None:
"""Test that paths are made relative to current directory."""
mock_result = Mock()
mock_result.returncode = 0
mock_result.stdout = "/some/project/file1.py\n/some/project/sub/file2.cpp\n"
with (
patch("subprocess.run", return_value=mock_result),
patch(
"os.path.relpath", side_effect=["file1.py", "sub/file2.cpp"]
) as mock_relpath,
patch("os.getcwd", return_value="/some/project"),
):
result = _get_changed_files_from_command(["git", "diff"])
# Check relpath was called with correct arguments
assert mock_relpath.call_count == 2
assert result == ["file1.py", "sub/file2.cpp"]
@pytest.mark.parametrize(
"changed_files_list",
[
["esphome/core/component.h", "esphome/components/wifi/wifi.cpp"],
["esphome/core/helpers.cpp"],
["esphome/core/application.h", "esphome/core/defines.h"],
],
)
def test_get_changed_components_core_files_trigger_full_scan(
changed_files_list: list[str],
) -> None:
"""Test that core file changes trigger full scan without calling subprocess."""
with patch("helpers.changed_files") as mock_changed:
mock_changed.return_value = changed_files_list
# Should return None without calling subprocess
result = get_changed_components()
assert result is None
@pytest.mark.parametrize(
("changed_files_list", "expected"),
[
# Only component files changed
(
["esphome/components/wifi/wifi.cpp", "esphome/components/api/api.cpp"],
["wifi", "api"],
),
# Non-component files only
(["README.md", "script/clang-tidy"], []),
# Single component
(["esphome/components/mqtt/mqtt_client.cpp"], ["mqtt"]),
],
)
def test_get_changed_components_returns_component_list(
changed_files_list: list[str], expected: list[str]
) -> None:
"""Test component detection returns correct component list."""
with patch("helpers.changed_files") as mock_changed:
mock_changed.return_value = changed_files_list
mock_result = Mock()
mock_result.stdout = "\n".join(expected) + "\n" if expected else "\n"
with patch("subprocess.run", return_value=mock_result):
result = get_changed_components()
assert result == expected
def test_get_changed_components_script_failure() -> None:
"""Test fallback to full scan when script fails."""
with patch("helpers.changed_files") as mock_changed:
mock_changed.return_value = ["esphome/components/wifi/wifi_component.cpp"]
with patch("subprocess.run") as mock_run:
mock_run.side_effect = subprocess.CalledProcessError(1, "cmd")
result = get_changed_components()
assert result is None # None means full scan
@pytest.mark.parametrize(
("components", "all_files", "expected_files"),
[
# Core files changed (full scan)
(
None,
["esphome/components/wifi/wifi.cpp", "esphome/core/helpers.cpp"],
["esphome/components/wifi/wifi.cpp", "esphome/core/helpers.cpp"],
),
# Specific components
(
["wifi", "api"],
[
"esphome/components/wifi/wifi.cpp",
"esphome/components/wifi/wifi.h",
"esphome/components/api/api.cpp",
"esphome/components/mqtt/mqtt.cpp",
],
[
"esphome/components/wifi/wifi.cpp",
"esphome/components/wifi/wifi.h",
"esphome/components/api/api.cpp",
],
),
# No components changed
(
[],
["esphome/components/wifi/wifi.cpp", "script/clang-tidy"],
["script/clang-tidy"], # Only non-component changed files
),
],
)
def test_filter_changed_ci_mode(
monkeypatch: MonkeyPatch,
components: list[str] | None,
all_files: list[str],
expected_files: list[str],
) -> None:
"""Test filter_changed in CI mode with different component scenarios."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
with patch("helpers.get_changed_components") as mock_components:
mock_components.return_value = components
if components == []:
# No components changed scenario needs changed_files mock
with patch("helpers.changed_files") as mock_changed:
mock_changed.return_value = ["script/clang-tidy", "README.md"]
result = filter_changed(all_files)
else:
result = filter_changed(all_files)
assert set(result) == set(expected_files)
def test_filter_changed_local_mode(monkeypatch: MonkeyPatch) -> None:
"""Test filter_changed in local mode filters files directly."""
monkeypatch.delenv("GITHUB_ACTIONS", raising=False)
all_files = [
"esphome/components/wifi/wifi.cpp",
"esphome/components/api/api.cpp",
"esphome/core/helpers.cpp",
]
with patch("helpers.changed_files") as mock_changed:
mock_changed.return_value = [
"esphome/components/wifi/wifi.cpp",
"esphome/core/helpers.cpp",
]
result = filter_changed(all_files)
# Should only include files that actually changed
expected = ["esphome/components/wifi/wifi.cpp", "esphome/core/helpers.cpp"]
assert set(result) == set(expected)
def test_filter_changed_component_path_parsing(monkeypatch: MonkeyPatch) -> None:
"""Test correct parsing of component paths."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
all_files = [
"esphome/components/wifi/wifi_component.cpp",
"esphome/components/wifi_info/wifi_info_text_sensor.cpp", # Different component
"esphome/components/api/api_server.cpp",
"esphome/components/api/custom_api_device.h",
]
with patch("helpers.get_changed_components") as mock_components:
mock_components.return_value = ["wifi"] # Only wifi, not wifi_info
result = filter_changed(all_files)
# Should only include files from wifi component, not wifi_info
expected = ["esphome/components/wifi/wifi_component.cpp"]
assert result == expected
def test_filter_changed_prints_output(
monkeypatch: MonkeyPatch, capsys: pytest.CaptureFixture[str]
) -> None:
"""Test that appropriate messages are printed."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
all_files = ["esphome/components/wifi/wifi_component.cpp"]
with patch("helpers.get_changed_components") as mock_components:
mock_components.return_value = ["wifi"]
filter_changed(all_files)
# Check that output was produced (not checking exact messages)
captured = capsys.readouterr()
assert len(captured.out) > 0
@pytest.mark.parametrize(
("files", "expected_empty"),
[
([], True),
(["file.cpp"], False),
],
ids=["empty_files", "non_empty_files"],
)
def test_filter_changed_empty_file_handling(
monkeypatch: MonkeyPatch, files: list[str], expected_empty: bool
) -> None:
"""Test handling of empty file lists."""
monkeypatch.setenv("GITHUB_ACTIONS", "true")
with patch("helpers.get_changed_components") as mock_components:
mock_components.return_value = ["wifi"]
result = filter_changed(files)
# Both cases should be empty:
# - Empty files list -> empty result
# - file.cpp doesn't match esphome/components/wifi/* pattern -> filtered out
assert len(result) == 0
def test_filter_changed_ci_full_scan() -> None:
"""Test _filter_changed_ci when core files changed (full scan)."""
all_files = ["esphome/components/wifi/wifi.cpp", "esphome/core/helpers.cpp"]
with patch("helpers.get_changed_components", return_value=None):
result = _filter_changed_ci(all_files)
# Should return all files for full scan
assert result == all_files
def test_filter_changed_ci_no_components_changed() -> None:
"""Test _filter_changed_ci when no components changed."""
all_files = ["esphome/components/wifi/wifi.cpp", "script/clang-tidy", "README.md"]
with (
patch("helpers.get_changed_components", return_value=[]),
patch("helpers.changed_files", return_value=["script/clang-tidy", "README.md"]),
):
result = _filter_changed_ci(all_files)
# Should only include non-component files that changed
assert set(result) == {"script/clang-tidy", "README.md"}
def test_filter_changed_ci_specific_components() -> None:
"""Test _filter_changed_ci with specific components changed."""
all_files = [
"esphome/components/wifi/wifi.cpp",
"esphome/components/wifi/wifi.h",
"esphome/components/api/api.cpp",
"esphome/components/mqtt/mqtt.cpp",
]
with patch("helpers.get_changed_components", return_value=["wifi", "api"]):
result = _filter_changed_ci(all_files)
# Should include all files from wifi and api components
expected = [
"esphome/components/wifi/wifi.cpp",
"esphome/components/wifi/wifi.h",
"esphome/components/api/api.cpp",
]
assert set(result) == set(expected)
def test_filter_changed_local() -> None:
"""Test _filter_changed_local filters based on git changes."""
all_files = [
"esphome/components/wifi/wifi.cpp",
"esphome/components/api/api.cpp",
"esphome/core/helpers.cpp",
]
with patch("helpers.changed_files") as mock_changed:
mock_changed.return_value = [
"esphome/components/wifi/wifi.cpp",
"esphome/core/helpers.cpp",
]
result = _filter_changed_local(all_files)
# Should only include files that actually changed
expected = ["esphome/components/wifi/wifi.cpp", "esphome/core/helpers.cpp"]
assert set(result) == set(expected)
def test_build_all_include_with_git(tmp_path: Path) -> None:
"""Test build_all_include using git ls-files."""
# Mock git output
git_output = "esphome/core/component.h\nesphome/components/wifi/wifi.h\nesphome/components/api/api.h\n"
mock_proc = Mock()
mock_proc.returncode = 0
mock_proc.stdout = git_output
with (
patch("subprocess.run", return_value=mock_proc),
patch("helpers.temp_header_file", str(tmp_path / "all-include.cpp")),
):
build_all_include()
# Check the generated file
include_file = tmp_path / "all-include.cpp"
assert include_file.exists()
content = include_file.read_text()
expected_lines = [
'#include "esphome/components/api/api.h"',
'#include "esphome/components/wifi/wifi.h"',
'#include "esphome/core/component.h"',
"", # Empty line at end
]
assert content == "\n".join(expected_lines)
def test_build_all_include_empty_output(tmp_path: Path) -> None:
"""Test build_all_include with empty git output."""
# Mock git returning empty output
mock_proc = Mock()
mock_proc.returncode = 0
mock_proc.stdout = ""
with (
patch("subprocess.run", return_value=mock_proc),
patch("helpers.temp_header_file", str(tmp_path / "all-include.cpp")),
):
build_all_include()
# Check the generated file
include_file = tmp_path / "all-include.cpp"
assert include_file.exists()
content = include_file.read_text()
# When git output is empty, the list comprehension filters out empty strings,
# then we append "" to get [""], which joins to just ""
assert content == ""
def test_build_all_include_creates_directory(tmp_path: Path) -> None:
"""Test that build_all_include creates the temp directory if needed."""
# Use a subdirectory that doesn't exist
temp_file = tmp_path / "subdir" / "all-include.cpp"
mock_proc = Mock()
mock_proc.returncode = 0
mock_proc.stdout = "esphome/core/test.h\n"
with (
patch("subprocess.run", return_value=mock_proc),
patch("helpers.temp_header_file", str(temp_file)),
):
build_all_include()
# Check that directory was created
assert temp_file.parent.exists()
assert temp_file.exists()
def test_print_file_list_empty(capsys: pytest.CaptureFixture[str]) -> None:
"""Test printing an empty file list."""
print_file_list([], "Test Files:")
captured = capsys.readouterr()
assert "Test Files:" in captured.out
assert "No files to check!" in captured.out
def test_print_file_list_small(capsys: pytest.CaptureFixture[str]) -> None:
"""Test printing a small list of files (less than max_files)."""
files = ["file1.cpp", "file2.cpp", "file3.cpp"]
print_file_list(files, "Test Files:", max_files=20)
captured = capsys.readouterr()
assert "Test Files:" in captured.out
assert " file1.cpp" in captured.out
assert " file2.cpp" in captured.out
assert " file3.cpp" in captured.out
assert "... and" not in captured.out
def test_print_file_list_exact_max_files(capsys: pytest.CaptureFixture[str]) -> None:
"""Test printing exactly max_files number of files."""
files = [f"file{i}.cpp" for i in range(20)]
print_file_list(files, "Test Files:", max_files=20)
captured = capsys.readouterr()
# All files should be shown
for i in range(20):
assert f" file{i}.cpp" in captured.out
assert "... and" not in captured.out
def test_print_file_list_large(capsys: pytest.CaptureFixture[str]) -> None:
"""Test printing a large list of files (more than max_files)."""
files = [f"file{i:03d}.cpp" for i in range(50)]
print_file_list(files, "Test Files:", max_files=20)
captured = capsys.readouterr()
assert "Test Files:" in captured.out
# First 10 files should be shown (sorted)
for i in range(10):
assert f" file{i:03d}.cpp" in captured.out
# Files 10-49 should not be shown
assert " file010.cpp" not in captured.out
assert " file049.cpp" not in captured.out
# Should show count of remaining files
assert "... and 40 more files" in captured.out
def test_print_file_list_unsorted(capsys: pytest.CaptureFixture[str]) -> None:
"""Test that files are sorted before printing."""
files = ["z_file.cpp", "a_file.cpp", "m_file.cpp"]
print_file_list(files, "Test Files:", max_files=20)
captured = capsys.readouterr()
lines = captured.out.strip().split("\n")
# Check order in output
assert lines[1] == " a_file.cpp"
assert lines[2] == " m_file.cpp"
assert lines[3] == " z_file.cpp"
def test_print_file_list_custom_max_files(capsys: pytest.CaptureFixture[str]) -> None:
"""Test with custom max_files parameter."""
files = [f"file{i}.cpp" for i in range(15)]
print_file_list(files, "Test Files:", max_files=10)
captured = capsys.readouterr()
# Should truncate after 10 files
assert "... and 5 more files" in captured.out
def test_print_file_list_default_title(capsys: pytest.CaptureFixture[str]) -> None:
"""Test with default title."""
print_file_list(["test.cpp"])
captured = capsys.readouterr()
assert "Files:" in captured.out
assert " test.cpp" in captured.out