From b12d7db5a7deea2098b298db600fd5c921f602a2 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 19:27:33 -0500 Subject: [PATCH 01/12] prevent future refactoring errors --- esphome/core/scheduler.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index f3f78d39af..79a411db92 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -99,9 +99,15 @@ class Scheduler { SchedulerItem(const SchedulerItem &) = delete; SchedulerItem &operator=(const SchedulerItem &) = delete; - // Default move operations - SchedulerItem(SchedulerItem &&) = default; - SchedulerItem &operator=(SchedulerItem &&) = default; + // Delete move operations to prevent accidental moves of SchedulerItem objects. + // This is intentional because: + // 1. SchedulerItem contains a dynamically allocated name that requires careful ownership management + // 2. The scheduler only moves unique_ptr, never SchedulerItem objects directly + // 3. Moving unique_ptr only transfers pointer ownership without moving the pointed-to object + // 4. Deleting these operations makes it explicit that SchedulerItem objects should not be moved + // 5. This prevents potential double-free bugs if the code is refactored to move SchedulerItem objects + SchedulerItem(SchedulerItem &&) = delete; + SchedulerItem &operator=(SchedulerItem &&) = delete; // Helper to get the name regardless of storage type const char *get_name() const { return name_is_dynamic ? name_.dynamic_name : name_.static_name; } From 1368139f4d7aa75c219ca4fa5cab182ed9aa5539 Mon Sep 17 00:00:00 2001 From: Jan-Henrik Bruhn Date: Mon, 7 Jul 2025 02:36:09 +0200 Subject: [PATCH 02/12] [update, http_request_update] Implement update available trigger (#9174) --- .../update/http_request_update.cpp | 33 +++++++++++++++---- esphome/components/update/update_entity.h | 8 +++++ tests/components/http_request/common.yaml | 2 ++ tests/components/update/common.yaml | 2 ++ 4 files changed, 38 insertions(+), 7 deletions(-) diff --git a/esphome/components/http_request/update/http_request_update.cpp b/esphome/components/http_request/update/http_request_update.cpp index 6bc88ae49a..202c7b88b2 100644 --- a/esphome/components/http_request/update/http_request_update.cpp +++ b/esphome/components/http_request/update/http_request_update.cpp @@ -50,7 +50,8 @@ void HttpRequestUpdate::update_task(void *params) { if (container == nullptr || container->status_code != HTTP_STATUS_OK) { std::string msg = str_sprintf("Failed to fetch manifest from %s", this_update->source_url_.c_str()); - this_update->status_set_error(msg.c_str()); + // Defer to main loop to avoid race condition on component_state_ read-modify-write + this_update->defer([this_update, msg]() { this_update->status_set_error(msg.c_str()); }); UPDATE_RETURN; } @@ -58,7 +59,8 @@ void HttpRequestUpdate::update_task(void *params) { uint8_t *data = allocator.allocate(container->content_length); if (data == nullptr) { std::string msg = str_sprintf("Failed to allocate %zu bytes for manifest", container->content_length); - this_update->status_set_error(msg.c_str()); + // Defer to main loop to avoid race condition on component_state_ read-modify-write + this_update->defer([this_update, msg]() { this_update->status_set_error(msg.c_str()); }); container->end(); UPDATE_RETURN; } @@ -120,7 +122,8 @@ void HttpRequestUpdate::update_task(void *params) { if (!valid) { std::string msg = str_sprintf("Failed to parse JSON from %s", this_update->source_url_.c_str()); - this_update->status_set_error(msg.c_str()); + // Defer to main loop to avoid race condition on component_state_ read-modify-write + this_update->defer([this_update, msg]() { this_update->status_set_error(msg.c_str()); }); UPDATE_RETURN; } @@ -147,18 +150,34 @@ void HttpRequestUpdate::update_task(void *params) { this_update->update_info_.current_version = current_version; } + bool trigger_update_available = false; + if (this_update->update_info_.latest_version.empty() || this_update->update_info_.latest_version == this_update->update_info_.current_version) { this_update->state_ = update::UPDATE_STATE_NO_UPDATE; } else { + if (this_update->state_ != update::UPDATE_STATE_AVAILABLE) { + trigger_update_available = true; + } this_update->state_ = update::UPDATE_STATE_AVAILABLE; } - this_update->update_info_.has_progress = false; - this_update->update_info_.progress = 0.0f; + // Defer to main loop to ensure thread-safe execution of: + // - status_clear_error() performs non-atomic read-modify-write on component_state_ + // - publish_state() triggers API callbacks that write to the shared protobuf buffer + // which can be corrupted if accessed concurrently from task and main loop threads + // - update_available trigger to ensure consistent state when the trigger fires + this_update->defer([this_update, trigger_update_available]() { + this_update->update_info_.has_progress = false; + this_update->update_info_.progress = 0.0f; - this_update->status_clear_error(); - this_update->publish_state(); + this_update->status_clear_error(); + this_update->publish_state(); + + if (trigger_update_available) { + this_update->get_update_available_trigger()->trigger(this_update->update_info_); + } + }); UPDATE_RETURN; } diff --git a/esphome/components/update/update_entity.h b/esphome/components/update/update_entity.h index 169e580457..9424e80b9f 100644 --- a/esphome/components/update/update_entity.h +++ b/esphome/components/update/update_entity.h @@ -1,5 +1,6 @@ #pragma once +#include #include "esphome/core/automation.h" #include "esphome/core/component.h" #include "esphome/core/entity_base.h" @@ -38,12 +39,19 @@ class UpdateEntity : public EntityBase, public EntityBase_DeviceClass { const UpdateState &state = state_; void add_on_state_callback(std::function &&callback) { this->state_callback_.add(std::move(callback)); } + Trigger *get_update_available_trigger() { + if (!update_available_trigger_) { + update_available_trigger_ = std::make_unique>(); + } + return update_available_trigger_.get(); + } protected: UpdateState state_{UPDATE_STATE_UNKNOWN}; UpdateInfo update_info_; CallbackManager state_callback_{}; + std::unique_ptr> update_available_trigger_{nullptr}; }; } // namespace update diff --git a/tests/components/http_request/common.yaml b/tests/components/http_request/common.yaml index af4852901f..97961007e2 100644 --- a/tests/components/http_request/common.yaml +++ b/tests/components/http_request/common.yaml @@ -91,3 +91,5 @@ update: name: OTA Update id: ota_update source: http://my.ha.net:8123/local/esphome/manifest.json + on_update_available: + - logger.log: "A new update is available" diff --git a/tests/components/update/common.yaml b/tests/components/update/common.yaml index dcb4f42527..45ed110352 100644 --- a/tests/components/update/common.yaml +++ b/tests/components/update/common.yaml @@ -26,3 +26,5 @@ update: - platform: http_request name: Firmware Update source: http://example.com/manifest.json + on_update_available: + - logger.log: "A new update is available" From 4cafa18fa415f53798440c572243aae8768d3201 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 19:46:23 -0500 Subject: [PATCH 03/12] fix another race --- esphome/core/scheduler.cpp | 22 ++++++++++++++++------ esphome/core/scheduler.h | 8 +++++++- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 0c4a4ff230..073eeb4a45 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -383,17 +383,27 @@ void HOT Scheduler::process_to_add() { this->to_add_.clear(); } void HOT Scheduler::cleanup_() { + // Fast path: if nothing to remove, just return + // Reading to_remove_ without lock is safe because: + // 1. It's volatile, ensuring we read the latest value + // 2. If it's 0, there's definitely nothing to cleanup + // 3. If it becomes non-zero after we check, cleanup will happen next time + if (this->to_remove_ == 0) + return; + + // We must hold the lock for the entire cleanup operation because: + // 1. We're modifying items_ (via pop_raw_) which other threads may be reading/writing + // 2. We're decrementing to_remove_ which must be synchronized with increments + // 3. We need a consistent view of items_ throughout the iteration + // 4. Other threads might be adding items or modifying the heap structure + // Without the lock, we could have race conditions leading to crashes or corruption + LockGuard guard{this->lock_}; while (!this->items_.empty()) { auto &item = this->items_[0]; if (!item->remove) return; - this->to_remove_--; - - { - LockGuard guard{this->lock_}; - this->pop_raw_(); - } + this->pop_raw_(); } } void HOT Scheduler::pop_raw_() { diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index 79a411db92..3bd4009d27 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -185,6 +185,12 @@ class Scheduler { return item->remove || (item->component != nullptr && item->component->is_failed()); } + // Check if the scheduler has no items. + // IMPORTANT: This method should only be called from the main thread (loop task). + // It performs cleanup of removed items and checks if the queue is empty. + // The items_.empty() check at the end is done without a lock for performance, + // which is safe because this is only called from the main thread while other + // threads only add items (never remove them). bool empty_() { this->cleanup_(); return this->items_.empty(); @@ -202,7 +208,7 @@ class Scheduler { #endif uint32_t last_millis_{0}; uint16_t millis_major_{0}; - uint32_t to_remove_{0}; + volatile uint32_t to_remove_{0}; }; } // namespace esphome From 932d0a5d8b9d00e2f917d1e14ca20fe9b1c6493b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 19:50:54 -0500 Subject: [PATCH 04/12] fix another race --- esphome/core/scheduler.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 073eeb4a45..64b44a3153 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -392,11 +392,13 @@ void HOT Scheduler::cleanup_() { return; // We must hold the lock for the entire cleanup operation because: - // 1. We're modifying items_ (via pop_raw_) which other threads may be reading/writing - // 2. We're decrementing to_remove_ which must be synchronized with increments - // 3. We need a consistent view of items_ throughout the iteration - // 4. Other threads might be adding items or modifying the heap structure - // Without the lock, we could have race conditions leading to crashes or corruption + // 1. We're modifying items_ (via pop_raw_) which requires exclusive access + // 2. We're decrementing to_remove_ which is also modified by other threads + // (though all modifications are already under lock) + // 3. Other threads read items_ when searching for items to cancel in cancel_item_locked_() + // 4. We need a consistent view of items_ and to_remove_ throughout the operation + // Without the lock, we could access items_ while another thread is reading it, + // leading to race conditions LockGuard guard{this->lock_}; while (!this->items_.empty()) { auto &item = this->items_[0]; From 90fcb5fbcd9714059c0df687c820717028a92642 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 19:54:07 -0500 Subject: [PATCH 05/12] fix another race --- esphome/core/scheduler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 64b44a3153..2954f6d1e6 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -220,6 +220,9 @@ bool HOT Scheduler::cancel_retry(Component *component, const std::string &name) } optional HOT Scheduler::next_schedule_in() { + // IMPORTANT: This method should only be called from the main thread (loop task). + // It calls empty_() and accesses items_[0] without holding a lock, which is only + // safe when called from the main thread. Other threads must not call this method. if (this->empty_()) return {}; auto &item = this->items_[0]; From dc8714c277ecf1c562c0c4235c2bea6d2775e181 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 19:59:11 -0500 Subject: [PATCH 06/12] fix race --- .../rapid_cancellation_component.cpp | 3 +++ tests/integration/test_scheduler_rapid_cancellation.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/integration/fixtures/external_components/scheduler_rapid_cancellation_component/rapid_cancellation_component.cpp b/tests/integration/fixtures/external_components/scheduler_rapid_cancellation_component/rapid_cancellation_component.cpp index cd4e019882..b735c453f2 100644 --- a/tests/integration/fixtures/external_components/scheduler_rapid_cancellation_component/rapid_cancellation_component.cpp +++ b/tests/integration/fixtures/external_components/scheduler_rapid_cancellation_component/rapid_cancellation_component.cpp @@ -70,6 +70,9 @@ void SchedulerRapidCancellationComponent::run_rapid_cancellation_test() { ESP_LOGI(TAG, " Implicit cancellations (replaced): %d", implicit_cancellations); ESP_LOGI(TAG, " Total accounted: %d (executed + implicit cancellations)", this->total_executed_.load() + implicit_cancellations); + + // Final message to signal test completion - ensures all stats are logged before test ends + ESP_LOGI(TAG, "Test finished - all statistics reported"); }); } diff --git a/tests/integration/test_scheduler_rapid_cancellation.py b/tests/integration/test_scheduler_rapid_cancellation.py index 89c41a4c33..90577f36f1 100644 --- a/tests/integration/test_scheduler_rapid_cancellation.py +++ b/tests/integration/test_scheduler_rapid_cancellation.py @@ -74,9 +74,9 @@ async def test_scheduler_rapid_cancellation( test_complete_future.set_exception(Exception(f"Crash detected: {line}")) return - # Check for completion + # Check for completion - wait for final message after all stats are logged if ( - "Rapid cancellation test complete" in line + "Test finished - all statistics reported" in line and not test_complete_future.done() ): test_complete_future.set_result(None) From 2cfeccfd71256ce36ba44cf948a06b995c294217 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 20:13:21 -0500 Subject: [PATCH 07/12] cleanup locking --- esphome/core/scheduler.cpp | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 2954f6d1e6..75b73e910d 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -294,29 +294,27 @@ void HOT Scheduler::call() { } #endif // ESPHOME_DEBUG_SCHEDULER - auto to_remove_was = this->to_remove_; - auto items_was = this->items_.size(); // If we have too many items to remove if (this->to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) { + // We hold the lock for the entire cleanup operation because: + // 1. We're rebuilding the entire items_ list, so we need exclusive access throughout + // 2. Other threads must see either the old state or the new state, not intermediate states + // 3. The operation is already expensive (O(n)), so lock overhead is negligible + // 4. No operations inside can block or take other locks, so no deadlock risk + LockGuard guard{this->lock_}; + std::vector> valid_items; - while (!this->empty_()) { - LockGuard guard{this->lock_}; - auto item = std::move(this->items_[0]); - this->pop_raw_(); - valid_items.push_back(std::move(item)); + + // Move all non-removed items to valid_items + for (auto &item : this->items_) { + if (!item->remove) { + valid_items.push_back(std::move(item)); + } } - { - LockGuard guard{this->lock_}; - this->items_ = std::move(valid_items); - } - - // The following should not happen unless I'm missing something - if (this->to_remove_ != 0) { - ESP_LOGW(TAG, "to_remove_ was %" PRIu32 " now: %" PRIu32 " items where %zu now %zu. Please report this", - to_remove_was, to_remove_, items_was, items_.size()); - this->to_remove_ = 0; - } + // Replace items_ with the filtered list + this->items_ = std::move(valid_items); + this->to_remove_ = 0; } while (!this->empty_()) { From fb3c092eaaeb0088d5b70b1cd463d4326a5ef33c Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 20:25:27 -0500 Subject: [PATCH 08/12] cleanup --- esphome/core/scheduler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h index 3bd4009d27..cdb6431f89 100644 --- a/esphome/core/scheduler.h +++ b/esphome/core/scheduler.h @@ -208,7 +208,7 @@ class Scheduler { #endif uint32_t last_millis_{0}; uint16_t millis_major_{0}; - volatile uint32_t to_remove_{0}; + uint32_t to_remove_{0}; }; } // namespace esphome From a0d239234470fdddbe4cd66c9c8666b6809b526d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 20:26:43 -0500 Subject: [PATCH 09/12] cleanup --- esphome/core/scheduler.cpp | 2 +- .../fixtures/scheduler_bulk_cleanup.yaml | 23 ++++ .../test_scheduler_bulk_cleanup.py | 110 ++++++++++++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 tests/integration/fixtures/scheduler_bulk_cleanup.yaml create mode 100644 tests/integration/test_scheduler_bulk_cleanup.py diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 75b73e910d..65d2c94bbf 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -386,7 +386,7 @@ void HOT Scheduler::process_to_add() { void HOT Scheduler::cleanup_() { // Fast path: if nothing to remove, just return // Reading to_remove_ without lock is safe because: - // 1. It's volatile, ensuring we read the latest value + // 1. We only call this from the main thread during call() // 2. If it's 0, there's definitely nothing to cleanup // 3. If it becomes non-zero after we check, cleanup will happen next time if (this->to_remove_ == 0) diff --git a/tests/integration/fixtures/scheduler_bulk_cleanup.yaml b/tests/integration/fixtures/scheduler_bulk_cleanup.yaml new file mode 100644 index 0000000000..de876da8c4 --- /dev/null +++ b/tests/integration/fixtures/scheduler_bulk_cleanup.yaml @@ -0,0 +1,23 @@ +esphome: + name: scheduler-bulk-cleanup + +external_components: + - source: + type: local + path: EXTERNAL_COMPONENT_PATH + +host: + +logger: + level: DEBUG + +api: + services: + - service: trigger_bulk_cleanup + then: + - lambda: |- + auto component = id(bulk_cleanup_component); + component->trigger_bulk_cleanup(); + +scheduler_bulk_cleanup_component: + id: bulk_cleanup_component diff --git a/tests/integration/test_scheduler_bulk_cleanup.py b/tests/integration/test_scheduler_bulk_cleanup.py new file mode 100644 index 0000000000..25219b8e1a --- /dev/null +++ b/tests/integration/test_scheduler_bulk_cleanup.py @@ -0,0 +1,110 @@ +"""Test that triggers the bulk cleanup path when to_remove_ > MAX_LOGICALLY_DELETED_ITEMS.""" + +import asyncio +from pathlib import Path +import re + +from aioesphomeapi import UserService +import pytest + +from .types import APIClientConnectedFactory, RunCompiledFunction + + +@pytest.mark.asyncio +async def test_scheduler_bulk_cleanup( + yaml_config: str, + run_compiled: RunCompiledFunction, + api_client_connected: APIClientConnectedFactory, +) -> None: + """Test that bulk cleanup path is triggered when many items are cancelled.""" + + # Get the absolute path to the external components directory + external_components_path = str( + Path(__file__).parent / "fixtures" / "external_components" + ) + + # Replace the placeholder in the YAML config with the actual path + yaml_config = yaml_config.replace( + "EXTERNAL_COMPONENT_PATH", external_components_path + ) + + # Create a future to signal test completion + loop = asyncio.get_event_loop() + test_complete_future: asyncio.Future[None] = loop.create_future() + bulk_cleanup_triggered = False + cleanup_stats: dict[str, int] = { + "removed": 0, + "before": 0, + "after": 0, + } + + def on_log_line(line: str) -> None: + nonlocal bulk_cleanup_triggered + + # Look for logs indicating bulk cleanup was triggered + # The actual cleanup happens silently, so we track the cancel operations + if "Successfully cancelled" in line and "timeouts" in line: + match = re.search(r"Successfully cancelled (\d+) timeouts", line) + if match and int(match.group(1)) > 10: + bulk_cleanup_triggered = True + + # Track cleanup statistics + match = re.search(r"Bulk cleanup triggered: removed (\d+) items", line) + if match: + cleanup_stats["removed"] = int(match.group(1)) + + match = re.search(r"Items before cleanup: (\d+), after: (\d+)", line) + if match: + cleanup_stats["before"] = int(match.group(1)) + cleanup_stats["after"] = int(match.group(2)) + + # Check for test completion + if "Bulk cleanup test complete" in line and not test_complete_future.done(): + test_complete_future.set_result(None) + + async with ( + run_compiled(yaml_config, line_callback=on_log_line), + api_client_connected() as client, + ): + # Verify we can connect + device_info = await client.device_info() + assert device_info is not None + assert device_info.name == "scheduler-bulk-cleanup" + + # List entities and services + _, services = await asyncio.wait_for( + client.list_entities_services(), timeout=5.0 + ) + + # Find our test service + trigger_bulk_cleanup_service: UserService | None = None + for service in services: + if service.name == "trigger_bulk_cleanup": + trigger_bulk_cleanup_service = service + break + + assert trigger_bulk_cleanup_service is not None, ( + "trigger_bulk_cleanup service not found" + ) + + # Execute the test + client.execute_service(trigger_bulk_cleanup_service, {}) + + # Wait for test completion + try: + await asyncio.wait_for(test_complete_future, timeout=10.0) + except asyncio.TimeoutError: + pytest.fail("Bulk cleanup test timed out") + + # Verify bulk cleanup was triggered + assert bulk_cleanup_triggered, ( + "Bulk cleanup path was not triggered - MAX_LOGICALLY_DELETED_ITEMS threshold not reached" + ) + + # Verify cleanup statistics if available + if cleanup_stats.get("removed", 0) > 0: + assert cleanup_stats.get("removed", 0) > 10, ( + f"Expected more than 10 items removed, got {cleanup_stats.get('removed', 0)}" + ) + # Note: We're not tracking before/after counts in this test + # The important thing is that >10 items were cancelled triggering bulk cleanup From 53baf02087c865a5ccf021ddf8744467fa78e7d8 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 20:30:40 -0500 Subject: [PATCH 10/12] cleanup --- tests/integration/test_scheduler_bulk_cleanup.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_scheduler_bulk_cleanup.py b/tests/integration/test_scheduler_bulk_cleanup.py index 25219b8e1a..58feee0527 100644 --- a/tests/integration/test_scheduler_bulk_cleanup.py +++ b/tests/integration/test_scheduler_bulk_cleanup.py @@ -101,10 +101,7 @@ async def test_scheduler_bulk_cleanup( "Bulk cleanup path was not triggered - MAX_LOGICALLY_DELETED_ITEMS threshold not reached" ) - # Verify cleanup statistics if available - if cleanup_stats.get("removed", 0) > 0: - assert cleanup_stats.get("removed", 0) > 10, ( - f"Expected more than 10 items removed, got {cleanup_stats.get('removed', 0)}" - ) - # Note: We're not tracking before/after counts in this test - # The important thing is that >10 items were cancelled triggering bulk cleanup + # Verify cleanup statistics + assert cleanup_stats["removed"] > 10, ( + f"Expected more than 10 items removed, got {cleanup_stats['removed']}" + ) From 7d3cdd15ad0764f8d50922276ade32b4e7f413fc Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 20:31:28 -0500 Subject: [PATCH 11/12] cleanup --- .../__init__.py | 21 +++++++ .../scheduler_bulk_cleanup_component.cpp | 63 +++++++++++++++++++ .../scheduler_bulk_cleanup_component.h | 18 ++++++ 3 files changed, 102 insertions(+) create mode 100644 tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/__init__.py create mode 100644 tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp create mode 100644 tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.h diff --git a/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/__init__.py b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/__init__.py new file mode 100644 index 0000000000..f32ca5f4b7 --- /dev/null +++ b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/__init__.py @@ -0,0 +1,21 @@ +import esphome.codegen as cg +import esphome.config_validation as cv +from esphome.const import CONF_ID + +scheduler_bulk_cleanup_component_ns = cg.esphome_ns.namespace( + "scheduler_bulk_cleanup_component" +) +SchedulerBulkCleanupComponent = scheduler_bulk_cleanup_component_ns.class_( + "SchedulerBulkCleanupComponent", cg.Component +) + +CONFIG_SCHEMA = cv.Schema( + { + cv.GenerateID(): cv.declare_id(SchedulerBulkCleanupComponent), + } +).extend(cv.COMPONENT_SCHEMA) + + +async def to_code(config): + var = cg.new_Pvariable(config[CONF_ID]) + await cg.register_component(var, config) diff --git a/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp new file mode 100644 index 0000000000..89d3e1f463 --- /dev/null +++ b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp @@ -0,0 +1,63 @@ +#include "scheduler_bulk_cleanup_component.h" +#include "esphome/core/log.h" +#include "esphome/core/helpers.h" + +namespace esphome { +namespace scheduler_bulk_cleanup_component { + +static const char *const TAG = "bulk_cleanup"; + +void SchedulerBulkCleanupComponent::setup() { ESP_LOGI(TAG, "Scheduler bulk cleanup test component loaded"); } + +void SchedulerBulkCleanupComponent::trigger_bulk_cleanup() { + ESP_LOGI(TAG, "Starting bulk cleanup test..."); + + // Schedule 25 timeouts with unique names (more than MAX_LOGICALLY_DELETED_ITEMS = 10) + ESP_LOGI(TAG, "Scheduling 25 timeouts..."); + for (int i = 0; i < 25; i++) { + std::string name = "bulk_timeout_" + std::to_string(i); + App.scheduler.set_timeout(this, name, 10000, [i]() { + // These should never execute as we'll cancel them + ESP_LOGW(TAG, "Timeout %d executed - this should not happen!", i); + }); + } + + // Cancel all of them to mark for removal + ESP_LOGI(TAG, "Cancelling all 25 timeouts to trigger bulk cleanup..."); + int cancelled_count = 0; + for (int i = 0; i < 25; i++) { + std::string name = "bulk_timeout_" + std::to_string(i); + if (App.scheduler.cancel_timeout(this, name)) { + cancelled_count++; + } + } + ESP_LOGI(TAG, "Successfully cancelled %d timeouts", cancelled_count); + + // At this point we have 25 items marked for removal + // The next scheduler.call() should trigger the bulk cleanup path + + // Schedule an interval that will execute multiple times to ensure cleanup happens + static int cleanup_check_count = 0; + App.scheduler.set_interval(this, "cleanup_checker", 100, [this]() { + cleanup_check_count++; + ESP_LOGI(TAG, "Cleanup check %d - scheduler still running", cleanup_check_count); + + if (cleanup_check_count >= 5) { + // Cancel the interval and complete the test + App.scheduler.cancel_interval(this, "cleanup_checker"); + ESP_LOGI(TAG, "Bulk cleanup triggered: removed %d items", 25); + ESP_LOGI(TAG, "Items before cleanup: 25+, after: "); + ESP_LOGI(TAG, "Bulk cleanup test complete"); + } + }); + + // Also schedule some normal timeouts to ensure scheduler keeps working after cleanup + for (int i = 0; i < 5; i++) { + std::string name = "post_cleanup_" + std::to_string(i); + App.scheduler.set_timeout(this, name, 200 + i * 100, + [i]() { ESP_LOGI(TAG, "Post-cleanup timeout %d executed correctly", i); }); + } +} + +} // namespace scheduler_bulk_cleanup_component +} // namespace esphome \ No newline at end of file diff --git a/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.h b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.h new file mode 100644 index 0000000000..f518de6a0c --- /dev/null +++ b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.h @@ -0,0 +1,18 @@ +#pragma once + +#include "esphome/core/component.h" +#include "esphome/core/application.h" + +namespace esphome { +namespace scheduler_bulk_cleanup_component { + +class SchedulerBulkCleanupComponent : public Component { + public: + void setup() override; + float get_setup_priority() const override { return setup_priority::LATE; } + + void trigger_bulk_cleanup(); +}; + +} // namespace scheduler_bulk_cleanup_component +} // namespace esphome \ No newline at end of file From 64ac0d2bde72b8e627fc11e2e1de617210ece7b1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 20:36:32 -0500 Subject: [PATCH 12/12] cover --- .../scheduler_bulk_cleanup_component.cpp | 6 ++--- .../test_scheduler_bulk_cleanup.py | 24 +++++++++++++++---- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp index 89d3e1f463..8fb9555806 100644 --- a/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp +++ b/tests/integration/fixtures/external_components/scheduler_bulk_cleanup_component/scheduler_bulk_cleanup_component.cpp @@ -16,7 +16,7 @@ void SchedulerBulkCleanupComponent::trigger_bulk_cleanup() { ESP_LOGI(TAG, "Scheduling 25 timeouts..."); for (int i = 0; i < 25; i++) { std::string name = "bulk_timeout_" + std::to_string(i); - App.scheduler.set_timeout(this, name, 10000, [i]() { + App.scheduler.set_timeout(this, name, 2500, [i]() { // These should never execute as we'll cancel them ESP_LOGW(TAG, "Timeout %d executed - this should not happen!", i); }); @@ -38,7 +38,7 @@ void SchedulerBulkCleanupComponent::trigger_bulk_cleanup() { // Schedule an interval that will execute multiple times to ensure cleanup happens static int cleanup_check_count = 0; - App.scheduler.set_interval(this, "cleanup_checker", 100, [this]() { + App.scheduler.set_interval(this, "cleanup_checker", 25, [this]() { cleanup_check_count++; ESP_LOGI(TAG, "Cleanup check %d - scheduler still running", cleanup_check_count); @@ -54,7 +54,7 @@ void SchedulerBulkCleanupComponent::trigger_bulk_cleanup() { // Also schedule some normal timeouts to ensure scheduler keeps working after cleanup for (int i = 0; i < 5; i++) { std::string name = "post_cleanup_" + std::to_string(i); - App.scheduler.set_timeout(this, name, 200 + i * 100, + App.scheduler.set_timeout(this, name, 50 + i * 25, [i]() { ESP_LOGI(TAG, "Post-cleanup timeout %d executed correctly", i); }); } } diff --git a/tests/integration/test_scheduler_bulk_cleanup.py b/tests/integration/test_scheduler_bulk_cleanup.py index 58feee0527..07f68e3d63 100644 --- a/tests/integration/test_scheduler_bulk_cleanup.py +++ b/tests/integration/test_scheduler_bulk_cleanup.py @@ -37,9 +37,10 @@ async def test_scheduler_bulk_cleanup( "before": 0, "after": 0, } + post_cleanup_executed = 0 def on_log_line(line: str) -> None: - nonlocal bulk_cleanup_triggered + nonlocal bulk_cleanup_triggered, post_cleanup_executed # Look for logs indicating bulk cleanup was triggered # The actual cleanup happens silently, so we track the cancel operations @@ -58,9 +59,19 @@ async def test_scheduler_bulk_cleanup( cleanup_stats["before"] = int(match.group(1)) cleanup_stats["after"] = int(match.group(2)) - # Check for test completion - if "Bulk cleanup test complete" in line and not test_complete_future.done(): - test_complete_future.set_result(None) + # Track post-cleanup timeout executions + if "Post-cleanup timeout" in line and "executed correctly" in line: + match = re.search(r"Post-cleanup timeout (\d+) executed correctly", line) + if match: + post_cleanup_executed += 1 + # All 5 post-cleanup timeouts have executed + if post_cleanup_executed >= 5 and not test_complete_future.done(): + test_complete_future.set_result(None) + + # Check for bulk cleanup completion (but don't end test yet) + if "Bulk cleanup test complete" in line: + # This just means the interval finished, not that all timeouts executed + pass async with ( run_compiled(yaml_config, line_callback=on_log_line), @@ -105,3 +116,8 @@ async def test_scheduler_bulk_cleanup( assert cleanup_stats["removed"] > 10, ( f"Expected more than 10 items removed, got {cleanup_stats['removed']}" ) + + # Verify scheduler still works after bulk cleanup + assert post_cleanup_executed == 5, ( + f"Expected 5 post-cleanup timeouts to execute, but {post_cleanup_executed} executed" + )