From b00adbddceca26ddd1c68bec621ba37b02faa231 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 6 Jul 2025 10:40:44 -0500 Subject: [PATCH] fix race --- esphome/core/scheduler.cpp | 82 +++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 525525dbc3..2d54077dc3 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -58,6 +58,31 @@ static void validate_static_string(const char *name) { // iterating over them from the loop task is fine; but iterating from any other context requires the lock to be held to // avoid the main thread modifying the list while it is being accessed. +// Helper to cancel items by name - must be called with lock held +bool HOT Scheduler::cancel_item_locked_(Component *component, const char *name, SchedulerItem::Type type) { + bool ret = false; + + for (auto &it : this->items_) { + const char *item_name = it->get_name(); + if (it->component == component && item_name != nullptr && strcmp(name, item_name) == 0 && it->type == type && + !it->remove) { + this->to_remove_++; + it->remove = true; + ret = true; + } + } + for (auto &it : this->to_add_) { + const char *item_name = it->get_name(); + if (it->component == component && item_name != nullptr && strcmp(name, item_name) == 0 && it->type == type && + !it->remove) { + it->remove = true; + ret = true; + } + } + + return ret; +} + // Common implementation for both timeout and interval void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type type, bool is_static_string, const void *name_ptr, uint32_t delay, std::function func) { @@ -66,7 +91,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type is_static_string ? static_cast(name_ptr) : static_cast(name_ptr)->c_str(); if (delay == SCHEDULER_DONT_RUN) { - // Cancel existing timer if name is not empty + // Still need to cancel existing timer if name is not empty if (name_cstr != nullptr && name_cstr[0] != '\0') { this->cancel_item_(component, name_cstr, type); } @@ -111,7 +136,16 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type } #endif - this->push_(std::move(item)); + { + LockGuard guard{this->lock_}; + // If name is provided, do atomic cancel-and-add + if (name_cstr != nullptr && name_cstr[0] != '\0') { + // Cancel existing items + this->cancel_item_locked_(component, name_cstr, type); + } + // Add new item directly to to_add_ (not using push_ to avoid double-locking) + this->to_add_.push_back(std::move(item)); + } } void HOT Scheduler::set_timeout(Component *component, const char *name, uint32_t timeout, std::function func) { @@ -242,10 +276,10 @@ void HOT Scheduler::call() { } #endif // ESPHOME_DEBUG_SCHEDULER - auto to_remove_was = to_remove_; + auto to_remove_was = this->to_remove_; auto items_was = this->items_.size(); // If we have too many items to remove - if (to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) { + if (this->to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) { std::vector> valid_items; while (!this->empty_()) { LockGuard guard{this->lock_}; @@ -260,10 +294,10 @@ void HOT Scheduler::call() { } // The following should not happen unless I'm missing something - if (to_remove_ != 0) { + if (this->to_remove_ != 0) { ESP_LOGW(TAG, "to_remove_ was %" PRIu32 " now: %" PRIu32 " items where %zu now %zu. Please report this", to_remove_was, to_remove_, items_was, items_.size()); - to_remove_ = 0; + this->to_remove_ = 0; } } @@ -304,26 +338,23 @@ void HOT Scheduler::call() { } { - this->lock_.lock(); + LockGuard guard{this->lock_}; // new scope, item from before might have been moved in the vector auto item = std::move(this->items_[0]); - // Only pop after function call, this ensures we were reachable // during the function call and know if we were cancelled. this->pop_raw_(); - this->lock_.unlock(); - if (item->remove) { // We were removed/cancelled in the function call, stop - to_remove_--; + this->to_remove_--; continue; } if (item->type == SchedulerItem::INTERVAL) { item->next_execution_ = now + item->interval; - this->push_(std::move(item)); + this->to_add_.push_back(std::move(item)); } } } @@ -348,7 +379,7 @@ void HOT Scheduler::cleanup_() { if (!item->remove) return; - to_remove_--; + this->to_remove_--; { LockGuard guard{this->lock_}; @@ -360,10 +391,6 @@ void HOT Scheduler::pop_raw_() { std::pop_heap(this->items_.begin(), this->items_.end(), SchedulerItem::cmp); this->items_.pop_back(); } -void HOT Scheduler::push_(std::unique_ptr item) { - LockGuard guard{this->lock_}; - this->to_add_.push_back(std::move(item)); -} // Common implementation for cancel operations bool HOT Scheduler::cancel_item_common_(Component *component, bool is_static_string, const void *name_ptr, SchedulerItem::Type type) { @@ -377,26 +404,7 @@ bool HOT Scheduler::cancel_item_common_(Component *component, bool is_static_str // obtain lock because this function iterates and can be called from non-loop task context LockGuard guard{this->lock_}; - bool ret = false; - - for (auto &it : this->items_) { - const char *item_name = it->get_name(); - if (it->component == component && item_name != nullptr && strcmp(name_cstr, item_name) == 0 && it->type == type && - !it->remove) { - to_remove_++; - it->remove = true; - ret = true; - } - } - for (auto &it : this->to_add_) { - const char *item_name = it->get_name(); - if (it->component == component && item_name != nullptr && strcmp(name_cstr, item_name) == 0 && it->type == type) { - it->remove = true; - ret = true; - } - } - - return ret; + return this->cancel_item_locked_(component, name_cstr, type); } bool HOT Scheduler::cancel_item_(Component *component, const std::string &name, Scheduler::SchedulerItem::Type type) {