diff --git a/esphome/components/esp32/__init__.py b/esphome/components/esp32/__init__.py
index c772a3438c..6ddb579733 100644
--- a/esphome/components/esp32/__init__.py
+++ b/esphome/components/esp32/__init__.py
@@ -31,6 +31,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_ESP32,
+    CoreModel,
     __version__,
 )
 from esphome.core import CORE, HexInt, TimePeriod
@@ -713,6 +714,7 @@ async def to_code(config):
     cg.add_define("ESPHOME_BOARD", config[CONF_BOARD])
     cg.add_build_flag(f"-DUSE_ESP32_VARIANT_{config[CONF_VARIANT]}")
     cg.add_define("ESPHOME_VARIANT", VARIANT_FRIENDLY[config[CONF_VARIANT]])
+    cg.add_define(CoreModel.MULTI_ATOMICS)
 
     cg.add_platformio_option("lib_ldf_mode", "off")
     cg.add_platformio_option("lib_compat_mode", "strict")
diff --git a/esphome/components/esp8266/__init__.py b/esphome/components/esp8266/__init__.py
index 01b20bdcb1..d08d7121b7 100644
--- a/esphome/components/esp8266/__init__.py
+++ b/esphome/components/esp8266/__init__.py
@@ -15,6 +15,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_ESP8266,
+    CoreModel,
 )
 from esphome.core import CORE, coroutine_with_priority
 from esphome.helpers import copy_file_if_changed
@@ -187,6 +188,7 @@ async def to_code(config):
     cg.set_cpp_standard("gnu++20")
     cg.add_define("ESPHOME_BOARD", config[CONF_BOARD])
     cg.add_define("ESPHOME_VARIANT", "ESP8266")
+    cg.add_define(CoreModel.SINGLE)
 
     cg.add_platformio_option("extra_scripts", ["post:post_build.py"])
 
diff --git a/esphome/components/host/__init__.py b/esphome/components/host/__init__.py
index a67d73fbb7..2d77f2f7ab 100644
--- a/esphome/components/host/__init__.py
+++ b/esphome/components/host/__init__.py
@@ -7,6 +7,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_HOST,
+    CoreModel,
 )
 from esphome.core import CORE
 
@@ -43,6 +44,7 @@ async def to_code(config):
     cg.add_define("USE_ESPHOME_HOST_MAC_ADDRESS", config[CONF_MAC_ADDRESS].parts)
     cg.add_build_flag("-std=gnu++20")
     cg.add_define("ESPHOME_BOARD", "host")
+    cg.add_define(CoreModel.MULTI_ATOMICS)
     cg.add_platformio_option("platform", "platformio/native")
     cg.add_platformio_option("lib_ldf_mode", "off")
     cg.add_platformio_option("lib_compat_mode", "strict")
diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py
index 17d5d46ffd..7f2a0bc0a5 100644
--- a/esphome/components/libretiny/__init__.py
+++ b/esphome/components/libretiny/__init__.py
@@ -20,6 +20,7 @@ from esphome.const import (
     KEY_FRAMEWORK_VERSION,
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
+    CoreModel,
     __version__,
 )
 from esphome.core import CORE
@@ -260,6 +261,7 @@ async def component_to_code(config):
     cg.add_build_flag(f"-DUSE_LIBRETINY_VARIANT_{config[CONF_FAMILY]}")
     cg.add_define("ESPHOME_BOARD", config[CONF_BOARD])
     cg.add_define("ESPHOME_VARIANT", FAMILY_FRIENDLY[config[CONF_FAMILY]])
+    cg.add_define(CoreModel.MULTI_NO_ATOMICS)
 
     # force using arduino framework
     cg.add_platformio_option("framework", "arduino")
diff --git a/esphome/components/rp2040/__init__.py b/esphome/components/rp2040/__init__.py
index 0fa299ce5c..28c3bbd70c 100644
--- a/esphome/components/rp2040/__init__.py
+++ b/esphome/components/rp2040/__init__.py
@@ -16,6 +16,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_RP2040,
+    CoreModel,
 )
 from esphome.core import CORE, EsphomeError, coroutine_with_priority
 from esphome.helpers import copy_file_if_changed, mkdir_p, read_file, write_file
@@ -171,6 +172,7 @@ async def to_code(config):
cg.set_cpp_standard("gnu++20") cg.add_define("ESPHOME_BOARD", config[CONF_BOARD]) cg.add_define("ESPHOME_VARIANT", "RP2040") + cg.add_define(CoreModel.SINGLE) cg.add_platformio_option("extra_scripts", ["post:post_build.py"]) diff --git a/esphome/const.py b/esphome/const.py index 7da19a8c1b..627b6bac18 100644 --- a/esphome/const.py +++ b/esphome/const.py @@ -35,6 +35,14 @@ class Framework(StrEnum): ZEPHYR = "zephyr" +class CoreModel(StrEnum): + """Core model identifiers for ESPHome scheduler.""" + + SINGLE = "ESPHOME_CORES_SINGLE" + MULTI_NO_ATOMICS = "ESPHOME_CORES_MULTI_NO_ATOMICS" + MULTI_ATOMICS = "ESPHOME_CORES_MULTI_ATOMICS" + + class PlatformFramework(Enum): """Combined platform-framework identifiers with tuple values.""" diff --git a/esphome/core/defines.h b/esphome/core/defines.h index 7ddb3436cd..19d380bd29 100644 --- a/esphome/core/defines.h +++ b/esphome/core/defines.h @@ -15,6 +15,9 @@ #define ESPHOME_VARIANT "ESP32" #define ESPHOME_DEBUG_SCHEDULER +// Default threading model for static analysis (ESP32 is multi-core with atomics) +#define ESPHOME_CORES_MULTI_ATOMICS + // logger #define ESPHOME_LOG_LEVEL ESPHOME_LOG_LEVEL_VERY_VERBOSE diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 7a0c08e1f0..d6d99f82c8 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -54,7 +54,7 @@ static void validate_static_string(const char *name) { ESP_LOGW(TAG, "WARNING: Scheduler name '%s' at %p might be on heap (static ref at %p)", name, name, static_str); } } -#endif +#endif /* ESPHOME_DEBUG_SCHEDULER */ // A note on locking: the `lock_` lock protects the `items_` and `to_add_` containers. It must be taken when writing to // them (i.e. when adding/removing items, but not when changing items). As items are only deleted from the loop task, @@ -82,9 +82,9 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type item->callback = std::move(func); item->remove = false; -#if !defined(USE_ESP8266) && !defined(USE_RP2040) +#ifndef ESPHOME_CORES_SINGLE // Special handling for defer() (delay = 0, type = TIMEOUT) - // ESP8266 and RP2040 are excluded because they don't need thread-safe defer handling + // Single-core platforms don't need thread-safe defer handling if (delay == 0 && type == SchedulerItem::TIMEOUT) { // Put in defer queue for guaranteed FIFO execution LockGuard guard{this->lock_}; @@ -92,7 +92,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type this->defer_queue_.push_back(std::move(item)); return; } -#endif +#endif /* not ESPHOME_CORES_SINGLE */ // Get fresh timestamp for new timer/interval - ensures accurate scheduling const auto now = this->millis_64_(millis()); // Fresh millis() call @@ -123,7 +123,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type ESP_LOGD(TAG, "set_%s(name='%s/%s', %s=%" PRIu32 ", offset=%" PRIu32 ")", type_str, item->get_source(), name_cstr ? name_cstr : "(null)", type_str, delay, static_cast(item->next_execution_ - now)); } -#endif +#endif /* ESPHOME_DEBUG_SCHEDULER */ LockGuard guard{this->lock_}; // If name is provided, do atomic cancel-and-add @@ -231,7 +231,7 @@ optional HOT Scheduler::next_schedule_in(uint32_t now) { return item->next_execution_ - now_64; } void HOT Scheduler::call(uint32_t now) { -#if !defined(USE_ESP8266) && !defined(USE_RP2040) +#ifndef ESPHOME_CORES_SINGLE // Process defer queue first to guarantee FIFO execution order for deferred items. 
   // Previously, defer() used the heap which gave undefined order for equal timestamps,
   // causing race conditions on multi-core systems (ESP32, BK7200).
@@ -239,8 +239,7 @@ void HOT Scheduler::call(uint32_t now) {
   // - Deferred items (delay=0) go directly to defer_queue_ in set_timer_common_
   // - Items execute in exact order they were deferred (FIFO guarantee)
   // - No deferred items exist in to_add_, so processing order doesn't affect correctness
-  // ESP8266 and RP2040 don't use this queue - they fall back to the heap-based approach
-  // (ESP8266: single-core, RP2040: empty mutex implementation).
+  // Single-core platforms don't use this queue and fall back to the heap-based approach.
   //
   // Note: Items cancelled via cancel_item_locked_() are marked with remove=true but still
   // processed here. They are removed from the queue normally via pop_front() but skipped
@@ -262,7 +261,7 @@ void HOT Scheduler::call(uint32_t now) {
       this->execute_item_(item.get(), now);
     }
   }
-#endif
+#endif /* not ESPHOME_CORES_SINGLE */
 
   // Convert the fresh timestamp from main loop to 64-bit for scheduler operations
   const auto now_64 = this->millis_64_(now);  // 'now' from parameter - fresh from Application::loop()
@@ -274,13 +273,15 @@ void HOT Scheduler::call(uint32_t now) {
   if (now_64 - last_print > 2000) {
     last_print = now_64;
     std::vector<std::unique_ptr<SchedulerItem>> old_items;
-#if !defined(USE_ESP8266) && !defined(USE_RP2040) && !defined(USE_LIBRETINY)
-    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%u, %" PRIu32 ")", this->items_.size(), now_64,
-             this->millis_major_, this->last_millis_.load(std::memory_order_relaxed));
-#else
-    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%u, %" PRIu32 ")", this->items_.size(), now_64,
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
+    const auto last_dbg = this->last_millis_.load(std::memory_order_relaxed);
+    const auto major_dbg = this->millis_major_.load(std::memory_order_relaxed);
+    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%" PRIu16 ", %" PRIu32 ")", this->items_.size(), now_64,
+             major_dbg, last_dbg);
+#else /* not ESPHOME_CORES_MULTI_ATOMICS */
+    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%" PRIu16 ", %" PRIu32 ")", this->items_.size(), now_64,
              this->millis_major_, this->last_millis_);
-#endif
+#endif /* else ESPHOME_CORES_MULTI_ATOMICS */
     while (!this->empty_()) {
       std::unique_ptr<SchedulerItem> item;
       {
@@ -305,7 +306,7 @@ void HOT Scheduler::call(uint32_t now) {
       std::make_heap(this->items_.begin(), this->items_.end(), SchedulerItem::cmp);
     }
   }
-#endif  // ESPHOME_DEBUG_SCHEDULER
+#endif /* ESPHOME_DEBUG_SCHEDULER */
 
   // If we have too many items to remove
   if (this->to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) {
@@ -352,7 +353,7 @@ void HOT Scheduler::call(uint32_t now) {
       ESP_LOGV(TAG, "Running %s '%s/%s' with interval=%" PRIu32 " next_execution=%" PRIu64 " (now=%" PRIu64 ")",
               item->get_type_str(), item->get_source(), item_name ? item_name : "(null)", item->interval,
              item->next_execution_, now_64);
-#endif
+#endif /* ESPHOME_DEBUG_SCHEDULER */
 
       // Warning: During callback(), a lot of stuff can happen, including:
       // - timeouts/intervals get added, potentially invalidating vector pointers
@@ -460,7 +461,7 @@ bool HOT Scheduler::cancel_item_locked_(Component *component, const char *name_c
   size_t total_cancelled = 0;
 
   // Check all containers for matching items
-#if !defined(USE_ESP8266) && !defined(USE_RP2040)
+#ifndef ESPHOME_CORES_SINGLE
   // Only check defer queue for timeouts (intervals never go there)
   if (type == SchedulerItem::TIMEOUT) {
     for (auto &item : this->defer_queue_) {
@@ -470,7 +471,7 @@ bool HOT Scheduler::cancel_item_locked_(Component *component, const char *name_c
       }
     }
   }
-#endif
+#endif /* not ESPHOME_CORES_SINGLE */
 
   // Cancel items in the main heap
   for (auto &item : this->items_) {
@@ -495,24 +496,53 @@ bool HOT Scheduler::cancel_item_locked_(Component *component, const char *name_c
 uint64_t Scheduler::millis_64_(uint32_t now) {
   // THREAD SAFETY NOTE:
-  // This function can be called from multiple threads simultaneously on ESP32/LibreTiny.
-  // On single-threaded platforms (ESP8266, RP2040), atomics are not needed.
+  // This function has three implementations, based on the precompiler flags
+  // - ESPHOME_CORES_SINGLE - Runs on single-core platforms (ESP8266, RP2040, etc.)
+  // - ESPHOME_CORES_MULTI_NO_ATOMICS - Runs on multi-core platforms without atomics (LibreTiny)
+  // - ESPHOME_CORES_MULTI_ATOMICS - Runs on multi-core platforms with atomics (ESP32, HOST, etc.)
+  //
+  // Make sure all changes are synchronized if you edit this function.
   //
   // IMPORTANT: Always pass fresh millis() values to this function. The implementation
   // handles out-of-order timestamps between threads, but minimizing time differences
   // helps maintain accuracy.
   //
-  // The implementation handles the 32-bit rollover (every 49.7 days) by:
-  // 1. Using a lock when detecting rollover to ensure atomic update
-  // 2. Restricting normal updates to forward movement within the same epoch
-  // This prevents race conditions at the rollover boundary without requiring
-  // 64-bit atomics or locking on every call.
-#ifdef USE_LIBRETINY
-  // LibreTiny: Multi-threaded but lacks atomic operation support
-  // TODO: If LibreTiny ever adds atomic support, remove this entire block and
-  // let it fall through to the atomic-based implementation below
-  // We need to use a lock when near the rollover boundary to prevent races
+#ifdef ESPHOME_CORES_SINGLE
+  // This is the single core implementation.
+  //
+  // Single-core platforms have no concurrency, so this is a simple implementation
+  // that just tracks 32-bit rollover (every 49.7 days) without any locking or atomics.
+
+  uint16_t major = this->millis_major_;
+  uint32_t last = this->last_millis_;
+
+  // Check for rollover
+  if (now < last && (last - now) > HALF_MAX_UINT32) {
+    this->millis_major_++;
+    major++;
+#ifdef ESPHOME_DEBUG_SCHEDULER
+    ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
+#endif /* ESPHOME_DEBUG_SCHEDULER */
+  }
+
+  // Only update if time moved forward
+  if (now > last) {
+    this->last_millis_ = now;
+  }
+
+  // Combine major (high 32 bits) and now (low 32 bits) into 64-bit time
+  return now + (static_cast<uint64_t>(major) << 32);
+
+#elif defined(ESPHOME_CORES_MULTI_NO_ATOMICS)
+  // This is the multi core no atomics implementation.
+  //
+  // Without atomics, this implementation uses locks more aggressively:
+  // 1. Always locks when near the rollover boundary (within 10 seconds)
+  // 2. Always locks when detecting a large backwards jump
+  // 3. Updates without lock in normal forward progression (accepting minor races)
+  // This is less efficient but necessary without atomic operations.
+  uint16_t major = this->millis_major_;
   uint32_t last = this->last_millis_;
 
   // Define a safe window around the rollover point (10 seconds)
@@ -531,9 +561,10 @@ uint64_t Scheduler::millis_64_(uint32_t now) {
     if (now < last && (last - now) > HALF_MAX_UINT32) {
       // True rollover detected (happens every ~49.7 days)
       this->millis_major_++;
+      major++;
 #ifdef ESPHOME_DEBUG_SCHEDULER
       ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
-#endif
+#endif /* ESPHOME_DEBUG_SCHEDULER */
     }
     // Update last_millis_ while holding lock
     this->last_millis_ = now;
@@ -549,58 +580,76 @@ uint64_t Scheduler::millis_64_(uint32_t now) {
   // If now <= last and we're not near rollover, don't update
   // This minimizes backwards time movement
-#elif !defined(USE_ESP8266) && !defined(USE_RP2040)
-  // Multi-threaded platforms with atomic support (ESP32)
-  uint32_t last = this->last_millis_.load(std::memory_order_relaxed);
+  // Combine major (high 32 bits) and now (low 32 bits) into 64-bit time
+  return now + (static_cast<uint64_t>(major) << 32);
 
-  // If we might be near a rollover (large backwards jump), take the lock for the entire operation
-  // This ensures rollover detection and last_millis_ update are atomic together
-  if (now < last && (last - now) > HALF_MAX_UINT32) {
-    // Potential rollover - need lock for atomic rollover detection + update
-    LockGuard guard{this->lock_};
-    // Re-read with lock held
-    last = this->last_millis_.load(std::memory_order_relaxed);
+#elif defined(ESPHOME_CORES_MULTI_ATOMICS)
+  // This is the multi core with atomics implementation.
+  //
+  // Uses atomic operations with acquire/release semantics to ensure coherent
+  // reads of millis_major_ and last_millis_ across cores. Features:
+  // 1. Epoch-coherency retry loop to handle concurrent updates
+  // 2. Lock only taken for actual rollover detection and update
+  // 3. Lock-free CAS updates for normal forward time progression
+  // 4. Memory ordering ensures cores see consistent time values
+  for (;;) {
+    uint16_t major = this->millis_major_.load(std::memory_order_acquire);
+
+    /*
+     * Acquire so that if we later decide **not** to take the lock we still
+     * observe a `millis_major_` value coherent with the loaded `last_millis_`.
+     * The acquire load ensures any later read of `millis_major_` sees its
+     * corresponding increment.
+     */
+    uint32_t last = this->last_millis_.load(std::memory_order_acquire);
+
+    // If we might be near a rollover (large backwards jump), take the lock for the entire operation
+    // This ensures rollover detection and last_millis_ update are atomic together
     if (now < last && (last - now) > HALF_MAX_UINT32) {
-      // True rollover detected (happens every ~49.7 days)
-      this->millis_major_++;
+      // Potential rollover - need lock for atomic rollover detection + update
+      LockGuard guard{this->lock_};
+      // Re-read with lock held; mutex already provides ordering
+      last = this->last_millis_.load(std::memory_order_relaxed);
+
+      if (now < last && (last - now) > HALF_MAX_UINT32) {
+        // True rollover detected (happens every ~49.7 days)
+        this->millis_major_.fetch_add(1, std::memory_order_relaxed);
+        major++;
 #ifdef ESPHOME_DEBUG_SCHEDULER
-      ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
-#endif
-    }
-    // Update last_millis_ while holding lock to prevent races
-    this->last_millis_.store(now, std::memory_order_relaxed);
-  } else {
-    // Normal case: Try lock-free update, but only allow forward movement within same epoch
-    // This prevents accidentally moving backwards across a rollover boundary
-    while (now > last && (now - last) < HALF_MAX_UINT32) {
-      if (this->last_millis_.compare_exchange_weak(last, now, std::memory_order_relaxed)) {
-        break;
+        ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
+#endif /* ESPHOME_DEBUG_SCHEDULER */
+      }
+      /*
+       * Update last_millis_ while holding the lock to prevent races
+       * Publish the new low-word *after* bumping `millis_major_` (done above)
+       * so readers never see a mismatched pair.
+       */
+      this->last_millis_.store(now, std::memory_order_release);
+    } else {
+      // Normal case: Try lock-free update, but only allow forward movement within same epoch
+      // This prevents accidentally moving backwards across a rollover boundary
+      while (now > last && (now - last) < HALF_MAX_UINT32) {
+        if (this->last_millis_.compare_exchange_weak(last, now,
+                                                     std::memory_order_release,   // success
+                                                     std::memory_order_relaxed)) {  // failure
+          break;
+        }
+        // CAS failure means no data was published; relaxed is fine
+        // last is automatically updated by compare_exchange_weak if it fails
      }
-      // last is automatically updated by compare_exchange_weak if it fails
     }
+    uint16_t major_end = this->millis_major_.load(std::memory_order_relaxed);
+    if (major_end == major)
+      return now + (static_cast<uint64_t>(major) << 32);
   }
+  // Unreachable - the loop always returns when major_end == major
+  __builtin_unreachable();
 #else
-  // Single-threaded platforms (ESP8266, RP2040): No atomics needed
-  uint32_t last = this->last_millis_;
-
-  // Check for rollover
-  if (now < last && (last - now) > HALF_MAX_UINT32) {
-    this->millis_major_++;
-#ifdef ESPHOME_DEBUG_SCHEDULER
-    ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
+#error \
+    "No platform threading model defined. One of ESPHOME_CORES_SINGLE, ESPHOME_CORES_MULTI_NO_ATOMICS, or ESPHOME_CORES_MULTI_ATOMICS must be defined."
 #endif
-  }
-
-  // Only update if time moved forward
-  if (now > last) {
-    this->last_millis_ = now;
-  }
-#endif
-
-  // Combine major (high 32 bits) and now (low 32 bits) into 64-bit time
-  return now + (static_cast<uint64_t>(this->millis_major_) << 32);
 }
 
 bool HOT Scheduler::SchedulerItem::cmp(const std::unique_ptr<SchedulerItem> &a,
diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h
index 64df2f2bb0..b539b26949 100644
--- a/esphome/core/scheduler.h
+++ b/esphome/core/scheduler.h
@@ -1,10 +1,11 @@
 #pragma once
 
+#include "esphome/core/defines.h"
 #include <cstdint>
 #include <deque>
 #include <memory>
 #include <vector>
 
-#if !defined(USE_ESP8266) && !defined(USE_RP2040) && !defined(USE_LIBRETINY)
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
 #include <atomic>
 #endif
@@ -204,23 +205,40 @@ class Scheduler {
   Mutex lock_;
   std::vector<std::unique_ptr<SchedulerItem>> items_;
   std::vector<std::unique_ptr<SchedulerItem>> to_add_;
-#if !defined(USE_ESP8266) && !defined(USE_RP2040)
-  // ESP8266 and RP2040 don't need the defer queue because:
-  // ESP8266: Single-core with no preemptive multitasking
-  // RP2040: Currently has empty mutex implementation in ESPHome
-  // Both platforms save 40 bytes of RAM by excluding this
+#ifndef ESPHOME_CORES_SINGLE
+  // Single-core platforms don't need the defer queue and save 40 bytes of RAM
   std::deque<std::unique_ptr<SchedulerItem>> defer_queue_;  // FIFO queue for defer() calls
-#endif
-#if !defined(USE_ESP8266) && !defined(USE_RP2040) && !defined(USE_LIBRETINY)
-  // Multi-threaded platforms with atomic support: last_millis_ needs atomic for lock-free updates
+#endif /* ESPHOME_CORES_SINGLE */
+  uint32_t to_remove_{0};
+
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
+  /*
+   * Multi-threaded platforms with atomic support: last_millis_ needs atomic for lock-free updates
+   *
+   * MEMORY-ORDERING NOTE
+   * --------------------
+   * `last_millis_` and `millis_major_` form a single 64-bit timestamp split in half.
+   * Writers publish `last_millis_` with memory_order_release and readers use
+   * memory_order_acquire. This ensures that once a reader sees the new low word,
+   * it also observes the corresponding increment of `millis_major_`.
+   */
   std::atomic<uint32_t> last_millis_{0};
-#else
+#else /* not ESPHOME_CORES_MULTI_ATOMICS */
   // Platforms without atomic support or single-threaded platforms
   uint32_t last_millis_{0};
-#endif
-  // millis_major_ is protected by lock when incrementing
+#endif /* else ESPHOME_CORES_MULTI_ATOMICS */
+
+  /*
+   * Upper 16 bits of the 64-bit millis counter. Incremented only while holding
+   * `lock_`; read concurrently. Atomic (relaxed) avoids a formal data race.
+   * Ordering relative to `last_millis_` is provided by its release store and the
+   * corresponding acquire loads.
+   */
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
+  std::atomic<uint16_t> millis_major_{0};
+#else /* not ESPHOME_CORES_MULTI_ATOMICS */
   uint16_t millis_major_{0};
-  uint32_t to_remove_{0};
+#endif /* else ESPHOME_CORES_MULTI_ATOMICS */
 };
 
 }  // namespace esphome
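
Reviewer note: the subtle part of this change is the ESPHOME_CORES_MULTI_ATOMICS branch of millis_64_(), where a 16-bit epoch counter and a 32-bit low word are combined into one 64-bit monotonic timestamp using release stores, acquire loads, and an epoch-coherency retry loop. The standalone sketch below is not part of the patch; the class name EpochClock, the file name, and the build command are invented purely to illustrate the same pattern in isolation under those assumptions.

// epoch_clock_sketch.cpp - illustrative only; mirrors the pattern used by Scheduler::millis_64_().
// Build (hypothetical): g++ -std=c++17 -pthread epoch_clock_sketch.cpp
#include <atomic>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <mutex>

class EpochClock {
 public:
  // Convert a 32-bit tick count into a 64-bit monotonic timestamp.
  uint64_t to_64bit(uint32_t now) {
    static constexpr uint32_t HALF_MAX_UINT32 = 0x80000000u;
    for (;;) {
      uint16_t major = major_.load(std::memory_order_acquire);
      uint32_t last = low_.load(std::memory_order_acquire);

      if (now < last && (last - now) > HALF_MAX_UINT32) {
        // Large backwards jump: treat as a 32-bit rollover, handled under the lock.
        std::lock_guard<std::mutex> guard(lock_);
        last = low_.load(std::memory_order_relaxed);
        if (now < last && (last - now) > HALF_MAX_UINT32) {
          major_.fetch_add(1, std::memory_order_relaxed);
          major++;
        }
        low_.store(now, std::memory_order_release);  // publish the low word after the epoch bump
      } else {
        // Normal path: lock-free forward update within the same epoch.
        while (now > last && (now - last) < HALF_MAX_UINT32) {
          if (low_.compare_exchange_weak(last, now, std::memory_order_release, std::memory_order_relaxed)) {
            break;
          }
        }
      }
      // Retry if another thread bumped the epoch while we were working,
      // so the (major, now) pair we return is always coherent.
      if (major_.load(std::memory_order_relaxed) == major)
        return now + (static_cast<uint64_t>(major) << 32);
    }
  }

 private:
  std::mutex lock_;
  std::atomic<uint32_t> low_{0};
  std::atomic<uint16_t> major_{0};
};

int main() {
  EpochClock clock;
  printf("%" PRIu64 "\n", clock.to_64bit(0x60000000u));  // forward progress within epoch 0
  printf("%" PRIu64 "\n", clock.to_64bit(0xC0000000u));  // still epoch 0
  printf("%" PRIu64 "\n", clock.to_64bit(0x00000100u));  // wraps: epoch bumps to 1
  return 0;
}

The single-core and no-atomics branches in the patch produce the same 64-bit value without this machinery (plain fields, or a lock near the rollover window), which is why scheduler.h switches the storage type of last_millis_ and millis_major_ on the same ESPHOME_CORES_* defines.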