diff --git a/esphome/components/esp32/__init__.py b/esphome/components/esp32/__init__.py
index c772a3438c..6ddb579733 100644
--- a/esphome/components/esp32/__init__.py
+++ b/esphome/components/esp32/__init__.py
@@ -31,6 +31,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_ESP32,
+    CoreModel,
     __version__,
 )
 from esphome.core import CORE, HexInt, TimePeriod
@@ -713,6 +714,7 @@ async def to_code(config):
     cg.add_define("ESPHOME_BOARD", config[CONF_BOARD])
     cg.add_build_flag(f"-DUSE_ESP32_VARIANT_{config[CONF_VARIANT]}")
     cg.add_define("ESPHOME_VARIANT", VARIANT_FRIENDLY[config[CONF_VARIANT]])
+    cg.add_define(CoreModel.MULTI_ATOMICS)
 
     cg.add_platformio_option("lib_ldf_mode", "off")
     cg.add_platformio_option("lib_compat_mode", "strict")
diff --git a/esphome/components/esp8266/__init__.py b/esphome/components/esp8266/__init__.py
index 01b20bdcb1..d08d7121b7 100644
--- a/esphome/components/esp8266/__init__.py
+++ b/esphome/components/esp8266/__init__.py
@@ -15,6 +15,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_ESP8266,
+    CoreModel,
 )
 from esphome.core import CORE, coroutine_with_priority
 from esphome.helpers import copy_file_if_changed
@@ -187,6 +188,7 @@ async def to_code(config):
     cg.set_cpp_standard("gnu++20")
     cg.add_define("ESPHOME_BOARD", config[CONF_BOARD])
     cg.add_define("ESPHOME_VARIANT", "ESP8266")
+    cg.add_define(CoreModel.SINGLE)
 
     cg.add_platformio_option("extra_scripts", ["post:post_build.py"])
 
diff --git a/esphome/components/host/__init__.py b/esphome/components/host/__init__.py
index a67d73fbb7..2d77f2f7ab 100644
--- a/esphome/components/host/__init__.py
+++ b/esphome/components/host/__init__.py
@@ -7,6 +7,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_HOST,
+    CoreModel,
 )
 from esphome.core import CORE
 
@@ -43,6 +44,7 @@ async def to_code(config):
     cg.add_define("USE_ESPHOME_HOST_MAC_ADDRESS", config[CONF_MAC_ADDRESS].parts)
     cg.add_build_flag("-std=gnu++20")
     cg.add_define("ESPHOME_BOARD", "host")
+    cg.add_define(CoreModel.MULTI_ATOMICS)
     cg.add_platformio_option("platform", "platformio/native")
     cg.add_platformio_option("lib_ldf_mode", "off")
     cg.add_platformio_option("lib_compat_mode", "strict")
diff --git a/esphome/components/libretiny/__init__.py b/esphome/components/libretiny/__init__.py
index 17d5d46ffd..7f2a0bc0a5 100644
--- a/esphome/components/libretiny/__init__.py
+++ b/esphome/components/libretiny/__init__.py
@@ -20,6 +20,7 @@ from esphome.const import (
     KEY_FRAMEWORK_VERSION,
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
+    CoreModel,
     __version__,
 )
 from esphome.core import CORE
@@ -260,6 +261,7 @@ async def component_to_code(config):
     cg.add_build_flag(f"-DUSE_LIBRETINY_VARIANT_{config[CONF_FAMILY]}")
     cg.add_define("ESPHOME_BOARD", config[CONF_BOARD])
     cg.add_define("ESPHOME_VARIANT", FAMILY_FRIENDLY[config[CONF_FAMILY]])
+    cg.add_define(CoreModel.MULTI_NO_ATOMICS)
 
     # force using arduino framework
     cg.add_platformio_option("framework", "arduino")
diff --git a/esphome/components/rp2040/__init__.py b/esphome/components/rp2040/__init__.py
index 0fa299ce5c..28c3bbd70c 100644
--- a/esphome/components/rp2040/__init__.py
+++ b/esphome/components/rp2040/__init__.py
@@ -16,6 +16,7 @@ from esphome.const import (
     KEY_TARGET_FRAMEWORK,
     KEY_TARGET_PLATFORM,
     PLATFORM_RP2040,
+    CoreModel,
 )
 from esphome.core import CORE, EsphomeError, coroutine_with_priority
 from esphome.helpers import copy_file_if_changed, mkdir_p, read_file, write_file
@@ -171,6 +172,7 @@ async def to_code(config):
cg.set_cpp_standard("gnu++20") cg.add_define("ESPHOME_BOARD", config[CONF_BOARD]) cg.add_define("ESPHOME_VARIANT", "RP2040") + cg.add_define(CoreModel.SINGLE) cg.add_platformio_option("extra_scripts", ["post:post_build.py"]) diff --git a/esphome/const.py b/esphome/const.py index 7da19a8c1b..627b6bac18 100644 --- a/esphome/const.py +++ b/esphome/const.py @@ -35,6 +35,14 @@ class Framework(StrEnum): ZEPHYR = "zephyr" +class CoreModel(StrEnum): + """Core model identifiers for ESPHome scheduler.""" + + SINGLE = "ESPHOME_CORES_SINGLE" + MULTI_NO_ATOMICS = "ESPHOME_CORES_MULTI_NO_ATOMICS" + MULTI_ATOMICS = "ESPHOME_CORES_MULTI_ATOMICS" + + class PlatformFramework(Enum): """Combined platform-framework identifiers with tuple values.""" diff --git a/esphome/core/defines.h b/esphome/core/defines.h index 7ddb3436cd..19d380bd29 100644 --- a/esphome/core/defines.h +++ b/esphome/core/defines.h @@ -15,6 +15,9 @@ #define ESPHOME_VARIANT "ESP32" #define ESPHOME_DEBUG_SCHEDULER +// Default threading model for static analysis (ESP32 is multi-core with atomics) +#define ESPHOME_CORES_MULTI_ATOMICS + // logger #define ESPHOME_LOG_LEVEL ESPHOME_LOG_LEVEL_VERY_VERBOSE diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp index 7a0c08e1f0..d6d99f82c8 100644 --- a/esphome/core/scheduler.cpp +++ b/esphome/core/scheduler.cpp @@ -54,7 +54,7 @@ static void validate_static_string(const char *name) { ESP_LOGW(TAG, "WARNING: Scheduler name '%s' at %p might be on heap (static ref at %p)", name, name, static_str); } } -#endif +#endif /* ESPHOME_DEBUG_SCHEDULER */ // A note on locking: the `lock_` lock protects the `items_` and `to_add_` containers. It must be taken when writing to // them (i.e. when adding/removing items, but not when changing items). As items are only deleted from the loop task, @@ -82,9 +82,9 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type item->callback = std::move(func); item->remove = false; -#if !defined(USE_ESP8266) && !defined(USE_RP2040) +#ifndef ESPHOME_CORES_SINGLE // Special handling for defer() (delay = 0, type = TIMEOUT) - // ESP8266 and RP2040 are excluded because they don't need thread-safe defer handling + // Single-core platforms don't need thread-safe defer handling if (delay == 0 && type == SchedulerItem::TIMEOUT) { // Put in defer queue for guaranteed FIFO execution LockGuard guard{this->lock_}; @@ -92,7 +92,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type this->defer_queue_.push_back(std::move(item)); return; } -#endif +#endif /* not ESPHOME_CORES_SINGLE */ // Get fresh timestamp for new timer/interval - ensures accurate scheduling const auto now = this->millis_64_(millis()); // Fresh millis() call @@ -123,7 +123,7 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type ESP_LOGD(TAG, "set_%s(name='%s/%s', %s=%" PRIu32 ", offset=%" PRIu32 ")", type_str, item->get_source(), name_cstr ? name_cstr : "(null)", type_str, delay, static_cast(item->next_execution_ - now)); } -#endif +#endif /* ESPHOME_DEBUG_SCHEDULER */ LockGuard guard{this->lock_}; // If name is provided, do atomic cancel-and-add @@ -231,7 +231,7 @@ optional HOT Scheduler::next_schedule_in(uint32_t now) { return item->next_execution_ - now_64; } void HOT Scheduler::call(uint32_t now) { -#if !defined(USE_ESP8266) && !defined(USE_RP2040) +#ifndef ESPHOME_CORES_SINGLE // Process defer queue first to guarantee FIFO execution order for deferred items. 
   // Previously, defer() used the heap which gave undefined order for equal timestamps,
   // causing race conditions on multi-core systems (ESP32, BK7200).
@@ -239,8 +239,7 @@ void HOT Scheduler::call(uint32_t now) {
   // - Deferred items (delay=0) go directly to defer_queue_ in set_timer_common_
   // - Items execute in exact order they were deferred (FIFO guarantee)
   // - No deferred items exist in to_add_, so processing order doesn't affect correctness
-  // ESP8266 and RP2040 don't use this queue - they fall back to the heap-based approach
-  // (ESP8266: single-core, RP2040: empty mutex implementation).
+  // Single-core platforms don't use this queue and fall back to the heap-based approach.
   //
   // Note: Items cancelled via cancel_item_locked_() are marked with remove=true but still
   // processed here. They are removed from the queue normally via pop_front() but skipped
@@ -262,7 +261,7 @@ void HOT Scheduler::call(uint32_t now) {
       this->execute_item_(item.get(), now);
     }
   }
-#endif
+#endif /* not ESPHOME_CORES_SINGLE */
 
   // Convert the fresh timestamp from main loop to 64-bit for scheduler operations
   const auto now_64 = this->millis_64_(now);  // 'now' from parameter - fresh from Application::loop()
@@ -274,13 +273,15 @@ void HOT Scheduler::call(uint32_t now) {
   if (now_64 - last_print > 2000) {
     last_print = now_64;
     std::vector<std::unique_ptr<SchedulerItem>> old_items;
-#if !defined(USE_ESP8266) && !defined(USE_RP2040) && !defined(USE_LIBRETINY)
-    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%u, %" PRIu32 ")", this->items_.size(), now_64,
-             this->millis_major_, this->last_millis_.load(std::memory_order_relaxed));
-#else
-    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%u, %" PRIu32 ")", this->items_.size(), now_64,
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
+    const auto last_dbg = this->last_millis_.load(std::memory_order_relaxed);
+    const auto major_dbg = this->millis_major_.load(std::memory_order_relaxed);
+    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%" PRIu16 ", %" PRIu32 ")", this->items_.size(), now_64,
+             major_dbg, last_dbg);
+#else /* not ESPHOME_CORES_MULTI_ATOMICS */
+    ESP_LOGD(TAG, "Items: count=%zu, now=%" PRIu64 " (%" PRIu16 ", %" PRIu32 ")", this->items_.size(), now_64,
              this->millis_major_, this->last_millis_);
-#endif
+#endif /* else ESPHOME_CORES_MULTI_ATOMICS */
     while (!this->empty_()) {
       std::unique_ptr<SchedulerItem> item;
       {
@@ -305,7 +306,7 @@ void HOT Scheduler::call(uint32_t now) {
       std::make_heap(this->items_.begin(), this->items_.end(), SchedulerItem::cmp);
     }
   }
-#endif  // ESPHOME_DEBUG_SCHEDULER
+#endif /* ESPHOME_DEBUG_SCHEDULER */
 
   // If we have too many items to remove
   if (this->to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) {
@@ -352,7 +353,7 @@ void HOT Scheduler::call(uint32_t now) {
       ESP_LOGV(TAG, "Running %s '%s/%s' with interval=%" PRIu32 " next_execution=%" PRIu64 " (now=%" PRIu64 ")",
               item->get_type_str(), item->get_source(), item_name ? item_name : "(null)", item->interval,
              item->next_execution_, now_64);
-#endif
+#endif /* ESPHOME_DEBUG_SCHEDULER */
 
       // Warning: During callback(), a lot of stuff can happen, including:
       // - timeouts/intervals get added, potentially invalidating vector pointers
@@ -460,7 +461,7 @@ bool HOT Scheduler::cancel_item_locked_(Component *component, const char *name_c
   size_t total_cancelled = 0;
 
   // Check all containers for matching items
-#if !defined(USE_ESP8266) && !defined(USE_RP2040)
+#ifndef ESPHOME_CORES_SINGLE
   // Only check defer queue for timeouts (intervals never go there)
   if (type == SchedulerItem::TIMEOUT) {
     for (auto &item : this->defer_queue_) {
@@ -470,7 +471,7 @@ bool HOT Scheduler::cancel_item_locked_(Component *component, const char *name_c
       }
     }
   }
-#endif
+#endif /* not ESPHOME_CORES_SINGLE */
 
   // Cancel items in the main heap
   for (auto &item : this->items_) {
@@ -495,24 +496,53 @@ bool HOT Scheduler::cancel_item_locked_(Component *component, const char *name_c
 uint64_t Scheduler::millis_64_(uint32_t now) {
   // THREAD SAFETY NOTE:
-  // This function can be called from multiple threads simultaneously on ESP32/LibreTiny.
-  // On single-threaded platforms (ESP8266, RP2040), atomics are not needed.
+  // This function has three implementations, based on the precompiler flags
+  // - ESPHOME_CORES_SINGLE - Runs on single-core platforms (ESP8266, RP2040, etc.)
+  // - ESPHOME_CORES_MULTI_NO_ATOMICS - Runs on multi-core platforms without atomics (LibreTiny)
+  // - ESPHOME_CORES_MULTI_ATOMICS - Runs on multi-core platforms with atomics (ESP32, HOST, etc.)
+  //
+  // Make sure all changes are synchronized if you edit this function.
   //
   // IMPORTANT: Always pass fresh millis() values to this function. The implementation
   // handles out-of-order timestamps between threads, but minimizing time differences
   // helps maintain accuracy.
   //
-  // The implementation handles the 32-bit rollover (every 49.7 days) by:
-  // 1. Using a lock when detecting rollover to ensure atomic update
-  // 2. Restricting normal updates to forward movement within the same epoch
-  // This prevents race conditions at the rollover boundary without requiring
-  // 64-bit atomics or locking on every call.
-#ifdef USE_LIBRETINY
-  // LibreTiny: Multi-threaded but lacks atomic operation support
-  // TODO: If LibreTiny ever adds atomic support, remove this entire block and
-  // let it fall through to the atomic-based implementation below
-  // We need to use a lock when near the rollover boundary to prevent races
+#ifdef ESPHOME_CORES_SINGLE
+  // This is the single core implementation.
+  //
+  // Single-core platforms have no concurrency, so this is a simple implementation
+  // that just tracks 32-bit rollover (every 49.7 days) without any locking or atomics.
+
+  uint16_t major = this->millis_major_;
+  uint32_t last = this->last_millis_;
+
+  // Check for rollover
+  if (now < last && (last - now) > HALF_MAX_UINT32) {
+    this->millis_major_++;
+    major++;
+#ifdef ESPHOME_DEBUG_SCHEDULER
+    ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
+#endif /* ESPHOME_DEBUG_SCHEDULER */
+  }
+
+  // Only update if time moved forward
+  if (now > last) {
+    this->last_millis_ = now;
+  }
+
+  // Combine major (high 32 bits) and now (low 32 bits) into 64-bit time
+  return now + (static_cast<uint64_t>(major) << 32);
+
+#elif defined(ESPHOME_CORES_MULTI_NO_ATOMICS)
+  // This is the multi core no atomics implementation.
+  //
+  // Without atomics, this implementation uses locks more aggressively:
+  // 1. Always locks when near the rollover boundary (within 10 seconds)
+  // 2. Always locks when detecting a large backwards jump
+  // 3. Updates without lock in normal forward progression (accepting minor races)
+  // This is less efficient but necessary without atomic operations.
+  uint16_t major = this->millis_major_;
   uint32_t last = this->last_millis_;
 
   // Define a safe window around the rollover point (10 seconds)
@@ -531,9 +561,10 @@ uint64_t Scheduler::millis_64_(uint32_t now) {
     if (now < last && (last - now) > HALF_MAX_UINT32) {
       // True rollover detected (happens every ~49.7 days)
       this->millis_major_++;
+      major++;
 #ifdef ESPHOME_DEBUG_SCHEDULER
       ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
-#endif
+#endif /* ESPHOME_DEBUG_SCHEDULER */
     }
     // Update last_millis_ while holding lock
     this->last_millis_ = now;
@@ -549,58 +580,76 @@ uint64_t Scheduler::millis_64_(uint32_t now) {
   // If now <= last and we're not near rollover, don't update
   // This minimizes backwards time movement
-#elif !defined(USE_ESP8266) && !defined(USE_RP2040)
-  // Multi-threaded platforms with atomic support (ESP32)
-  uint32_t last = this->last_millis_.load(std::memory_order_relaxed);
+  // Combine major (high 32 bits) and now (low 32 bits) into 64-bit time
+  return now + (static_cast<uint64_t>(major) << 32);
 
-  // If we might be near a rollover (large backwards jump), take the lock for the entire operation
-  // This ensures rollover detection and last_millis_ update are atomic together
-  if (now < last && (last - now) > HALF_MAX_UINT32) {
-    // Potential rollover - need lock for atomic rollover detection + update
-    LockGuard guard{this->lock_};
-    // Re-read with lock held
-    last = this->last_millis_.load(std::memory_order_relaxed);
+#elif defined(ESPHOME_CORES_MULTI_ATOMICS)
+  // This is the multi core with atomics implementation.
+  //
+  // Uses atomic operations with acquire/release semantics to ensure coherent
+  // reads of millis_major_ and last_millis_ across cores. Features:
+  // 1. Epoch-coherency retry loop to handle concurrent updates
+  // 2. Lock only taken for actual rollover detection and update
+  // 3. Lock-free CAS updates for normal forward time progression
+  // 4. Memory ordering ensures cores see consistent time values
+  for (;;) {
+    uint16_t major = this->millis_major_.load(std::memory_order_acquire);
+
+    /*
+     * Acquire so that if we later decide **not** to take the lock we still
+     * observe a `millis_major_` value coherent with the loaded `last_millis_`.
+     * The acquire load ensures any later read of `millis_major_` sees its
+     * corresponding increment.
+     */
+    uint32_t last = this->last_millis_.load(std::memory_order_acquire);
+
+    // If we might be near a rollover (large backwards jump), take the lock for the entire operation
+    // This ensures rollover detection and last_millis_ update are atomic together
     if (now < last && (last - now) > HALF_MAX_UINT32) {
-      // True rollover detected (happens every ~49.7 days)
-      this->millis_major_++;
+      // Potential rollover - need lock for atomic rollover detection + update
+      LockGuard guard{this->lock_};
+      // Re-read with lock held; mutex already provides ordering
+      last = this->last_millis_.load(std::memory_order_relaxed);
+
+      if (now < last && (last - now) > HALF_MAX_UINT32) {
+        // True rollover detected (happens every ~49.7 days)
+        this->millis_major_.fetch_add(1, std::memory_order_relaxed);
+        major++;
 #ifdef ESPHOME_DEBUG_SCHEDULER
-      ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
-#endif
-    }
-    // Update last_millis_ while holding lock to prevent races
-    this->last_millis_.store(now, std::memory_order_relaxed);
-  } else {
-    // Normal case: Try lock-free update, but only allow forward movement within same epoch
-    // This prevents accidentally moving backwards across a rollover boundary
-    while (now > last && (now - last) < HALF_MAX_UINT32) {
-      if (this->last_millis_.compare_exchange_weak(last, now, std::memory_order_relaxed)) {
-        break;
+        ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
+#endif /* ESPHOME_DEBUG_SCHEDULER */
+      }
+      /*
+       * Update last_millis_ while holding the lock to prevent races
+       * Publish the new low-word *after* bumping `millis_major_` (done above)
+       * so readers never see a mismatched pair.
+       */
+      this->last_millis_.store(now, std::memory_order_release);
+    } else {
+      // Normal case: Try lock-free update, but only allow forward movement within same epoch
+      // This prevents accidentally moving backwards across a rollover boundary
+      while (now > last && (now - last) < HALF_MAX_UINT32) {
+        if (this->last_millis_.compare_exchange_weak(last, now,
+                                                     std::memory_order_release,   // success
+                                                     std::memory_order_relaxed)) {  // failure
+          break;
+        }
+        // CAS failure means no data was published; relaxed is fine
+        // last is automatically updated by compare_exchange_weak if it fails
      }
-      // last is automatically updated by compare_exchange_weak if it fails
     }
+    uint16_t major_end = this->millis_major_.load(std::memory_order_relaxed);
+    if (major_end == major)
+      return now + (static_cast<uint64_t>(major) << 32);
   }
+  // Unreachable - the loop always returns when major_end == major
+  __builtin_unreachable();
 #else
-  // Single-threaded platforms (ESP8266, RP2040): No atomics needed
-  uint32_t last = this->last_millis_;
-
-  // Check for rollover
-  if (now < last && (last - now) > HALF_MAX_UINT32) {
-    this->millis_major_++;
-#ifdef ESPHOME_DEBUG_SCHEDULER
-    ESP_LOGD(TAG, "Detected true 32-bit rollover at %" PRIu32 "ms (was %" PRIu32 ")", now, last);
+#error \
+    "No platform threading model defined. One of ESPHOME_CORES_SINGLE, ESPHOME_CORES_MULTI_NO_ATOMICS, or ESPHOME_CORES_MULTI_ATOMICS must be defined."
 #endif
-  }
-
-  // Only update if time moved forward
-  if (now > last) {
-    this->last_millis_ = now;
-  }
-#endif
-
-  // Combine major (high 32 bits) and now (low 32 bits) into 64-bit time
-  return now + (static_cast<uint64_t>(this->millis_major_) << 32);
 }
 
 bool HOT Scheduler::SchedulerItem::cmp(const std::unique_ptr<SchedulerItem> &a,
diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h
index 64df2f2bb0..b539b26949 100644
--- a/esphome/core/scheduler.h
+++ b/esphome/core/scheduler.h
@@ -1,10 +1,11 @@
 #pragma once
 
+#include "esphome/core/defines.h"
 #include <cstdint>
 #include <deque>
 #include <memory>
 #include <vector>
 
-#if !defined(USE_ESP8266) && !defined(USE_RP2040) && !defined(USE_LIBRETINY)
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
 #include <atomic>
 #endif
@@ -204,23 +205,40 @@ class Scheduler {
   Mutex lock_;
   std::vector<std::unique_ptr<SchedulerItem>> items_;
   std::vector<std::unique_ptr<SchedulerItem>> to_add_;
-#if !defined(USE_ESP8266) && !defined(USE_RP2040)
-  // ESP8266 and RP2040 don't need the defer queue because:
-  // ESP8266: Single-core with no preemptive multitasking
-  // RP2040: Currently has empty mutex implementation in ESPHome
-  // Both platforms save 40 bytes of RAM by excluding this
+#ifndef ESPHOME_CORES_SINGLE
+  // Single-core platforms don't need the defer queue and save 40 bytes of RAM
   std::deque<std::unique_ptr<SchedulerItem>> defer_queue_;  // FIFO queue for defer() calls
-#endif
-#if !defined(USE_ESP8266) && !defined(USE_RP2040) && !defined(USE_LIBRETINY)
-  // Multi-threaded platforms with atomic support: last_millis_ needs atomic for lock-free updates
+#endif /* ESPHOME_CORES_SINGLE */
+  uint32_t to_remove_{0};
+
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
+  /*
+   * Multi-threaded platforms with atomic support: last_millis_ needs atomic for lock-free updates
+   *
+   * MEMORY-ORDERING NOTE
+   * --------------------
+   * `last_millis_` and `millis_major_` form a single 64-bit timestamp split in half.
+   * Writers publish `last_millis_` with memory_order_release and readers use
+   * memory_order_acquire. This ensures that once a reader sees the new low word,
+   * it also observes the corresponding increment of `millis_major_`.
+   */
   std::atomic<uint32_t> last_millis_{0};
-#else
+#else /* not ESPHOME_CORES_MULTI_ATOMICS */
   // Platforms without atomic support or single-threaded platforms
   uint32_t last_millis_{0};
-#endif
-  // millis_major_ is protected by lock when incrementing
+#endif /* else ESPHOME_CORES_MULTI_ATOMICS */
+
+  /*
+   * Upper 16 bits of the 64-bit millis counter. Incremented only while holding
+   * `lock_`; read concurrently. Atomic (relaxed) avoids a formal data race.
+   * Ordering relative to `last_millis_` is provided by its release store and the
+   * corresponding acquire loads.
+   */
+#ifdef ESPHOME_CORES_MULTI_ATOMICS
+  std::atomic<uint16_t> millis_major_{0};
+#else /* not ESPHOME_CORES_MULTI_ATOMICS */
   uint16_t millis_major_{0};
-  uint32_t to_remove_{0};
+#endif /* else ESPHOME_CORES_MULTI_ATOMICS */
 };
 
 }  // namespace esphome
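
Reviewer note: the subtle part of this change is the ESPHOME_CORES_MULTI_ATOMICS branch of millis_64_(), where a 16-bit epoch counter and a 32-bit low word are combined into one 64-bit monotonic timestamp using release stores, acquire loads, and an epoch-coherency retry loop. The standalone sketch below is not part of the patch; the class name EpochClock, the file name, and the build command are invented purely to illustrate the same pattern in isolation under those assumptions.

// epoch_clock_sketch.cpp - illustrative only; mirrors the pattern used by Scheduler::millis_64_().
// Build (hypothetical): g++ -std=c++17 -pthread epoch_clock_sketch.cpp
#include <atomic>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <mutex>

class EpochClock {
 public:
  // Convert a 32-bit tick count into a 64-bit monotonic timestamp.
  uint64_t to_64bit(uint32_t now) {
    static constexpr uint32_t HALF_MAX_UINT32 = 0x80000000u;
    for (;;) {
      uint16_t major = major_.load(std::memory_order_acquire);
      uint32_t last = low_.load(std::memory_order_acquire);

      if (now < last && (last - now) > HALF_MAX_UINT32) {
        // Large backwards jump: treat as a 32-bit rollover, handled under the lock.
        std::lock_guard<std::mutex> guard(lock_);
        last = low_.load(std::memory_order_relaxed);
        if (now < last && (last - now) > HALF_MAX_UINT32) {
          major_.fetch_add(1, std::memory_order_relaxed);
          major++;
        }
        low_.store(now, std::memory_order_release);  // publish the low word after the epoch bump
      } else {
        // Normal path: lock-free forward update within the same epoch.
        while (now > last && (now - last) < HALF_MAX_UINT32) {
          if (low_.compare_exchange_weak(last, now, std::memory_order_release, std::memory_order_relaxed)) {
            break;
          }
        }
      }
      // Retry if another thread bumped the epoch while we were working,
      // so the (major, now) pair we return is always coherent.
      if (major_.load(std::memory_order_relaxed) == major)
        return now + (static_cast<uint64_t>(major) << 32);
    }
  }

 private:
  std::mutex lock_;
  std::atomic<uint32_t> low_{0};
  std::atomic<uint16_t> major_{0};
};

int main() {
  EpochClock clock;
  printf("%" PRIu64 "\n", clock.to_64bit(0x60000000u));  // forward progress within epoch 0
  printf("%" PRIu64 "\n", clock.to_64bit(0xC0000000u));  // still epoch 0
  printf("%" PRIu64 "\n", clock.to_64bit(0x00000100u));  // wraps: epoch bumps to 1
  return 0;
}

The single-core and no-atomics branches in the patch produce the same 64-bit value without this machinery (plain fields, or a lock near the rollover window), which is why scheduler.h switches the storage type of last_millis_ and millis_major_ on the same ESPHOME_CORES_* defines.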