From cc87b32206e602adc1a3c34f659ca5498fdd269a Mon Sep 17 00:00:00 2001 From: Will Miles Date: Sat, 28 Sep 2024 10:02:05 -0400 Subject: [PATCH 1/6] Support PWM phase shifts on ESP8266 Use the phase-locked soft PWM from the Arduino core to implement the same PWM phase management as ESP32s are using. The soft PWM code is vendored in, as it was previously, to add the NMI workaround from #4035. Completes #4034 --- .../src/core_esp8266_waveform_phase.cpp | 478 ++++++++++++ .../src/core_esp8266_waveform_pwm.cpp | 717 ------------------ wled00/bus_manager.cpp | 46 +- 3 files changed, 507 insertions(+), 734 deletions(-) create mode 100644 lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp delete mode 100644 lib/ESP8266PWM/src/core_esp8266_waveform_pwm.cpp diff --git a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp new file mode 100644 index 000000000..b846091bf --- /dev/null +++ b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp @@ -0,0 +1,478 @@ +/* esp8266_waveform imported from platform source code + Modified for WLED to work around a fault in the NMI handling, + which can result in the system locking up and hard WDT crashes. + + Imported from https://github.com/esp8266/Arduino/blob/7e0d20e2b9034994f573a236364e0aef17fd66de/cores/esp8266/core_esp8266_waveform_phase.cpp +*/ + + +/* + esp8266_waveform - General purpose waveform generation and control, + supporting outputs on all pins in parallel. + + Copyright (c) 2018 Earle F. Philhower, III. All rights reserved. + Copyright (c) 2020 Dirk O. Kaar. + + The core idea is to have a programmable waveform generator with a unique + high and low period (defined in microseconds or CPU clock cycles). TIMER1 is + set to 1-shot mode and is always loaded with the time until the next edge + of any live waveforms. + + Up to one waveform generator per pin supported. + + Each waveform generator is synchronized to the ESP clock cycle counter, not the + timer. This allows for removing interrupt jitter and delay as the counter + always increments once per 80MHz clock. Changes to a waveform are + contiguous and only take effect on the next waveform transition, + allowing for smooth transitions. + + This replaces older tone(), analogWrite(), and the Servo classes. + + Everywhere in the code where "ccy" or "ccys" is used, it means ESP.getCycleCount() + clock cycle time, or an interval measured in clock cycles, but not TIMER1 + cycles (which may be 2 CPU clock cycles @ 160MHz). + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "core_esp8266_waveform.h" +#include +#include "debug.h" +#include "ets_sys.h" +#include + + +// ----- @willmmiles begin patch ----- +// Linker magic +extern "C" void usePWMFixedNMI(void) {}; + +// NMI crash workaround +// Sometimes the NMI fails to return, stalling the CPU. When this happens, +// the next NMI gets a return address /inside the NMI handler function/. +// We work around this by caching the last NMI return address, and restoring +// the epc3 and eps3 registers to the previous values if the observed epc3 +// happens to be pointing to the _NMILevelVector function. +extern "C" void _NMILevelVector(); +extern "C" void _UserExceptionVector_1(); // the next function after _NMILevelVector +static inline IRAM_ATTR void nmiCrashWorkaround() { + static uintptr_t epc3_backup, eps3_backup; + + uintptr_t epc3, eps3; + __asm__ __volatile__("rsr %0,epc3; rsr %1,eps3":"=a"(epc3),"=a" (eps3)); + if ((epc3 < (uintptr_t) &_NMILevelVector) || (epc3 >= (uintptr_t) &_UserExceptionVector_1)) { + // Address is good; save backup + epc3_backup = epc3; + eps3_backup = eps3; + } else { + // Address is inside the NMI handler -- restore from backup + __asm__ __volatile__("wsr %0,epc3; wsr %1,eps3"::"a"(epc3_backup),"a"(eps3_backup)); + } +} +// ----- @willmmiles end patch ----- + + +// No-op calls to override the PWM implementation +extern "C" void _setPWMFreq_weak(uint32_t freq) { (void) freq; } +extern "C" IRAM_ATTR bool _stopPWM_weak(int pin) { (void) pin; return false; } +extern "C" bool _setPWM_weak(int pin, uint32_t val, uint32_t range) { (void) pin; (void) val; (void) range; return false; } + + +// Timer is 80MHz fixed. 160MHz CPU frequency need scaling. +constexpr bool ISCPUFREQ160MHZ = clockCyclesPerMicrosecond() == 160; +// Maximum delay between IRQs, Timer1, <= 2^23 / 80MHz +constexpr int32_t MAXIRQTICKSCCYS = microsecondsToClockCycles(10000); +// Maximum servicing time for any single IRQ +constexpr uint32_t ISRTIMEOUTCCYS = microsecondsToClockCycles(18); +// The latency between in-ISR rearming of the timer and the earliest firing +constexpr int32_t IRQLATENCYCCYS = microsecondsToClockCycles(2); +// The SDK and hardware take some time to actually get to our NMI code +constexpr int32_t DELTAIRQCCYS = ISCPUFREQ160MHZ ? + microsecondsToClockCycles(2) >> 1 : microsecondsToClockCycles(2); + +// for INFINITE, the NMI proceeds on the waveform without expiry deadline. +// for EXPIRES, the NMI expires the waveform automatically on the expiry ccy. +// for UPDATEEXPIRY, the NMI recomputes the exact expiry ccy and transitions to EXPIRES. +// for INIT, the NMI initializes nextPeriodCcy, and if expiryCcy != 0 includes UPDATEEXPIRY. +enum class WaveformMode : uint8_t {INFINITE = 0, EXPIRES = 1, UPDATEEXPIRY = 2, INIT = 3}; + +// Waveform generator can create tones, PWM, and servos +typedef struct { + uint32_t nextPeriodCcy; // ESP clock cycle when a period begins. If WaveformMode::INIT, temporarily holds positive phase offset ccy count + uint32_t endDutyCcy; // ESP clock cycle when going from duty to off + int32_t dutyCcys; // Set next off cycle at low->high to maintain phase + int32_t adjDutyCcys; // Temporary correction for next period + int32_t periodCcys; // Set next phase cycle at low->high to maintain phase + uint32_t expiryCcy; // For time-limited waveform, the CPU clock cycle when this waveform must stop. If WaveformMode::UPDATE, temporarily holds relative ccy count + WaveformMode mode; + int8_t alignPhase; // < 0 no phase alignment, otherwise starts waveform in relative phase offset to given pin + bool autoPwm; // perform PWM duty to idle cycle ratio correction under high load at the expense of precise timings +} Waveform; + +namespace { + + static struct { + Waveform pins[17]; // State of all possible pins + uint32_t states = 0; // Is the pin high or low, updated in NMI so no access outside the NMI code + uint32_t enabled = 0; // Is it actively running, updated in NMI so no access outside the NMI code + + // Enable lock-free by only allowing updates to waveform.states and waveform.enabled from IRQ service routine + int32_t toSetBits = 0; // Message to the NMI handler to start/modify exactly one waveform + int32_t toDisableBits = 0; // Message to the NMI handler to disable exactly one pin from waveform generation + + uint32_t(*timer1CB)() = nullptr; + + bool timer1Running = false; + + uint32_t nextEventCcy; + } waveform; + +} + +// Interrupt on/off control +static IRAM_ATTR void timer1Interrupt(); + +// Non-speed critical bits +#pragma GCC optimize ("Os") + +static void initTimer() { + timer1_disable(); + ETS_FRC_TIMER1_INTR_ATTACH(NULL, NULL); + ETS_FRC_TIMER1_NMI_INTR_ATTACH(timer1Interrupt); + timer1_enable(TIM_DIV1, TIM_EDGE, TIM_SINGLE); + waveform.timer1Running = true; + timer1_write(IRQLATENCYCCYS); // Cause an interrupt post-haste +} + +static void IRAM_ATTR deinitTimer() { + ETS_FRC_TIMER1_NMI_INTR_ATTACH(NULL); + timer1_disable(); + timer1_isr_init(); + waveform.timer1Running = false; +} + +extern "C" { + +// Set a callback. Pass in NULL to stop it +void setTimer1Callback_weak(uint32_t (*fn)()) { + waveform.timer1CB = fn; + std::atomic_thread_fence(std::memory_order_acq_rel); + if (!waveform.timer1Running && fn) { + initTimer(); + } else if (waveform.timer1Running && !fn && !waveform.enabled) { + deinitTimer(); + } +} + +// Start up a waveform on a pin, or change the current one. Will change to the new +// waveform smoothly on next low->high transition. For immediate change, stopWaveform() +// first, then it will immediately begin. +int startWaveformClockCycles_weak(uint8_t pin, uint32_t highCcys, uint32_t lowCcys, + uint32_t runTimeCcys, int8_t alignPhase, uint32_t phaseOffsetCcys, bool autoPwm) { + uint32_t periodCcys = highCcys + lowCcys; + if (periodCcys < MAXIRQTICKSCCYS) { + if (!highCcys) { + periodCcys = (MAXIRQTICKSCCYS / periodCcys) * periodCcys; + } + else if (!lowCcys) { + highCcys = periodCcys = (MAXIRQTICKSCCYS / periodCcys) * periodCcys; + } + } + // sanity checks, including mixed signed/unsigned arithmetic safety + if ((pin > 16) || isFlashInterfacePin(pin) || (alignPhase > 16) || + static_cast(periodCcys) <= 0 || + static_cast(highCcys) < 0 || static_cast(lowCcys) < 0) { + return false; + } + Waveform& wave = waveform.pins[pin]; + wave.dutyCcys = highCcys; + wave.adjDutyCcys = 0; + wave.periodCcys = periodCcys; + wave.autoPwm = autoPwm; + + std::atomic_thread_fence(std::memory_order_acquire); + const uint32_t pinBit = 1UL << pin; + if (!(waveform.enabled & pinBit)) { + // wave.nextPeriodCcy and wave.endDutyCcy are initialized by the ISR + wave.nextPeriodCcy = phaseOffsetCcys; + wave.expiryCcy = runTimeCcys; // in WaveformMode::INIT, temporarily hold relative cycle count + wave.mode = WaveformMode::INIT; + wave.alignPhase = (alignPhase < 0) ? -1 : alignPhase; + if (!wave.dutyCcys) { + // If initially at zero duty cycle, force GPIO off + if (pin == 16) { + GP16O = 0; + } + else { + GPOC = pinBit; + } + } + std::atomic_thread_fence(std::memory_order_release); + waveform.toSetBits = 1UL << pin; + std::atomic_thread_fence(std::memory_order_release); + if (!waveform.timer1Running) { + initTimer(); + } + else if (T1V > IRQLATENCYCCYS) { + // Must not interfere if Timer is due shortly + timer1_write(IRQLATENCYCCYS); + } + } + else { + wave.mode = WaveformMode::INFINITE; // turn off possible expiry to make update atomic from NMI + std::atomic_thread_fence(std::memory_order_release); + wave.expiryCcy = runTimeCcys; // in WaveformMode::UPDATEEXPIRY, temporarily hold relative cycle count + if (runTimeCcys) { + wave.mode = WaveformMode::UPDATEEXPIRY; + std::atomic_thread_fence(std::memory_order_release); + waveform.toSetBits = 1UL << pin; + } + } + std::atomic_thread_fence(std::memory_order_acq_rel); + while (waveform.toSetBits) { + esp_yield(); // Wait for waveform to update + std::atomic_thread_fence(std::memory_order_acquire); + } + return true; +} + +// Stops a waveform on a pin +IRAM_ATTR int stopWaveform_weak(uint8_t pin) { + // Can't possibly need to stop anything if there is no timer active + if (!waveform.timer1Running) { + return false; + } + // If user sends in a pin >16 but <32, this will always point to a 0 bit + // If they send >=32, then the shift will result in 0 and it will also return false + std::atomic_thread_fence(std::memory_order_acquire); + const uint32_t pinBit = 1UL << pin; + if (waveform.enabled & pinBit) { + waveform.toDisableBits = 1UL << pin; + std::atomic_thread_fence(std::memory_order_release); + // Must not interfere if Timer is due shortly + if (T1V > IRQLATENCYCCYS) { + timer1_write(IRQLATENCYCCYS); + } + while (waveform.toDisableBits) { + /* no-op */ // Can't delay() since stopWaveform may be called from an IRQ + std::atomic_thread_fence(std::memory_order_acquire); + } + } + if (!waveform.enabled && !waveform.timer1CB) { + deinitTimer(); + } + return true; +} + +}; + +// Speed critical bits +#pragma GCC optimize ("O2") + +// For dynamic CPU clock frequency switch in loop the scaling logic would have to be adapted. +// Using constexpr makes sure that the CPU clock frequency is compile-time fixed. +static inline IRAM_ATTR int32_t scaleCcys(const int32_t ccys, const bool isCPU2X) { + if (ISCPUFREQ160MHZ) { + return isCPU2X ? ccys : (ccys >> 1); + } + else { + return isCPU2X ? (ccys << 1) : ccys; + } +} + +static IRAM_ATTR void timer1Interrupt() { + // ----- @willmmiles begin patch ----- + nmiCrashWorkaround(); + // ----- @willmmiles end patch ----- + + const uint32_t isrStartCcy = ESP.getCycleCount(); + int32_t clockDrift = isrStartCcy - waveform.nextEventCcy; + const bool isCPU2X = CPU2X & 1; + if ((waveform.toSetBits && !(waveform.enabled & waveform.toSetBits)) || waveform.toDisableBits) { + // Handle enable/disable requests from main app. + waveform.enabled = (waveform.enabled & ~waveform.toDisableBits) | waveform.toSetBits; // Set the requested waveforms on/off + // Find the first GPIO being generated by checking GCC's find-first-set (returns 1 + the bit of the first 1 in an int32_t) + waveform.toDisableBits = 0; + } + + if (waveform.toSetBits) { + const int toSetPin = __builtin_ffs(waveform.toSetBits) - 1; + Waveform& wave = waveform.pins[toSetPin]; + switch (wave.mode) { + case WaveformMode::INIT: + waveform.states &= ~waveform.toSetBits; // Clear the state of any just started + if (wave.alignPhase >= 0 && waveform.enabled & (1UL << wave.alignPhase)) { + wave.nextPeriodCcy = waveform.pins[wave.alignPhase].nextPeriodCcy + wave.nextPeriodCcy; + } + else { + wave.nextPeriodCcy = waveform.nextEventCcy; + } + if (!wave.expiryCcy) { + wave.mode = WaveformMode::INFINITE; + break; + } + // fall through + case WaveformMode::UPDATEEXPIRY: + // in WaveformMode::UPDATEEXPIRY, expiryCcy temporarily holds relative CPU cycle count + wave.expiryCcy = wave.nextPeriodCcy + scaleCcys(wave.expiryCcy, isCPU2X); + wave.mode = WaveformMode::EXPIRES; + break; + default: + break; + } + waveform.toSetBits = 0; + } + + // Exit the loop if the next event, if any, is sufficiently distant. + const uint32_t isrTimeoutCcy = isrStartCcy + ISRTIMEOUTCCYS; + uint32_t busyPins = waveform.enabled; + waveform.nextEventCcy = isrStartCcy + MAXIRQTICKSCCYS; + + uint32_t now = ESP.getCycleCount(); + uint32_t isrNextEventCcy = now; + while (busyPins) { + if (static_cast(isrNextEventCcy - now) > IRQLATENCYCCYS) { + waveform.nextEventCcy = isrNextEventCcy; + break; + } + isrNextEventCcy = waveform.nextEventCcy; + uint32_t loopPins = busyPins; + while (loopPins) { + const int pin = __builtin_ffsl(loopPins) - 1; + const uint32_t pinBit = 1UL << pin; + loopPins ^= pinBit; + + Waveform& wave = waveform.pins[pin]; + + if (clockDrift) { + wave.endDutyCcy += clockDrift; + wave.nextPeriodCcy += clockDrift; + wave.expiryCcy += clockDrift; + } + + uint32_t waveNextEventCcy = (waveform.states & pinBit) ? wave.endDutyCcy : wave.nextPeriodCcy; + if (WaveformMode::EXPIRES == wave.mode && + static_cast(waveNextEventCcy - wave.expiryCcy) >= 0 && + static_cast(now - wave.expiryCcy) >= 0) { + // Disable any waveforms that are done + waveform.enabled ^= pinBit; + busyPins ^= pinBit; + } + else { + const int32_t overshootCcys = now - waveNextEventCcy; + if (overshootCcys >= 0) { + const int32_t periodCcys = scaleCcys(wave.periodCcys, isCPU2X); + if (waveform.states & pinBit) { + // active configuration and forward are 100% duty + if (wave.periodCcys == wave.dutyCcys) { + wave.nextPeriodCcy += periodCcys; + wave.endDutyCcy = wave.nextPeriodCcy; + } + else { + if (wave.autoPwm) { + wave.adjDutyCcys += overshootCcys; + } + waveform.states ^= pinBit; + if (16 == pin) { + GP16O = 0; + } + else { + GPOC = pinBit; + } + } + waveNextEventCcy = wave.nextPeriodCcy; + } + else { + wave.nextPeriodCcy += periodCcys; + if (!wave.dutyCcys) { + wave.endDutyCcy = wave.nextPeriodCcy; + } + else { + int32_t dutyCcys = scaleCcys(wave.dutyCcys, isCPU2X); + if (dutyCcys <= wave.adjDutyCcys) { + dutyCcys >>= 1; + wave.adjDutyCcys -= dutyCcys; + } + else if (wave.adjDutyCcys) { + dutyCcys -= wave.adjDutyCcys; + wave.adjDutyCcys = 0; + } + wave.endDutyCcy = now + dutyCcys; + if (static_cast(wave.endDutyCcy - wave.nextPeriodCcy) > 0) { + wave.endDutyCcy = wave.nextPeriodCcy; + } + waveform.states |= pinBit; + if (16 == pin) { + GP16O = 1; + } + else { + GPOS = pinBit; + } + } + waveNextEventCcy = wave.endDutyCcy; + } + + if (WaveformMode::EXPIRES == wave.mode && static_cast(waveNextEventCcy - wave.expiryCcy) > 0) { + waveNextEventCcy = wave.expiryCcy; + } + } + + if (static_cast(waveNextEventCcy - isrTimeoutCcy) >= 0) { + busyPins ^= pinBit; + if (static_cast(waveform.nextEventCcy - waveNextEventCcy) > 0) { + waveform.nextEventCcy = waveNextEventCcy; + } + } + else if (static_cast(isrNextEventCcy - waveNextEventCcy) > 0) { + isrNextEventCcy = waveNextEventCcy; + } + } + now = ESP.getCycleCount(); + } + clockDrift = 0; + } + + int32_t callbackCcys = 0; + if (waveform.timer1CB) { + callbackCcys = scaleCcys(waveform.timer1CB(), isCPU2X); + } + now = ESP.getCycleCount(); + int32_t nextEventCcys = waveform.nextEventCcy - now; + // Account for unknown duration of timer1CB(). + if (waveform.timer1CB && nextEventCcys > callbackCcys) { + waveform.nextEventCcy = now + callbackCcys; + nextEventCcys = callbackCcys; + } + + // Timer is 80MHz fixed. 160MHz CPU frequency need scaling. + int32_t deltaIrqCcys = DELTAIRQCCYS; + int32_t irqLatencyCcys = IRQLATENCYCCYS; + if (isCPU2X) { + nextEventCcys >>= 1; + deltaIrqCcys >>= 1; + irqLatencyCcys >>= 1; + } + + // Firing timer too soon, the NMI occurs before ISR has returned. + if (nextEventCcys < irqLatencyCcys + deltaIrqCcys) { + waveform.nextEventCcy = now + IRQLATENCYCCYS + DELTAIRQCCYS; + nextEventCcys = irqLatencyCcys; + } + else { + nextEventCcys -= deltaIrqCcys; + } + + // Register access is fast and edge IRQ was configured before. + T1L = nextEventCcys; +} diff --git a/lib/ESP8266PWM/src/core_esp8266_waveform_pwm.cpp b/lib/ESP8266PWM/src/core_esp8266_waveform_pwm.cpp deleted file mode 100644 index 78c7160d9..000000000 --- a/lib/ESP8266PWM/src/core_esp8266_waveform_pwm.cpp +++ /dev/null @@ -1,717 +0,0 @@ -/* esp8266_waveform imported from platform source code - Modified for WLED to work around a fault in the NMI handling, - which can result in the system locking up and hard WDT crashes. - - Imported from https://github.com/esp8266/Arduino/blob/7e0d20e2b9034994f573a236364e0aef17fd66de/cores/esp8266/core_esp8266_waveform_pwm.cpp -*/ - -/* - esp8266_waveform - General purpose waveform generation and control, - supporting outputs on all pins in parallel. - - Copyright (c) 2018 Earle F. Philhower, III. All rights reserved. - - The core idea is to have a programmable waveform generator with a unique - high and low period (defined in microseconds or CPU clock cycles). TIMER1 - is set to 1-shot mode and is always loaded with the time until the next - edge of any live waveforms. - - Up to one waveform generator per pin supported. - - Each waveform generator is synchronized to the ESP clock cycle counter, not - the timer. This allows for removing interrupt jitter and delay as the - counter always increments once per 80MHz clock. Changes to a waveform are - contiguous and only take effect on the next waveform transition, - allowing for smooth transitions. - - This replaces older tone(), analogWrite(), and the Servo classes. - - Everywhere in the code where "cycles" is used, it means ESP.getCycleCount() - clock cycle count, or an interval measured in CPU clock cycles, but not - TIMER1 cycles (which may be 2 CPU clock cycles @ 160MHz). - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - - -#include -#include -#include "ets_sys.h" -#include "core_esp8266_waveform.h" -#include "user_interface.h" - -extern "C" { - -// Linker magic -void usePWMFixedNMI() {}; - -// Maximum delay between IRQs -#define MAXIRQUS (10000) - -// Waveform generator can create tones, PWM, and servos -typedef struct { - uint32_t nextServiceCycle; // ESP cycle timer when a transition required - uint32_t expiryCycle; // For time-limited waveform, the cycle when this waveform must stop - uint32_t timeHighCycles; // Actual running waveform period (adjusted using desiredCycles) - uint32_t timeLowCycles; // - uint32_t desiredHighCycles; // Ideal waveform period to drive the error signal - uint32_t desiredLowCycles; // - uint32_t lastEdge; // Cycle when this generator last changed -} Waveform; - -class WVFState { -public: - Waveform waveform[17]; // State of all possible pins - uint32_t waveformState = 0; // Is the pin high or low, updated in NMI so no access outside the NMI code - uint32_t waveformEnabled = 0; // Is it actively running, updated in NMI so no access outside the NMI code - - // Enable lock-free by only allowing updates to waveformState and waveformEnabled from IRQ service routine - uint32_t waveformToEnable = 0; // Message to the NMI handler to start a waveform on a inactive pin - uint32_t waveformToDisable = 0; // Message to the NMI handler to disable a pin from waveform generation - - uint32_t waveformToChange = 0; // Mask of pin to change. One bit set in main app, cleared when effected in the NMI - uint32_t waveformNewHigh = 0; - uint32_t waveformNewLow = 0; - - uint32_t (*timer1CB)() = NULL; - - // Optimize the NMI inner loop by keeping track of the min and max GPIO that we - // are generating. In the common case (1 PWM) these may be the same pin and - // we can avoid looking at the other pins. - uint16_t startPin = 0; - uint16_t endPin = 0; -}; -static WVFState wvfState; - - -// Ensure everything is read/written to RAM -#define MEMBARRIER() { __asm__ volatile("" ::: "memory"); } - -// Non-speed critical bits -#pragma GCC optimize ("Os") - -// Interrupt on/off control -static IRAM_ATTR void timer1Interrupt(); -static bool timerRunning = false; - -static __attribute__((noinline)) void initTimer() { - if (!timerRunning) { - timer1_disable(); - ETS_FRC_TIMER1_INTR_ATTACH(NULL, NULL); - ETS_FRC_TIMER1_NMI_INTR_ATTACH(timer1Interrupt); - timer1_enable(TIM_DIV1, TIM_EDGE, TIM_SINGLE); - timerRunning = true; - timer1_write(microsecondsToClockCycles(10)); - } -} - -static IRAM_ATTR void forceTimerInterrupt() { - if (T1L > microsecondsToClockCycles(10)) { - T1L = microsecondsToClockCycles(10); - } -} - -// PWM implementation using special purpose state machine -// -// Keep an ordered list of pins with the delta in cycles between each -// element, with a terminal entry making up the remainder of the PWM -// period. With this method sum(all deltas) == PWM period clock cycles. -// -// At t=0 set all pins high and set the timeout for the 1st edge. -// On interrupt, if we're at the last element reset to t=0 state -// Otherwise, clear that pin down and set delay for next element -// and so forth. - -constexpr int maxPWMs = 8; - -// PWM machine state -typedef struct PWMState { - uint32_t mask; // Bitmask of active pins - uint32_t cnt; // How many entries - uint32_t idx; // Where the state machine is along the list - uint8_t pin[maxPWMs + 1]; - uint32_t delta[maxPWMs + 1]; - uint32_t nextServiceCycle; // Clock cycle for next step - struct PWMState *pwmUpdate; // Set by main code, cleared by ISR -} PWMState; - -static PWMState pwmState; -static uint32_t _pwmFreq = 1000; -static uint32_t _pwmPeriod = microsecondsToClockCycles(1000000UL) / _pwmFreq; - - -// If there are no more scheduled activities, shut down Timer 1. -// Otherwise, do nothing. -static IRAM_ATTR void disableIdleTimer() { - if (timerRunning && !wvfState.waveformEnabled && !pwmState.cnt && !wvfState.timer1CB) { - ETS_FRC_TIMER1_NMI_INTR_ATTACH(NULL); - timer1_disable(); - timer1_isr_init(); - timerRunning = false; - } -} - -// Notify the NMI that a new PWM state is available through the mailbox. -// Wait for mailbox to be emptied (either busy or delay() as needed) -static IRAM_ATTR void _notifyPWM(PWMState *p, bool idle) { - p->pwmUpdate = nullptr; - pwmState.pwmUpdate = p; - MEMBARRIER(); - forceTimerInterrupt(); - while (pwmState.pwmUpdate) { - if (idle) { - esp_yield(); - } - MEMBARRIER(); - } -} - -static void _addPWMtoList(PWMState &p, int pin, uint32_t val, uint32_t range); - - -// Called when analogWriteFreq() changed to update the PWM total period -//extern void _setPWMFreq_weak(uint32_t freq) __attribute__((weak)); -void _setPWMFreq_weak(uint32_t freq) { - _pwmFreq = freq; - - // Convert frequency into clock cycles - uint32_t cc = microsecondsToClockCycles(1000000UL) / freq; - - // Simple static adjustment to bring period closer to requested due to overhead - // Empirically determined as a constant PWM delay and a function of the number of PWMs -#if F_CPU == 80000000 - cc -= ((microsecondsToClockCycles(pwmState.cnt) * 13) >> 4) + 110; -#else - cc -= ((microsecondsToClockCycles(pwmState.cnt) * 10) >> 4) + 75; -#endif - - if (cc == _pwmPeriod) { - return; // No change - } - - _pwmPeriod = cc; - - if (pwmState.cnt) { - PWMState p; // The working copy since we can't edit the one in use - p.mask = 0; - p.cnt = 0; - for (uint32_t i = 0; i < pwmState.cnt; i++) { - auto pin = pwmState.pin[i]; - _addPWMtoList(p, pin, wvfState.waveform[pin].desiredHighCycles, wvfState.waveform[pin].desiredLowCycles); - } - // Update and wait for mailbox to be emptied - initTimer(); - _notifyPWM(&p, true); - disableIdleTimer(); - } -} -/* -static void _setPWMFreq_bound(uint32_t freq) __attribute__((weakref("_setPWMFreq_weak"))); -void _setPWMFreq(uint32_t freq) { - _setPWMFreq_bound(freq); -} -*/ - -// Helper routine to remove an entry from the state machine -// and clean up any marked-off entries -static void _cleanAndRemovePWM(PWMState *p, int pin) { - uint32_t leftover = 0; - uint32_t in, out; - for (in = 0, out = 0; in < p->cnt; in++) { - if ((p->pin[in] != pin) && (p->mask & (1<pin[in]))) { - p->pin[out] = p->pin[in]; - p->delta[out] = p->delta[in] + leftover; - leftover = 0; - out++; - } else { - leftover += p->delta[in]; - p->mask &= ~(1<pin[in]); - } - } - p->cnt = out; - // Final pin is never used: p->pin[out] = 0xff; - p->delta[out] = p->delta[in] + leftover; -} - - -// Disable PWM on a specific pin (i.e. when a digitalWrite or analogWrite(0%/100%)) -//extern bool _stopPWM_weak(uint8_t pin) __attribute__((weak)); -IRAM_ATTR bool _stopPWM_weak(uint8_t pin) { - if (!((1<= _pwmPeriod) { - cc = _pwmPeriod - 1; - } - - if (p.cnt == 0) { - // Starting up from scratch, special case 1st element and PWM period - p.pin[0] = pin; - p.delta[0] = cc; - // Final pin is never used: p.pin[1] = 0xff; - p.delta[1] = _pwmPeriod - cc; - } else { - uint32_t ttl = 0; - uint32_t i; - // Skip along until we're at the spot to insert - for (i=0; (i <= p.cnt) && (ttl + p.delta[i] < cc); i++) { - ttl += p.delta[i]; - } - // Shift everything out by one to make space for new edge - for (int32_t j = p.cnt; j >= (int)i; j--) { - p.pin[j + 1] = p.pin[j]; - p.delta[j + 1] = p.delta[j]; - } - int off = cc - ttl; // The delta from the last edge to the one we're inserting - p.pin[i] = pin; - p.delta[i] = off; // Add the delta to this new pin - p.delta[i + 1] -= off; // And subtract it from the follower to keep sum(deltas) constant - } - p.cnt++; - p.mask |= 1<= maxPWMs) { - return false; // No space left - } - - // Sanity check for all-on/off - uint32_t cc = (_pwmPeriod * val) / range; - if ((cc == 0) || (cc >= _pwmPeriod)) { - digitalWrite(pin, cc ? HIGH : LOW); - return true; - } - - _addPWMtoList(p, pin, val, range); - - // Set mailbox and wait for ISR to copy it over - initTimer(); - _notifyPWM(&p, true); - disableIdleTimer(); - - // Potentially recalculate the PWM period if we've added another pin - _setPWMFreq(_pwmFreq); - - return true; -} -/* -static bool _setPWM_bound(int pin, uint32_t val, uint32_t range) __attribute__((weakref("_setPWM_weak"))); -bool _setPWM(int pin, uint32_t val, uint32_t range) { - return _setPWM_bound(pin, val, range); -} -*/ - -// Start up a waveform on a pin, or change the current one. Will change to the new -// waveform smoothly on next low->high transition. For immediate change, stopWaveform() -// first, then it will immediately begin. -//extern int startWaveformClockCycles_weak(uint8_t pin, uint32_t timeHighCycles, uint32_t timeLowCycles, uint32_t runTimeCycles, int8_t alignPhase, uint32_t phaseOffsetUS, bool autoPwm) __attribute__((weak)); -int startWaveformClockCycles_weak(uint8_t pin, uint32_t timeHighCycles, uint32_t timeLowCycles, uint32_t runTimeCycles, - int8_t alignPhase, uint32_t phaseOffsetUS, bool autoPwm) { - (void) alignPhase; - (void) phaseOffsetUS; - (void) autoPwm; - - if ((pin > 16) || isFlashInterfacePin(pin) || (timeHighCycles == 0)) { - return false; - } - Waveform *wave = &wvfState.waveform[pin]; - wave->expiryCycle = runTimeCycles ? ESP.getCycleCount() + runTimeCycles : 0; - if (runTimeCycles && !wave->expiryCycle) { - wave->expiryCycle = 1; // expiryCycle==0 means no timeout, so avoid setting it - } - - _stopPWM(pin); // Make sure there's no PWM live here - - uint32_t mask = 1<timeHighCycles = timeHighCycles; - wave->desiredHighCycles = timeHighCycles; - wave->timeLowCycles = timeLowCycles; - wave->desiredLowCycles = timeLowCycles; - wave->lastEdge = 0; - wave->nextServiceCycle = ESP.getCycleCount() + microsecondsToClockCycles(1); - wvfState.waveformToEnable |= mask; - MEMBARRIER(); - initTimer(); - forceTimerInterrupt(); - while (wvfState.waveformToEnable) { - esp_yield(); // Wait for waveform to update - MEMBARRIER(); - } - } - - return true; -} -/* -static int startWaveformClockCycles_bound(uint8_t pin, uint32_t timeHighCycles, uint32_t timeLowCycles, uint32_t runTimeCycles, int8_t alignPhase, uint32_t phaseOffsetUS, bool autoPwm) __attribute__((weakref("startWaveformClockCycles_weak"))); -int startWaveformClockCycles(uint8_t pin, uint32_t timeHighCycles, uint32_t timeLowCycles, uint32_t runTimeCycles, int8_t alignPhase, uint32_t phaseOffsetUS, bool autoPwm) { - return startWaveformClockCycles_bound(pin, timeHighCycles, timeLowCycles, runTimeCycles, alignPhase, phaseOffsetUS, autoPwm); -} - - -// This version falls-thru to the proper startWaveformClockCycles call and is invariant across waveform generators -int startWaveform(uint8_t pin, uint32_t timeHighUS, uint32_t timeLowUS, uint32_t runTimeUS, - int8_t alignPhase, uint32_t phaseOffsetUS, bool autoPwm) { - return startWaveformClockCycles_bound(pin, - microsecondsToClockCycles(timeHighUS), microsecondsToClockCycles(timeLowUS), - microsecondsToClockCycles(runTimeUS), alignPhase, microsecondsToClockCycles(phaseOffsetUS), autoPwm); -} -*/ - -// Set a callback. Pass in NULL to stop it -//extern void setTimer1Callback_weak(uint32_t (*fn)()) __attribute__((weak)); -void setTimer1Callback_weak(uint32_t (*fn)()) { - wvfState.timer1CB = fn; - if (fn) { - initTimer(); - forceTimerInterrupt(); - } - disableIdleTimer(); -} -/* -static void setTimer1Callback_bound(uint32_t (*fn)()) __attribute__((weakref("setTimer1Callback_weak"))); -void setTimer1Callback(uint32_t (*fn)()) { - setTimer1Callback_bound(fn); -} -*/ - -// Stops a waveform on a pin -//extern int stopWaveform_weak(uint8_t pin) __attribute__((weak)); -IRAM_ATTR int stopWaveform_weak(uint8_t pin) { - // Can't possibly need to stop anything if there is no timer active - if (!timerRunning) { - return false; - } - // If user sends in a pin >16 but <32, this will always point to a 0 bit - // If they send >=32, then the shift will result in 0 and it will also return false - uint32_t mask = 1<= (uintptr_t) &_UserExceptionVector_1)) { - // Address is good; save backup - epc3_backup = epc3; - eps3_backup = eps3; - } else { - // Address is inside the NMI handler -- restore from backup - __asm__ __volatile__("wsr %0,epc3; wsr %1,eps3"::"a"(epc3_backup),"a"(eps3_backup)); - } -} -// ----- @willmmiles end patch ----- - - -// The SDK and hardware take some time to actually get to our NMI code, so -// decrement the next IRQ's timer value by a bit so we can actually catch the -// real CPU cycle counter we want for the waveforms. - -// The SDK also sometimes is running at a different speed the the Arduino core -// so the ESP cycle counter is actually running at a variable speed. -// adjust(x) takes care of adjusting a delta clock cycle amount accordingly. -#if F_CPU == 80000000 - #define DELTAIRQ (microsecondsToClockCycles(9)/4) - #define adjust(x) ((x) << (turbo ? 1 : 0)) -#else - #define DELTAIRQ (microsecondsToClockCycles(9)/8) - #define adjust(x) ((x) >> 0) -#endif - -// When the time to the next edge is greater than this, RTI and set another IRQ to minimize CPU usage -#define MINIRQTIME microsecondsToClockCycles(6) - -static IRAM_ATTR void timer1Interrupt() { - // ----- @willmmiles begin patch ----- - nmiCrashWorkaround(); - // ----- @willmmiles end patch ----- - - // Flag if the core is at 160 MHz, for use by adjust() - bool turbo = (*(uint32_t*)0x3FF00014) & 1 ? true : false; - - uint32_t nextEventCycle = GetCycleCountIRQ() + microsecondsToClockCycles(MAXIRQUS); - uint32_t timeoutCycle = GetCycleCountIRQ() + microsecondsToClockCycles(14); - - if (wvfState.waveformToEnable || wvfState.waveformToDisable) { - // Handle enable/disable requests from main app - wvfState.waveformEnabled = (wvfState.waveformEnabled & ~wvfState.waveformToDisable) | wvfState.waveformToEnable; // Set the requested waveforms on/off - wvfState.waveformState &= ~wvfState.waveformToEnable; // And clear the state of any just started - wvfState.waveformToEnable = 0; - wvfState.waveformToDisable = 0; - // No mem barrier. Globals must be written to RAM on ISR exit. - // Find the first GPIO being generated by checking GCC's find-first-set (returns 1 + the bit of the first 1 in an int32_t) - wvfState.startPin = __builtin_ffs(wvfState.waveformEnabled) - 1; - // Find the last bit by subtracting off GCC's count-leading-zeros (no offset in this one) - wvfState.endPin = 32 - __builtin_clz(wvfState.waveformEnabled); - } else if (!pwmState.cnt && pwmState.pwmUpdate) { - // Start up the PWM generator by copying from the mailbox - pwmState.cnt = 1; - pwmState.idx = 1; // Ensure copy this cycle, cause it to start at t=0 - pwmState.nextServiceCycle = GetCycleCountIRQ(); // Do it this loop! - // No need for mem barrier here. Global must be written by IRQ exit - } - - bool done = false; - if (wvfState.waveformEnabled || pwmState.cnt) { - do { - nextEventCycle = GetCycleCountIRQ() + microsecondsToClockCycles(MAXIRQUS); - - // PWM state machine implementation - if (pwmState.cnt) { - int32_t cyclesToGo; - do { - cyclesToGo = pwmState.nextServiceCycle - GetCycleCountIRQ(); - if (cyclesToGo < 0) { - if (pwmState.idx == pwmState.cnt) { // Start of pulses, possibly copy new - if (pwmState.pwmUpdate) { - // Do the memory copy from temp to global and clear mailbox - pwmState = *(PWMState*)pwmState.pwmUpdate; - } - GPOS = pwmState.mask; // Set all active pins high - if (pwmState.mask & (1<<16)) { - GP16O = 1; - } - pwmState.idx = 0; - } else { - do { - // Drop the pin at this edge - if (pwmState.mask & (1<expiryCycle) { - int32_t expiryToGo = wave->expiryCycle - now; - if (expiryToGo < 0) { - // Done, remove! - if (i == 16) { - GP16O = 0; - } - GPOC = mask; - wvfState.waveformEnabled &= ~mask; - continue; - } - } - - // Check for toggles - int32_t cyclesToGo = wave->nextServiceCycle - now; - if (cyclesToGo < 0) { - uint32_t nextEdgeCycles; - uint32_t desired = 0; - uint32_t *timeToUpdate; - wvfState.waveformState ^= mask; - if (wvfState.waveformState & mask) { - if (i == 16) { - GP16O = 1; - } - GPOS = mask; - - if (wvfState.waveformToChange & mask) { - // Copy over next full-cycle timings - wave->timeHighCycles = wvfState.waveformNewHigh; - wave->desiredHighCycles = wvfState.waveformNewHigh; - wave->timeLowCycles = wvfState.waveformNewLow; - wave->desiredLowCycles = wvfState.waveformNewLow; - wave->lastEdge = 0; - wvfState.waveformToChange = 0; - } - if (wave->lastEdge) { - desired = wave->desiredLowCycles; - timeToUpdate = &wave->timeLowCycles; - } - nextEdgeCycles = wave->timeHighCycles; - } else { - if (i == 16) { - GP16O = 0; - } - GPOC = mask; - desired = wave->desiredHighCycles; - timeToUpdate = &wave->timeHighCycles; - nextEdgeCycles = wave->timeLowCycles; - } - if (desired) { - desired = adjust(desired); - int32_t err = desired - (now - wave->lastEdge); - if (abs(err) < desired) { // If we've lost > the entire phase, ignore this error signal - err /= 2; - *timeToUpdate += err; - } - } - nextEdgeCycles = adjust(nextEdgeCycles); - wave->nextServiceCycle = now + nextEdgeCycles; - wave->lastEdge = now; - } - nextEventCycle = earliest(nextEventCycle, wave->nextServiceCycle); - } - - // Exit the loop if we've hit the fixed runtime limit or the next event is known to be after that timeout would occur - uint32_t now = GetCycleCountIRQ(); - int32_t cycleDeltaNextEvent = nextEventCycle - now; - int32_t cyclesLeftTimeout = timeoutCycle - now; - done = (cycleDeltaNextEvent > MINIRQTIME) || (cyclesLeftTimeout < 0); - } while (!done); - } // if (wvfState.waveformEnabled) - - if (wvfState.timer1CB) { - nextEventCycle = earliest(nextEventCycle, GetCycleCountIRQ() + wvfState.timer1CB()); - } - - int32_t nextEventCycles = nextEventCycle - GetCycleCountIRQ(); - - if (nextEventCycles < MINIRQTIME) { - nextEventCycles = MINIRQTIME; - } - nextEventCycles -= DELTAIRQ; - - // Do it here instead of global function to save time and because we know it's edge-IRQ - T1L = nextEventCycles >> (turbo ? 1 : 0); -} - -}; diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp index 5b948b9c4..5dcb7f948 100644 --- a/wled00/bus_manager.cpp +++ b/wled00/bus_manager.cpp @@ -16,6 +16,9 @@ #define LEDC_MUTEX_UNLOCK() #endif #endif +#ifdef ESP8266 +#include "core_esp8266_waveform.h" +#endif #include "const.h" #include "pin_manager.h" #include "bus_wrapper.h" @@ -466,10 +469,7 @@ BusPwm::BusPwm(BusConfig &bc) for (unsigned i = 0; i < numPins; i++) pins[i] = {(int8_t)bc.pins[i], true}; if (!PinManager::allocateMultiplePins(pins, numPins, PinOwner::BusPwm)) return; -#ifdef ESP8266 - analogWriteRange((1<<_depth)-1); - analogWriteFreq(_frequency); -#else +#ifdef ARDUINO_ARCH_ESP32 // for 2 pin PWM CCT strip pinManager will make sure both LEDC channels are in the same speed group and sharing the same timer _ledcStart = PinManager::allocateLedc(numPins); if (_ledcStart == 255) { //no more free LEDC channels @@ -560,12 +560,23 @@ uint32_t BusPwm::getPixelColor(uint16_t pix) const { void BusPwm::show() { if (!_valid) return; + const unsigned numPins = getPins(); +#ifdef ESP8266 + const unsigned analogPeriod = F_CPU / _frequency; + const unsigned maxBri = analogPeriod; // compute to clock cycle accuracy + constexpr bool dithering = false; + constexpr unsigned bitShift = 7; // 2^7 clocks for dead time +#else // if _needsRefresh is true (UI hack) we are using dithering (credit @dedehai & @zalatnaicsongor) // https://github.com/Aircoookie/WLED/pull/4115 and https://github.com/zalatnaicsongor/WLED/pull/1) const bool dithering = _needsRefresh; // avoid working with bitfield - const unsigned numPins = getPins(); const unsigned maxBri = (1<<_depth); // possible values: 16384 (14), 8192 (13), 4096 (12), 2048 (11), 1024 (10), 512 (9) and 256 (8) - [[maybe_unused]] const unsigned bitShift = dithering * 4; // if dithering, _depth is 12 bit but LEDC channel is set to 8 bit (using 4 fractional bits) + const unsigned bitShift = dithering * 4; // if dithering, _depth is 12 bit but LEDC channel is set to 8 bit (using 4 fractional bits) +#endif + // add dead time between signals (when using dithering, two full 8bit pulses are required) + // this is needed for 2CH, but also adds some slack for ESP8266 which has less precise + // PWM timing. + const int deadTime = (1+dithering) << bitShift; // use CIE brightness formula (cubic) to fit (or approximate linearity of) human eye perceived brightness // the formula is based on 12 bit resolution as there is no need for greater precision @@ -591,19 +602,19 @@ void BusPwm::show() { // also mandatory that both channels use the same timer (pinManager takes care of that). for (unsigned i = 0; i < numPins; i++) { unsigned duty = (_data[i] * pwmBri) / 255; - #ifdef ESP8266 - if (_reversed) duty = maxBri - duty; - analogWrite(_pins[i], duty); - #else - int deadTime = 0; + if (_type == TYPE_ANALOG_2CH && Bus::getCCTBlend() == 0) { - // add dead time between signals (when using dithering, two full 8bit pulses are required) - deadTime = (1+dithering) << bitShift; // we only need to take care of shortening the signal at (almost) full brightness otherwise pulses may overlap - if (_bri >= 254 && duty >= maxBri / 2 && duty < maxBri) duty -= deadTime << 1; // shorten duty of larger signal except if full on - if (_reversed) deadTime = -deadTime; // need to invert dead time to make phaseshift go the opposite way so low signals dont overlap + if (_bri >= 254 && duty >= maxBri / 2 && duty < maxBri) { + duty -= deadTime << 1; // shorten duty of larger signal except if full on + } } if (_reversed) duty = maxBri - duty; + + #ifdef ESP8266 + stopWaveform(_pins[i]); + startWaveformClockCycles(_pins[i], duty, analogPeriod - duty, 0, i ? _pins[0] : -1, hPoint, false); + #else unsigned channel = _ledcStart + i; unsigned gr = channel/8; // high/low speed group unsigned ch = channel%8; // group channel @@ -612,9 +623,10 @@ void BusPwm::show() { LEDC.channel_group[gr].channel[ch].duty.duty = duty << ((!dithering)*4); // lowest 4 bits are used for dithering, shift by 4 bits if not using dithering LEDC.channel_group[gr].channel[ch].hpoint.hpoint = hPoint >> bitShift; // hPoint is at _depth resolution (needs shifting if dithering) ledc_update_duty((ledc_mode_t)gr, (ledc_channel_t)ch); - hPoint += duty + deadTime; // offset to cascade the signals - if (hPoint >= maxBri) hPoint = 0; // offset it out of bounds, reset #endif + + hPoint += duty + (_reversed ? -1 : 1) * deadTime; // offset to cascade the signals + if (hPoint >= maxBri) hPoint -= maxBri; // offset is out of bounds, reset } } From fe4b668107fc5a91a1d66743cefbce185c10b5d2 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Sat, 28 Sep 2024 23:12:03 -0400 Subject: [PATCH 2/6] Slightly reduce PWM jankiness --- .../src/core_esp8266_waveform_phase.cpp | 33 +++++++++++++++---- wled00/bus_manager.cpp | 2 +- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp index b846091bf..12a12066c 100644 --- a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp +++ b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp @@ -104,18 +104,20 @@ constexpr int32_t DELTAIRQCCYS = ISCPUFREQ160MHZ ? // for INFINITE, the NMI proceeds on the waveform without expiry deadline. // for EXPIRES, the NMI expires the waveform automatically on the expiry ccy. // for UPDATEEXPIRY, the NMI recomputes the exact expiry ccy and transitions to EXPIRES. +// for UPDATEPHASE, the NMI recomputes the target timings // for INIT, the NMI initializes nextPeriodCcy, and if expiryCcy != 0 includes UPDATEEXPIRY. -enum class WaveformMode : uint8_t {INFINITE = 0, EXPIRES = 1, UPDATEEXPIRY = 2, INIT = 3}; +enum class WaveformMode : uint8_t {INFINITE = 0, EXPIRES = 1, UPDATEEXPIRY = 2, UPDATEPHASE = 3, INIT = 4}; // Waveform generator can create tones, PWM, and servos typedef struct { - uint32_t nextPeriodCcy; // ESP clock cycle when a period begins. If WaveformMode::INIT, temporarily holds positive phase offset ccy count + uint32_t nextPeriodCcy; // ESP clock cycle when a period begins. uint32_t endDutyCcy; // ESP clock cycle when going from duty to off int32_t dutyCcys; // Set next off cycle at low->high to maintain phase int32_t adjDutyCcys; // Temporary correction for next period int32_t periodCcys; // Set next phase cycle at low->high to maintain phase uint32_t expiryCcy; // For time-limited waveform, the CPU clock cycle when this waveform must stop. If WaveformMode::UPDATE, temporarily holds relative ccy count WaveformMode mode; + uint32_t phaseCcy; // positive phase offset ccy count int8_t alignPhase; // < 0 no phase alignment, otherwise starts waveform in relative phase offset to given pin bool autoPwm; // perform PWM duty to idle cycle ratio correction under high load at the expense of precise timings } Waveform; @@ -200,15 +202,15 @@ int startWaveformClockCycles_weak(uint8_t pin, uint32_t highCcys, uint32_t lowCc wave.adjDutyCcys = 0; wave.periodCcys = periodCcys; wave.autoPwm = autoPwm; + wave.alignPhase = (alignPhase < 0) ? -1 : alignPhase; + wave.phaseCcy = phaseOffsetCcys; std::atomic_thread_fence(std::memory_order_acquire); const uint32_t pinBit = 1UL << pin; if (!(waveform.enabled & pinBit)) { // wave.nextPeriodCcy and wave.endDutyCcy are initialized by the ISR - wave.nextPeriodCcy = phaseOffsetCcys; wave.expiryCcy = runTimeCcys; // in WaveformMode::INIT, temporarily hold relative cycle count wave.mode = WaveformMode::INIT; - wave.alignPhase = (alignPhase < 0) ? -1 : alignPhase; if (!wave.dutyCcys) { // If initially at zero duty cycle, force GPIO off if (pin == 16) { @@ -232,11 +234,16 @@ int startWaveformClockCycles_weak(uint8_t pin, uint32_t highCcys, uint32_t lowCc else { wave.mode = WaveformMode::INFINITE; // turn off possible expiry to make update atomic from NMI std::atomic_thread_fence(std::memory_order_release); - wave.expiryCcy = runTimeCcys; // in WaveformMode::UPDATEEXPIRY, temporarily hold relative cycle count if (runTimeCcys) { + wave.expiryCcy = runTimeCcys; // in WaveformMode::UPDATEEXPIRY, temporarily hold relative cycle count wave.mode = WaveformMode::UPDATEEXPIRY; std::atomic_thread_fence(std::memory_order_release); waveform.toSetBits = 1UL << pin; + } else if (alignPhase) { + // @willmmiles new feature + wave.mode = WaveformMode::UPDATEPHASE; // recalculate start + std::atomic_thread_fence(std::memory_order_release); + waveform.toSetBits = 1UL << pin; } } std::atomic_thread_fence(std::memory_order_acq_rel); @@ -292,12 +299,13 @@ static inline IRAM_ATTR int32_t scaleCcys(const int32_t ccys, const bool isCPU2X } static IRAM_ATTR void timer1Interrupt() { + const uint32_t isrStartCcy = ESP.getCycleCount(); + int32_t clockDrift = isrStartCcy - waveform.nextEventCcy; + // ----- @willmmiles begin patch ----- nmiCrashWorkaround(); // ----- @willmmiles end patch ----- - const uint32_t isrStartCcy = ESP.getCycleCount(); - int32_t clockDrift = isrStartCcy - waveform.nextEventCcy; const bool isCPU2X = CPU2X & 1; if ((waveform.toSetBits && !(waveform.enabled & waveform.toSetBits)) || waveform.toDisableBits) { // Handle enable/disable requests from main app. @@ -328,6 +336,15 @@ static IRAM_ATTR void timer1Interrupt() { wave.expiryCcy = wave.nextPeriodCcy + scaleCcys(wave.expiryCcy, isCPU2X); wave.mode = WaveformMode::EXPIRES; break; + // @willmmiles new feature + case WaveformMode::UPDATEPHASE: + // in WaveformMode::UPDATEPHASE, we recalculate the targets without adjusting the state + if (wave.alignPhase >= 0 && waveform.enabled & (1UL << wave.alignPhase)) { + auto& align_wave = waveform.pins[wave.alignPhase]; + // Go back one cycle + wave.nextPeriodCcy = align_wave.nextPeriodCcy - scaleCcys(align_wave.periodCcys, isCPU2X) + scaleCcys(wave.phaseCcy, isCPU2X); + wave.endDutyCcy = wave.nextPeriodCcy + scaleCcys(wave.dutyCcys, isCPU2X); + } default: break; } @@ -355,11 +372,13 @@ static IRAM_ATTR void timer1Interrupt() { Waveform& wave = waveform.pins[pin]; +/* @willmmiles - wtf? We don't want to accumulate drift if (clockDrift) { wave.endDutyCcy += clockDrift; wave.nextPeriodCcy += clockDrift; wave.expiryCcy += clockDrift; } +*/ uint32_t waveNextEventCcy = (waveform.states & pinBit) ? wave.endDutyCcy : wave.nextPeriodCcy; if (WaveformMode::EXPIRES == wave.mode && diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp index 5dcb7f948..e631190d4 100644 --- a/wled00/bus_manager.cpp +++ b/wled00/bus_manager.cpp @@ -612,7 +612,7 @@ void BusPwm::show() { if (_reversed) duty = maxBri - duty; #ifdef ESP8266 - stopWaveform(_pins[i]); + //stopWaveform(_pins[i]); // can cause the waveform to miss a cycle. instead we risk crossovers. startWaveformClockCycles(_pins[i], duty, analogPeriod - duty, 0, i ? _pins[0] : -1, hPoint, false); #else unsigned channel = _ledcStart + i; From 3c7f83407b64f21fa20262189c215fdd9852a972 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Sat, 28 Sep 2024 23:15:20 -0400 Subject: [PATCH 3/6] Save a little RAM --- .../src/core_esp8266_waveform_phase.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp index 12a12066c..60b9b0eb5 100644 --- a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp +++ b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp @@ -117,8 +117,6 @@ typedef struct { int32_t periodCcys; // Set next phase cycle at low->high to maintain phase uint32_t expiryCcy; // For time-limited waveform, the CPU clock cycle when this waveform must stop. If WaveformMode::UPDATE, temporarily holds relative ccy count WaveformMode mode; - uint32_t phaseCcy; // positive phase offset ccy count - int8_t alignPhase; // < 0 no phase alignment, otherwise starts waveform in relative phase offset to given pin bool autoPwm; // perform PWM duty to idle cycle ratio correction under high load at the expense of precise timings } Waveform; @@ -133,6 +131,11 @@ namespace { int32_t toSetBits = 0; // Message to the NMI handler to start/modify exactly one waveform int32_t toDisableBits = 0; // Message to the NMI handler to disable exactly one pin from waveform generation + // toSetBits temporaries + // cheaper than packing them in every Waveform, since we permit only one use at a time + uint32_t phaseCcy; // positive phase offset ccy count + int8_t alignPhase; // < 0 no phase alignment, otherwise starts waveform in relative phase offset to given pin + uint32_t(*timer1CB)() = nullptr; bool timer1Running = false; @@ -202,8 +205,8 @@ int startWaveformClockCycles_weak(uint8_t pin, uint32_t highCcys, uint32_t lowCc wave.adjDutyCcys = 0; wave.periodCcys = periodCcys; wave.autoPwm = autoPwm; - wave.alignPhase = (alignPhase < 0) ? -1 : alignPhase; - wave.phaseCcy = phaseOffsetCcys; + waveform.alignPhase = (alignPhase < 0) ? -1 : alignPhase; + waveform.phaseCcy = phaseOffsetCcys; std::atomic_thread_fence(std::memory_order_acquire); const uint32_t pinBit = 1UL << pin; @@ -320,8 +323,8 @@ static IRAM_ATTR void timer1Interrupt() { switch (wave.mode) { case WaveformMode::INIT: waveform.states &= ~waveform.toSetBits; // Clear the state of any just started - if (wave.alignPhase >= 0 && waveform.enabled & (1UL << wave.alignPhase)) { - wave.nextPeriodCcy = waveform.pins[wave.alignPhase].nextPeriodCcy + wave.nextPeriodCcy; + if (waveform.alignPhase >= 0 && waveform.enabled & (1UL << waveform.alignPhase)) { + wave.nextPeriodCcy = waveform.pins[waveform.alignPhase].nextPeriodCcy + wave.nextPeriodCcy; } else { wave.nextPeriodCcy = waveform.nextEventCcy; @@ -339,10 +342,10 @@ static IRAM_ATTR void timer1Interrupt() { // @willmmiles new feature case WaveformMode::UPDATEPHASE: // in WaveformMode::UPDATEPHASE, we recalculate the targets without adjusting the state - if (wave.alignPhase >= 0 && waveform.enabled & (1UL << wave.alignPhase)) { - auto& align_wave = waveform.pins[wave.alignPhase]; + if (waveform.alignPhase >= 0 && waveform.enabled & (1UL << waveform.alignPhase)) { + auto& align_wave = waveform.pins[waveform.alignPhase]; // Go back one cycle - wave.nextPeriodCcy = align_wave.nextPeriodCcy - scaleCcys(align_wave.periodCcys, isCPU2X) + scaleCcys(wave.phaseCcy, isCPU2X); + wave.nextPeriodCcy = align_wave.nextPeriodCcy - scaleCcys(align_wave.periodCcys, isCPU2X) + scaleCcys(waveform.phaseCcy, isCPU2X); wave.endDutyCcy = wave.nextPeriodCcy + scaleCcys(wave.dutyCcys, isCPU2X); } default: From 59deebc961ec86f602504499ca45fda239a04bd6 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Sun, 29 Sep 2024 10:00:27 -0400 Subject: [PATCH 4/6] Improve PWM on ESP8266 - Better phase updates without dropping samples - Make second pin duty cycle always after first, even inverted --- .../src/core_esp8266_waveform_phase.cpp | 20 +++++++++++-------- wled00/bus_manager.cpp | 11 ++++++---- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp index 60b9b0eb5..b89ec8bc1 100644 --- a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp +++ b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp @@ -242,7 +242,7 @@ int startWaveformClockCycles_weak(uint8_t pin, uint32_t highCcys, uint32_t lowCc wave.mode = WaveformMode::UPDATEEXPIRY; std::atomic_thread_fence(std::memory_order_release); waveform.toSetBits = 1UL << pin; - } else if (alignPhase) { + } else if (alignPhase >= 0) { // @willmmiles new feature wave.mode = WaveformMode::UPDATEPHASE; // recalculate start std::atomic_thread_fence(std::memory_order_release); @@ -303,7 +303,7 @@ static inline IRAM_ATTR int32_t scaleCcys(const int32_t ccys, const bool isCPU2X static IRAM_ATTR void timer1Interrupt() { const uint32_t isrStartCcy = ESP.getCycleCount(); - int32_t clockDrift = isrStartCcy - waveform.nextEventCcy; + //int32_t clockDrift = isrStartCcy - waveform.nextEventCcy; // ----- @willmmiles begin patch ----- nmiCrashWorkaround(); @@ -341,12 +341,16 @@ static IRAM_ATTR void timer1Interrupt() { break; // @willmmiles new feature case WaveformMode::UPDATEPHASE: - // in WaveformMode::UPDATEPHASE, we recalculate the targets without adjusting the state + // in WaveformMode::UPDATEPHASE, we recalculate the targets if (waveform.alignPhase >= 0 && waveform.enabled & (1UL << waveform.alignPhase)) { - auto& align_wave = waveform.pins[waveform.alignPhase]; - // Go back one cycle - wave.nextPeriodCcy = align_wave.nextPeriodCcy - scaleCcys(align_wave.periodCcys, isCPU2X) + scaleCcys(waveform.phaseCcy, isCPU2X); - wave.endDutyCcy = wave.nextPeriodCcy + scaleCcys(wave.dutyCcys, isCPU2X); + // Compute phase shift to realign with target + auto& align_wave = waveform.pins[waveform.alignPhase]; + int32_t shift = static_cast(align_wave.nextPeriodCcy + scaleCcys(waveform.phaseCcy, isCPU2X) - wave.nextPeriodCcy); + const int32_t periodCcys = scaleCcys(wave.periodCcys, isCPU2X); + if (shift > periodCcys/2) shift -= periodCcys; + else if (shift <= -periodCcys/2) shift += periodCcys; + wave.nextPeriodCcy += shift; + wave.endDutyCcy += shift; } default: break; @@ -462,7 +466,7 @@ static IRAM_ATTR void timer1Interrupt() { } now = ESP.getCycleCount(); } - clockDrift = 0; + //clockDrift = 0; } int32_t callbackCcys = 0; diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp index e631190d4..c3c8a2121 100644 --- a/wled00/bus_manager.cpp +++ b/wled00/bus_manager.cpp @@ -565,7 +565,7 @@ void BusPwm::show() { const unsigned analogPeriod = F_CPU / _frequency; const unsigned maxBri = analogPeriod; // compute to clock cycle accuracy constexpr bool dithering = false; - constexpr unsigned bitShift = 7; // 2^7 clocks for dead time + constexpr unsigned bitShift = 8; // 256 clocks for dead time, ~3us at 80MHz #else // if _needsRefresh is true (UI hack) we are using dithering (credit @dedehai & @zalatnaicsongor) // https://github.com/Aircoookie/WLED/pull/4115 and https://github.com/zalatnaicsongor/WLED/pull/1) @@ -609,8 +609,10 @@ void BusPwm::show() { duty -= deadTime << 1; // shorten duty of larger signal except if full on } } - if (_reversed) duty = maxBri - duty; - + if (_reversed) { + if (i) hPoint += duty; // align start at time zero + duty = maxBri - duty; + } #ifdef ESP8266 //stopWaveform(_pins[i]); // can cause the waveform to miss a cycle. instead we risk crossovers. startWaveformClockCycles(_pins[i], duty, analogPeriod - duty, 0, i ? _pins[0] : -1, hPoint, false); @@ -625,7 +627,8 @@ void BusPwm::show() { ledc_update_duty((ledc_mode_t)gr, (ledc_channel_t)ch); #endif - hPoint += duty + (_reversed ? -1 : 1) * deadTime; // offset to cascade the signals + if (!_reversed) hPoint += duty; + hPoint += deadTime; // offset to cascade the signals if (hPoint >= maxBri) hPoint -= maxBri; // offset is out of bounds, reset } } From cb8dae1ddbe750527ad5d4f162a6aa1793748ce3 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Sun, 29 Sep 2024 10:13:19 -0400 Subject: [PATCH 5/6] PWM: Revert always apply dead time --- wled00/bus_manager.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp index c3c8a2121..cbcfa4b2b 100644 --- a/wled00/bus_manager.cpp +++ b/wled00/bus_manager.cpp @@ -573,11 +573,6 @@ void BusPwm::show() { const unsigned maxBri = (1<<_depth); // possible values: 16384 (14), 8192 (13), 4096 (12), 2048 (11), 1024 (10), 512 (9) and 256 (8) const unsigned bitShift = dithering * 4; // if dithering, _depth is 12 bit but LEDC channel is set to 8 bit (using 4 fractional bits) #endif - // add dead time between signals (when using dithering, two full 8bit pulses are required) - // this is needed for 2CH, but also adds some slack for ESP8266 which has less precise - // PWM timing. - const int deadTime = (1+dithering) << bitShift; - // use CIE brightness formula (cubic) to fit (or approximate linearity of) human eye perceived brightness // the formula is based on 12 bit resolution as there is no need for greater precision // see: https://en.wikipedia.org/wiki/Lightness @@ -601,9 +596,12 @@ void BusPwm::show() { // Phase shifting requires that LEDC timers are synchronised (see setup()). For PWM CCT (and H-bridge) it is // also mandatory that both channels use the same timer (pinManager takes care of that). for (unsigned i = 0; i < numPins; i++) { - unsigned duty = (_data[i] * pwmBri) / 255; + unsigned duty = (_data[i] * pwmBri) / 255; + unsigned deadTime = 0; if (_type == TYPE_ANALOG_2CH && Bus::getCCTBlend() == 0) { + // add dead time between signals (when using dithering, two full 8bit pulses are required) + deadTime = (1+dithering) << bitShift; // we only need to take care of shortening the signal at (almost) full brightness otherwise pulses may overlap if (_bri >= 254 && duty >= maxBri / 2 && duty < maxBri) { duty -= deadTime << 1; // shorten duty of larger signal except if full on From d37ee89e845ef83b75b1ad42345f73ad82c71629 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Sat, 9 Nov 2024 19:53:47 -0500 Subject: [PATCH 6/6] ESP8266PWM: Fix phase shift glitches In some cases it was possible for the computed phase shift to skip a cycle. Update the shift calculation logic to prevent this. --- .../src/core_esp8266_waveform_phase.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp index b89ec8bc1..68cb9010e 100644 --- a/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp +++ b/lib/ESP8266PWM/src/core_esp8266_waveform_phase.cpp @@ -324,7 +324,7 @@ static IRAM_ATTR void timer1Interrupt() { case WaveformMode::INIT: waveform.states &= ~waveform.toSetBits; // Clear the state of any just started if (waveform.alignPhase >= 0 && waveform.enabled & (1UL << waveform.alignPhase)) { - wave.nextPeriodCcy = waveform.pins[waveform.alignPhase].nextPeriodCcy + wave.nextPeriodCcy; + wave.nextPeriodCcy = waveform.pins[waveform.alignPhase].nextPeriodCcy + scaleCcys(waveform.phaseCcy, isCPU2X); } else { wave.nextPeriodCcy = waveform.nextEventCcy; @@ -342,15 +342,15 @@ static IRAM_ATTR void timer1Interrupt() { // @willmmiles new feature case WaveformMode::UPDATEPHASE: // in WaveformMode::UPDATEPHASE, we recalculate the targets - if (waveform.alignPhase >= 0 && waveform.enabled & (1UL << waveform.alignPhase)) { + if ((waveform.alignPhase >= 0) && (waveform.enabled & (1UL << waveform.alignPhase))) { // Compute phase shift to realign with target - auto& align_wave = waveform.pins[waveform.alignPhase]; - int32_t shift = static_cast(align_wave.nextPeriodCcy + scaleCcys(waveform.phaseCcy, isCPU2X) - wave.nextPeriodCcy); - const int32_t periodCcys = scaleCcys(wave.periodCcys, isCPU2X); - if (shift > periodCcys/2) shift -= periodCcys; - else if (shift <= -periodCcys/2) shift += periodCcys; - wave.nextPeriodCcy += shift; - wave.endDutyCcy += shift; + auto const newPeriodCcy = waveform.pins[waveform.alignPhase].nextPeriodCcy + scaleCcys(waveform.phaseCcy, isCPU2X); + auto const period = scaleCcys(wave.periodCcys, isCPU2X); + auto shift = ((static_cast (newPeriodCcy - wave.nextPeriodCcy) + period/2) % period) - (period/2); + wave.nextPeriodCcy += static_cast(shift); + if (static_cast(wave.endDutyCcy - wave.nextPeriodCcy) > 0) { + wave.endDutyCcy = wave.nextPeriodCcy; + } } default: break;