diff --git a/tasmota/core_esp8266_waveform.cpp b/tasmota/core_esp8266_waveform.cpp index 29a485874..371e9e554 100644 --- a/tasmota/core_esp8266_waveform.cpp +++ b/tasmota/core_esp8266_waveform.cpp @@ -3,25 +3,26 @@ supporting outputs on all pins in parallel. Copyright (c) 2018 Earle F. Philhower, III. All rights reserved. + Copyright (c) 2020 Dirk O. Kaar. The core idea is to have a programmable waveform generator with a unique - high and low period (defined in microseconds or CPU clock cycles). TIMER1 - is set to 1-shot mode and is always loaded with the time until the next - edge of any live waveforms. + high and low period (defined in microseconds or CPU clock cycles). TIMER1 is + set to 1-shot mode and is always loaded with the time until the next edge + of any live waveforms. Up to one waveform generator per pin supported. - Each waveform generator is synchronized to the ESP clock cycle counter, not - the timer. This allows for removing interrupt jitter and delay as the - counter always increments once per 80MHz clock. Changes to a waveform are + Each waveform generator is synchronized to the ESP clock cycle counter, not the + timer. This allows for removing interrupt jitter and delay as the counter + always increments once per 80MHz clock. Changes to a waveform are contiguous and only take effect on the next waveform transition, allowing for smooth transitions. This replaces older tone(), analogWrite(), and the Servo classes. - Everywhere in the code where "cycles" is used, it means ESP.getCycleCount() - clock cycle count, or an interval measured in CPU clock cycles, but not - TIMER1 cycles (which may be 2 CPU clock cycles @ 160MHz). + Everywhere in the code where "ccy" or "ccys" is used, it means ESP.getCycleCount() + clock cycle time, or an interval measured in clock cycles, but not TIMER1 + cycles (which may be 2 CPU clock cycles @ 160MHz). This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -40,576 +41,400 @@ #ifdef ESP8266 +#include "core_esp8266_waveform.h" #include #include "ets_sys.h" -#include "core_esp8266_waveform.h" -#include "user_interface.h" -extern "C" { +#include -// Internal-only calls, not for applications -extern void _setPWMFreq(uint32_t freq); -extern bool _stopPWM(int pin); -extern bool _setPWM(int pin, uint32_t val, uint32_t range); -extern int startWaveformClockCycles(uint8_t pin, uint32_t timeHighCycles, uint32_t timeLowCycles, uint32_t runTimeCycles); +// Timer is 80MHz fixed. 160MHz CPU frequency need scaling. +constexpr bool ISCPUFREQ160MHZ = clockCyclesPerMicrosecond() == 160; +// Maximum delay between IRQs, Timer1, <= 2^23 / 80MHz +constexpr int32_t MAXIRQTICKSCCYS = microsecondsToClockCycles(10000); +// Maximum servicing time for any single IRQ +constexpr uint32_t ISRTIMEOUTCCYS = microsecondsToClockCycles(18); +// The latency between in-ISR rearming of the timer and the earliest firing +constexpr int32_t IRQLATENCYCCYS = microsecondsToClockCycles(2); +// The SDK and hardware take some time to actually get to our NMI code +constexpr int32_t DELTAIRQCCYS = ISCPUFREQ160MHZ ? + microsecondsToClockCycles(2) >> 1 : microsecondsToClockCycles(2); -// Maximum delay between IRQs -#define MAXIRQUS (10000) +// for INFINITE, the NMI proceeds on the waveform without expiry deadline. +// for EXPIRES, the NMI expires the waveform automatically on the expiry ccy. +// for UPDATEEXPIRY, the NMI recomputes the exact expiry ccy and transitions to EXPIRES. +// for INIT, the NMI initializes nextPeriodCcy, and if expiryCcy != 0 includes UPDATEEXPIRY. +enum class WaveformMode : uint8_t {INFINITE = 0, EXPIRES = 1, UPDATEEXPIRY = 2, INIT = 3}; // Waveform generator can create tones, PWM, and servos typedef struct { - uint32_t nextServiceCycle; // ESP cycle timer when a transition required - uint32_t expiryCycle; // For time-limited waveform, the cycle when this waveform must stop - uint32_t timeHighCycles; // Actual running waveform period (adjusted using desiredCycles) - uint32_t timeLowCycles; // - uint32_t desiredHighCycles; // Ideal waveform period to drive the error signal - uint32_t desiredLowCycles; // - uint32_t lastEdge; // Cycle when this generator last changed + uint32_t nextPeriodCcy; // ESP clock cycle when a period begins. If WaveformMode::INIT, temporarily holds positive phase offset ccy count + uint32_t endDutyCcy; // ESP clock cycle when going from duty to off + int32_t dutyCcys; // Set next off cycle at low->high to maintain phase + int32_t adjDutyCcys; // Temporary correction for next period + int32_t periodCcys; // Set next phase cycle at low->high to maintain phase + uint32_t expiryCcy; // For time-limited waveform, the CPU clock cycle when this waveform must stop. If WaveformMode::UPDATE, temporarily holds relative ccy count + WaveformMode mode; + int8_t alignPhase; // < 0 no phase alignment, otherwise starts waveform in relative phase offset to given pin + bool autoPwm; // perform PWM duty to idle cycle ratio correction under high load at the expense of precise timings } Waveform; -class WVFState { -public: - Waveform waveform[17]; // State of all possible pins - uint32_t waveformState = 0; // Is the pin high or low, updated in NMI so no access outside the NMI code - uint32_t waveformEnabled = 0; // Is it actively running, updated in NMI so no access outside the NMI code +namespace { - // Enable lock-free by only allowing updates to waveformState and waveformEnabled from IRQ service routine - uint32_t waveformToEnable = 0; // Message to the NMI handler to start a waveform on a inactive pin - uint32_t waveformToDisable = 0; // Message to the NMI handler to disable a pin from waveform generation + static struct { + Waveform pins[17]; // State of all possible pins + uint32_t states = 0; // Is the pin high or low, updated in NMI so no access outside the NMI code + uint32_t enabled = 0; // Is it actively running, updated in NMI so no access outside the NMI code - uint32_t waveformToChange = 0; // Mask of pin to change. One bit set in main app, cleared when effected in the NMI - uint32_t waveformNewHigh = 0; - uint32_t waveformNewLow = 0; + // Enable lock-free by only allowing updates to waveform.states and waveform.enabled from IRQ service routine + int32_t toSetBits = 0; // Message to the NMI handler to start/modify exactly one waveform + int32_t toDisableBits = 0; // Message to the NMI handler to disable exactly one pin from waveform generation - uint32_t (*timer1CB)() = NULL; + uint32_t(*timer1CB)() = nullptr; - // Optimize the NMI inner loop by keeping track of the min and max GPIO that we - // are generating. In the common case (1 PWM) these may be the same pin and - // we can avoid looking at the other pins. - uint16_t startPin = 0; - uint16_t endPin = 0; -}; -static WVFState wvfState; + bool timer1Running = false; + uint32_t nextEventCcy; + } waveform; -// Ensure everything is read/written to RAM -#define MEMBARRIER() { __asm__ volatile("" ::: "memory"); } +} + +// Interrupt on/off control +static ICACHE_RAM_ATTR void timer1Interrupt(); // Non-speed critical bits #pragma GCC optimize ("Os") -// Interrupt on/off control -static ICACHE_RAM_ATTR void timer1Interrupt(); -static bool timerRunning = false; - -static __attribute__((noinline)) void initTimer() { - if (!timerRunning) { - timer1_disable(); - ETS_FRC_TIMER1_INTR_ATTACH(NULL, NULL); - ETS_FRC_TIMER1_NMI_INTR_ATTACH(timer1Interrupt); - timer1_enable(TIM_DIV1, TIM_EDGE, TIM_SINGLE); - timerRunning = true; - timer1_write(microsecondsToClockCycles(10)); - } +static void initTimer() { + timer1_disable(); + ETS_FRC_TIMER1_INTR_ATTACH(NULL, NULL); + ETS_FRC_TIMER1_NMI_INTR_ATTACH(timer1Interrupt); + timer1_enable(TIM_DIV1, TIM_EDGE, TIM_SINGLE); + waveform.timer1Running = true; + timer1_write(IRQLATENCYCCYS); // Cause an interrupt post-haste } -static ICACHE_RAM_ATTR void forceTimerInterrupt() { - if (T1L > microsecondsToClockCycles(10)) { - T1L = microsecondsToClockCycles(10); - } +static void ICACHE_RAM_ATTR deinitTimer() { + ETS_FRC_TIMER1_NMI_INTR_ATTACH(NULL); + timer1_disable(); + timer1_isr_init(); + waveform.timer1Running = false; } -// PWM implementation using special purpose state machine -// -// Keep an ordered list of pins with the delta in cycles between each -// element, with a terminal entry making up the remainder of the PWM -// period. With this method sum(all deltas) == PWM period clock cycles. -// -// At t=0 set all pins high and set the timeout for the 1st edge. -// On interrupt, if we're at the last element reset to t=0 state -// Otherwise, clear that pin down and set delay for next element -// and so forth. +extern "C" { -constexpr int maxPWMs = 8; - -// PWM machine state -typedef struct PWMState { - uint32_t mask = 0; // Bitmask of active pins - uint32_t cnt = 0; // How many entries - uint32_t idx = 0; // Where the state machine is along the list - uint8_t pin[maxPWMs + 1]; - uint32_t delta[maxPWMs + 1]; - uint32_t nextServiceCycle; // Clock cycle for next step - struct PWMState *pwmUpdate; // Set by main code, cleared by ISR -} PWMState; - -static PWMState pwmState; -static uint32_t _pwmPeriod = microsecondsToClockCycles(1000000UL) / 1000; - - -// If there are no more scheduled activities, shut down Timer 1. -// Otherwise, do nothing. -static ICACHE_RAM_ATTR void disableIdleTimer() { - if (timerRunning && !wvfState.waveformEnabled && !pwmState.cnt && !wvfState.timer1CB) { - ETS_FRC_TIMER1_NMI_INTR_ATTACH(NULL); - timer1_disable(); - timer1_isr_init(); - timerRunning = false; - } -} - -// Notify the NMI that a new PWM state is available through the mailbox. -// Wait for mailbox to be emptied (either busy or delay() as needed) -static ICACHE_RAM_ATTR void _notifyPWM(PWMState *p, bool idle) { - p->pwmUpdate = nullptr; - pwmState.pwmUpdate = p; - MEMBARRIER(); - forceTimerInterrupt(); - while (pwmState.pwmUpdate) { - if (idle) { - delay(0); - } - MEMBARRIER(); - } -} - -static void _addPWMtoList(PWMState &p, int pin, uint32_t val, uint32_t range); - -// Called when analogWriteFreq() changed to update the PWM total period -void _setPWMFreq(uint32_t freq) { - // Convert frequency into clock cycles - uint32_t cc = microsecondsToClockCycles(1000000UL) / freq; - - // Simple static adjustment to bring period closer to requested due to overhead -#if F_CPU == 80000000 - cc -= microsecondsToClockCycles(2); -#else - cc -= microsecondsToClockCycles(1); -#endif - - if (cc == _pwmPeriod) { - return; // No change - } - - _pwmPeriod = cc; - - if (pwmState.cnt) { - PWMState p; // The working copy since we can't edit the one in use - p.cnt = 0; - for (uint32_t i = 0; i < pwmState.cnt; i++) { - auto pin = pwmState.pin[i]; - _addPWMtoList(p, pin, wvfState.waveform[pin].desiredHighCycles, wvfState.waveform[pin].desiredLowCycles); - } - // Update and wait for mailbox to be emptied +// Set a callback. Pass in NULL to stop it +void setTimer1Callback(uint32_t (*fn)()) { + waveform.timer1CB = fn; + std::atomic_thread_fence(std::memory_order_acq_rel); + if (!waveform.timer1Running && fn) { initTimer(); - _notifyPWM(&p, true); - disableIdleTimer(); + } else if (waveform.timer1Running && !fn && !waveform.enabled) { + deinitTimer(); } } -// Helper routine to remove an entry from the state machine -// and clean up any marked-off entries -static void _cleanAndRemovePWM(PWMState *p, int pin) { - uint32_t leftover = 0; - uint32_t in, out; - for (in = 0, out = 0; in < p->cnt; in++) { - if ((p->pin[in] != pin) && (p->mask & (1<pin[in]))) { - p->pin[out] = p->pin[in]; - p->delta[out] = p->delta[in] + leftover; - leftover = 0; - out++; - } else { - leftover += p->delta[in]; - p->mask &= ~(1<pin[in]); - } - } - p->cnt = out; - // Final pin is never used: p->pin[out] = 0xff; - p->delta[out] = p->delta[in] + leftover; -} - - -// Disable PWM on a specific pin (i.e. when a digitalWrite or analogWrite(0%/100%)) -ICACHE_RAM_ATTR bool _stopPWM(int pin) { - if (!((1<= _pwmPeriod) { - _stopPWM(pin); - digitalWrite(pin, HIGH); - return; - } - - if (p.cnt == 0) { - // Starting up from scratch, special case 1st element and PWM period - p.pin[0] = pin; - p.delta[0] = cc; - // Final pin is never used: p.pin[1] = 0xff; - p.delta[1] = _pwmPeriod - cc; - } else { - uint32_t ttl = 0; - uint32_t i; - // Skip along until we're at the spot to insert - for (i=0; (i <= p.cnt) && (ttl + p.delta[i] < cc); i++) { - ttl += p.delta[i]; - } - // Shift everything out by one to make space for new edge - for (int32_t j = p.cnt; j >= (int)i; j--) { - p.pin[j + 1] = p.pin[j]; - p.delta[j + 1] = p.delta[j]; - } - int off = cc - ttl; // The delta from the last edge to the one we're inserting - p.pin[i] = pin; - p.delta[i] = off; // Add the delta to this new pin - p.delta[i + 1] -= off; // And subtract it from the follower to keep sum(deltas) constant - } - p.cnt++; - p.mask |= 1<= maxPWMs) { - return false; // No space left - } - - _addPWMtoList(p, pin, val, range); - - // Set mailbox and wait for ISR to copy it over - initTimer(); - _notifyPWM(&p, true); - disableIdleTimer(); - return true; +int startWaveform(uint8_t pin, uint32_t highUS, uint32_t lowUS, + uint32_t runTimeUS, int8_t alignPhase, uint32_t phaseOffsetUS, bool autoPwm) { + return startWaveformClockCycles(pin, + microsecondsToClockCycles(highUS), microsecondsToClockCycles(lowUS), + microsecondsToClockCycles(runTimeUS), alignPhase, microsecondsToClockCycles(phaseOffsetUS), autoPwm); } // Start up a waveform on a pin, or change the current one. Will change to the new // waveform smoothly on next low->high transition. For immediate change, stopWaveform() // first, then it will immediately begin. -int startWaveform(uint8_t pin, uint32_t timeHighUS, uint32_t timeLowUS, uint32_t runTimeUS) { - return startWaveformClockCycles(pin, microsecondsToClockCycles(timeHighUS), microsecondsToClockCycles(timeLowUS), microsecondsToClockCycles(runTimeUS)); -} - -int startWaveformClockCycles(uint8_t pin, uint32_t timeHighCycles, uint32_t timeLowCycles, uint32_t runTimeCycles) { - if ((pin > 16) || isFlashInterfacePin(pin)) { +int startWaveformClockCycles(uint8_t pin, uint32_t highCcys, uint32_t lowCcys, + uint32_t runTimeCcys, int8_t alignPhase, uint32_t phaseOffsetCcys, bool autoPwm) { + uint32_t periodCcys = highCcys + lowCcys; + if (periodCcys < MAXIRQTICKSCCYS) { + if (!highCcys) { + periodCcys = (MAXIRQTICKSCCYS / periodCcys) * periodCcys; + } + else if (!lowCcys) { + highCcys = periodCcys = (MAXIRQTICKSCCYS / periodCcys) * periodCcys; + } + } + // sanity checks, including mixed signed/unsigned arithmetic safety + if ((pin > 16) || isFlashInterfacePin(pin) || (alignPhase > 16) || + static_cast(periodCcys) <= 0 || + static_cast(highCcys) < 0 || static_cast(lowCcys) < 0) { return false; } - Waveform *wave = &wvfState.waveform[pin]; - wave->expiryCycle = runTimeCycles ? ESP.getCycleCount() + runTimeCycles : 0; - if (runTimeCycles && !wave->expiryCycle) { - wave->expiryCycle = 1; // expiryCycle==0 means no timeout, so avoid setting it - } + Waveform& wave = waveform.pins[pin]; + wave.dutyCcys = highCcys; + wave.adjDutyCcys = 0; + wave.periodCcys = periodCcys; + wave.autoPwm = autoPwm; - _stopPWM(pin); // Make sure there's no PWM live here - - uint32_t mask = 1<timeHighCycles = timeHighCycles; - wave->desiredHighCycles = timeHighCycles; - wave->timeLowCycles = timeLowCycles; - wave->desiredLowCycles = timeLowCycles; - wave->lastEdge = 0; - wave->nextServiceCycle = ESP.getCycleCount() + microsecondsToClockCycles(1); - wvfState.waveformToEnable |= mask; - MEMBARRIER(); - initTimer(); - forceTimerInterrupt(); - while (wvfState.waveformToEnable) { - delay(0); // Wait for waveform to update - // No mem barrier here, the call to a global function implies global state updated + std::atomic_thread_fence(std::memory_order_release); + waveform.toSetBits = 1UL << pin; + std::atomic_thread_fence(std::memory_order_release); + if (!waveform.timer1Running) { + initTimer(); + } + else if (T1V > IRQLATENCYCCYS) { + // Must not interfere if Timer is due shortly + timer1_write(IRQLATENCYCCYS); } } - + else { + wave.mode = WaveformMode::INFINITE; // turn off possible expiry to make update atomic from NMI + std::atomic_thread_fence(std::memory_order_release); + wave.expiryCcy = runTimeCcys; // in WaveformMode::UPDATEEXPIRY, temporarily hold relative cycle count + if (runTimeCcys) { + wave.mode = WaveformMode::UPDATEEXPIRY; + std::atomic_thread_fence(std::memory_order_release); + waveform.toSetBits = 1UL << pin; + } + } + std::atomic_thread_fence(std::memory_order_acq_rel); + while (waveform.toSetBits) { + delay(0); // Wait for waveform to update + std::atomic_thread_fence(std::memory_order_acquire); + } return true; } - -// Set a callback. Pass in NULL to stop it -void setTimer1Callback(uint32_t (*fn)()) { - wvfState.timer1CB = fn; - if (fn) { - initTimer(); - forceTimerInterrupt(); - } - disableIdleTimer(); -} - - -// Speed critical bits -#pragma GCC optimize ("O2") - -// Normally would not want two copies like this, but due to different -// optimization levels the inline attribute gets lost if we try the -// other version. -static inline ICACHE_RAM_ATTR uint32_t GetCycleCountIRQ() { - uint32_t ccount; - __asm__ __volatile__("rsr %0,ccount":"=a"(ccount)); - return ccount; -} - -static inline ICACHE_RAM_ATTR uint32_t min_u32(uint32_t a, uint32_t b) { - if (a < b) { - return a; - } - return b; -} - // Stops a waveform on a pin int ICACHE_RAM_ATTR stopWaveform(uint8_t pin) { // Can't possibly need to stop anything if there is no timer active - if (!timerRunning) { + if (!waveform.timer1Running) { return false; } // If user sends in a pin >16 but <32, this will always point to a 0 bit // If they send >=32, then the shift will result in 0 and it will also return false - uint32_t mask = 1< IRQLATENCYCCYS) { + timer1_write(IRQLATENCYCCYS); } - forceTimerInterrupt(); - while (wvfState.waveformToDisable) { - MEMBARRIER(); // If it wasn't written yet, it has to be by now + while (waveform.toDisableBits) { /* no-op */ // Can't delay() since stopWaveform may be called from an IRQ + std::atomic_thread_fence(std::memory_order_acquire); } } - disableIdleTimer(); + if (!waveform.enabled && !waveform.timer1CB) { + deinitTimer(); + } return true; } -// The SDK and hardware take some time to actually get to our NMI code, so -// decrement the next IRQ's timer value by a bit so we can actually catch the -// real CPU cycle counter we want for the waveforms. - -// The SDK also sometimes is running at a different speed the the Arduino core -// so the ESP cycle counter is actually running at a variable speed. -// adjust(x) takes care of adjusting a delta clock cycle amount accordingly. -#if F_CPU == 80000000 - #define DELTAIRQ (microsecondsToClockCycles(3)) - #define adjust(x) ((x) << (turbo ? 1 : 0)) -#else - #define DELTAIRQ (microsecondsToClockCycles(2)) - #define adjust(x) ((x) >> (turbo ? 0 : 1)) -#endif - - -static ICACHE_RAM_ATTR void timer1Interrupt() { - // Flag if the core is at 160 MHz, for use by adjust() - bool turbo = (*(uint32_t*)0x3FF00014) & 1 ? true : false; - - uint32_t nextEventCycles = microsecondsToClockCycles(MAXIRQUS); - uint32_t timeoutCycle = GetCycleCountIRQ() + microsecondsToClockCycles(14); - - if (wvfState.waveformToEnable || wvfState.waveformToDisable) { - // Handle enable/disable requests from main app - wvfState.waveformEnabled = (wvfState.waveformEnabled & ~wvfState.waveformToDisable) | wvfState.waveformToEnable; // Set the requested waveforms on/off - wvfState.waveformState &= ~wvfState.waveformToEnable; // And clear the state of any just started - wvfState.waveformToEnable = 0; - wvfState.waveformToDisable = 0; - // No mem barrier. Globals must be written to RAM on ISR exit. - // Find the first GPIO being generated by checking GCC's find-first-set (returns 1 + the bit of the first 1 in an int32_t) - wvfState.startPin = __builtin_ffs(wvfState.waveformEnabled) - 1; - // Find the last bit by subtracting off GCC's count-leading-zeros (no offset in this one) - wvfState.endPin = 32 - __builtin_clz(wvfState.waveformEnabled); - } else if (!pwmState.cnt && pwmState.pwmUpdate) { - // Start up the PWM generator by copying from the mailbox - pwmState.cnt = 1; - pwmState.idx = 1; // Ensure copy this cycle, cause it to start at t=0 - pwmState.nextServiceCycle = GetCycleCountIRQ(); // Do it this loop! - // No need for mem barrier here. Global must be written by IRQ exit - } - - bool done = false; - if (wvfState.waveformEnabled || pwmState.cnt) { - do { - nextEventCycles = microsecondsToClockCycles(MAXIRQUS); - - // PWM state machine implementation - if (pwmState.cnt) { - int32_t cyclesToGo = pwmState.nextServiceCycle - GetCycleCountIRQ(); - if (cyclesToGo < 0) { - if (pwmState.idx == pwmState.cnt) { // Start of pulses, possibly copy new - if (pwmState.pwmUpdate) { - // Do the memory copy from temp to global and clear mailbox - pwmState = *(PWMState*)pwmState.pwmUpdate; - } - GPOS = pwmState.mask; // Set all active pins high - if (pwmState.mask & (1<<16)) { - GP16O = 1; - } - pwmState.idx = 0; - } else { - do { - // Drop the pin at this edge - if (pwmState.mask & (1<expiryCycle) { - int32_t expiryToGo = wave->expiryCycle - now; - if (expiryToGo < 0) { - // Done, remove! - if (i == 16) { - GP16O = 0; - } - GPOC = mask; - wvfState.waveformEnabled &= ~mask; - continue; - } - } - - // Check for toggles - int32_t cyclesToGo = wave->nextServiceCycle - now; - if (cyclesToGo < 0) { - uint32_t nextEdgeCycles; - uint32_t desired = 0; - uint32_t *timeToUpdate; - wvfState.waveformState ^= mask; - if (wvfState.waveformState & mask) { - if (i == 16) { - GP16O = 1; - } - GPOS = mask; - - if (wvfState.waveformToChange & mask) { - // Copy over next full-cycle timings - wave->timeHighCycles = wvfState.waveformNewHigh; - wave->desiredHighCycles = wvfState.waveformNewHigh; - wave->timeLowCycles = wvfState.waveformNewLow; - wave->desiredLowCycles = wvfState.waveformNewLow; - wave->lastEdge = 0; - wvfState.waveformToChange = 0; - } - if (wave->lastEdge) { - desired = wave->desiredLowCycles; - timeToUpdate = &wave->timeLowCycles; - } - nextEdgeCycles = wave->timeHighCycles; - } else { - if (i == 16) { - GP16O = 0; - } - GPOC = mask; - desired = wave->desiredHighCycles; - timeToUpdate = &wave->timeHighCycles; - nextEdgeCycles = wave->timeLowCycles; - } - if (desired) { - desired = adjust(desired); - int32_t err = desired - (now - wave->lastEdge); - if (abs(err) < desired) { // If we've lost > the entire phase, ignore this error signal - err /= 2; - *timeToUpdate += err; - } - } - nextEdgeCycles = adjust(nextEdgeCycles); - wave->nextServiceCycle = now + nextEdgeCycles; - nextEventCycles = min_u32(nextEventCycles, nextEdgeCycles); - wave->lastEdge = now; - } else { - uint32_t deltaCycles = wave->nextServiceCycle - now; - nextEventCycles = min_u32(nextEventCycles, deltaCycles); - } - } - - // Exit the loop if we've hit the fixed runtime limit or the next event is known to be after that timeout would occur - uint32_t now = GetCycleCountIRQ(); - int32_t cycleDeltaNextEvent = timeoutCycle - (now + nextEventCycles); - int32_t cyclesLeftTimeout = timeoutCycle - now; - done = (cycleDeltaNextEvent < 0) || (cyclesLeftTimeout < 0); - } while (!done); - } // if (wvfState.waveformEnabled) - - if (wvfState.timer1CB) { - nextEventCycles = min_u32(nextEventCycles, wvfState.timer1CB()); - } - - if (nextEventCycles < microsecondsToClockCycles(5)) { - nextEventCycles = microsecondsToClockCycles(5); - } - nextEventCycles -= DELTAIRQ; - - // Do it here instead of global function to save time and because we know it's edge-IRQ - T1L = nextEventCycles >> (turbo ? 1 : 0); -} - }; -#endif // ESP8266 \ No newline at end of file +// Speed critical bits +#pragma GCC optimize ("O2") + +// For dynamic CPU clock frequency switch in loop the scaling logic would have to be adapted. +// Using constexpr makes sure that the CPU clock frequency is compile-time fixed. +static inline ICACHE_RAM_ATTR int32_t scaleCcys(const int32_t ccys, const bool isCPU2X) { + if (ISCPUFREQ160MHZ) { + return isCPU2X ? ccys : (ccys >> 1); + } + else { + return isCPU2X ? (ccys << 1) : ccys; + } +} + +static ICACHE_RAM_ATTR void timer1Interrupt() { + const uint32_t isrStartCcy = ESP.getCycleCount(); + int32_t clockDrift = isrStartCcy - waveform.nextEventCcy; + const bool isCPU2X = CPU2X & 1; + if ((waveform.toSetBits && !(waveform.enabled & waveform.toSetBits)) || waveform.toDisableBits) { + // Handle enable/disable requests from main app. + waveform.enabled = (waveform.enabled & ~waveform.toDisableBits) | waveform.toSetBits; // Set the requested waveforms on/off + // Find the first GPIO being generated by checking GCC's find-first-set (returns 1 + the bit of the first 1 in an int32_t) + waveform.toDisableBits = 0; + } + + if (waveform.toSetBits) { + const int toSetPin = __builtin_ffs(waveform.toSetBits) - 1; + Waveform& wave = waveform.pins[toSetPin]; + switch (wave.mode) { + case WaveformMode::INIT: + waveform.states &= ~waveform.toSetBits; // Clear the state of any just started + if (wave.alignPhase >= 0 && waveform.enabled & (1UL << wave.alignPhase)) { + wave.nextPeriodCcy = waveform.pins[wave.alignPhase].nextPeriodCcy + wave.nextPeriodCcy; + } + else { + wave.nextPeriodCcy = waveform.nextEventCcy; + } + if (!wave.expiryCcy) { + wave.mode = WaveformMode::INFINITE; + break; + } + // fall through + case WaveformMode::UPDATEEXPIRY: + // in WaveformMode::UPDATEEXPIRY, expiryCcy temporarily holds relative CPU cycle count + wave.expiryCcy = wave.nextPeriodCcy + scaleCcys(wave.expiryCcy, isCPU2X); + wave.mode = WaveformMode::EXPIRES; + break; + default: + break; + } + waveform.toSetBits = 0; + } + + // Exit the loop if the next event, if any, is sufficiently distant. + const uint32_t isrTimeoutCcy = isrStartCcy + ISRTIMEOUTCCYS; + uint32_t busyPins = waveform.enabled; + waveform.nextEventCcy = isrStartCcy + MAXIRQTICKSCCYS; + + uint32_t now = ESP.getCycleCount(); + uint32_t isrNextEventCcy = now; + while (busyPins) { + if (static_cast(isrNextEventCcy - now) > IRQLATENCYCCYS) { + waveform.nextEventCcy = isrNextEventCcy; + break; + } + isrNextEventCcy = waveform.nextEventCcy; + uint32_t loopPins = busyPins; + while (loopPins) { + const int pin = __builtin_ffsl(loopPins) - 1; + const uint32_t pinBit = 1UL << pin; + loopPins ^= pinBit; + + Waveform& wave = waveform.pins[pin]; + + if (clockDrift) { + wave.endDutyCcy += clockDrift; + wave.nextPeriodCcy += clockDrift; + wave.expiryCcy += clockDrift; + } + + uint32_t waveNextEventCcy = (waveform.states & pinBit) ? wave.endDutyCcy : wave.nextPeriodCcy; + if (WaveformMode::EXPIRES == wave.mode && + static_cast(waveNextEventCcy - wave.expiryCcy) >= 0 && + static_cast(now - wave.expiryCcy) >= 0) { + // Disable any waveforms that are done + waveform.enabled ^= pinBit; + busyPins ^= pinBit; + } + else { + const int32_t overshootCcys = now - waveNextEventCcy; + if (overshootCcys >= 0) { + const int32_t periodCcys = scaleCcys(wave.periodCcys, isCPU2X); + if (waveform.states & pinBit) { + // active configuration and forward are 100% duty + if (wave.periodCcys == wave.dutyCcys) { + wave.nextPeriodCcy += periodCcys; + wave.endDutyCcy = wave.nextPeriodCcy; + } + else { + if (wave.autoPwm) { + wave.adjDutyCcys += overshootCcys; + } + waveform.states ^= pinBit; + if (16 == pin) { + GP16O = 0; + } + else { + GPOC = pinBit; + } + } + waveNextEventCcy = wave.nextPeriodCcy; + } + else { + wave.nextPeriodCcy += periodCcys; + if (!wave.dutyCcys) { + wave.endDutyCcy = wave.nextPeriodCcy; + } + else { + int32_t dutyCcys = scaleCcys(wave.dutyCcys, isCPU2X); + if (dutyCcys <= wave.adjDutyCcys) { + dutyCcys >>= 1; + wave.adjDutyCcys -= dutyCcys; + } + else if (wave.adjDutyCcys) { + dutyCcys -= wave.adjDutyCcys; + wave.adjDutyCcys = 0; + } + wave.endDutyCcy = now + dutyCcys; + if (static_cast(wave.endDutyCcy - wave.nextPeriodCcy) > 0) { + wave.endDutyCcy = wave.nextPeriodCcy; + } + waveform.states |= pinBit; + if (16 == pin) { + GP16O = 1; + } + else { + GPOS = pinBit; + } + } + waveNextEventCcy = wave.endDutyCcy; + } + + if (WaveformMode::EXPIRES == wave.mode && static_cast(waveNextEventCcy - wave.expiryCcy) > 0) { + waveNextEventCcy = wave.expiryCcy; + } + } + + if (static_cast(waveNextEventCcy - isrTimeoutCcy) >= 0) { + busyPins ^= pinBit; + if (static_cast(waveform.nextEventCcy - waveNextEventCcy) > 0) { + waveform.nextEventCcy = waveNextEventCcy; + } + } + else if (static_cast(isrNextEventCcy - waveNextEventCcy) > 0) { + isrNextEventCcy = waveNextEventCcy; + } + } + now = ESP.getCycleCount(); + } + clockDrift = 0; + } + + int32_t callbackCcys = 0; + if (waveform.timer1CB) { + callbackCcys = scaleCcys(microsecondsToClockCycles(waveform.timer1CB()), isCPU2X); + } + now = ESP.getCycleCount(); + int32_t nextEventCcys = waveform.nextEventCcy - now; + // Account for unknown duration of timer1CB(). + if (waveform.timer1CB && nextEventCcys > callbackCcys) { + waveform.nextEventCcy = now + callbackCcys; + nextEventCcys = callbackCcys; + } + + // Timer is 80MHz fixed. 160MHz CPU frequency need scaling. + int32_t deltaIrqCcys = DELTAIRQCCYS; + int32_t irqLatencyCcys = IRQLATENCYCCYS; + if (isCPU2X) { + nextEventCcys >>= 1; + deltaIrqCcys >>= 1; + irqLatencyCcys >>= 1; + } + + // Firing timer too soon, the NMI occurs before ISR has returned. + if (nextEventCcys < irqLatencyCcys + deltaIrqCcys) { + waveform.nextEventCcy = now + IRQLATENCYCCYS + DELTAIRQCCYS; + nextEventCcys = irqLatencyCcys; + } + else { + nextEventCcys -= deltaIrqCcys; + } + + // Register access is fast and edge IRQ was configured before. + T1L = nextEventCcys; +} + +#endif // ESP8266 diff --git a/tasmota/core_esp8266_waveform.h b/tasmota/core_esp8266_waveform.h new file mode 100644 index 000000000..ff5a0f56f --- /dev/null +++ b/tasmota/core_esp8266_waveform.h @@ -0,0 +1,93 @@ +/* + esp8266_waveform - General purpose waveform generation and control, + supporting outputs on all pins in parallel. + + Copyright (c) 2018 Earle F. Philhower, III. All rights reserved. + Copyright (c) 2020 Dirk O. Kaar. + + The core idea is to have a programmable waveform generator with a unique + high and low period (defined in microseconds or CPU clock cycles). TIMER1 is + set to 1-shot mode and is always loaded with the time until the next edge + of any live waveforms. + + Up to one waveform generator per pin supported. + + Each waveform generator is synchronized to the ESP clock cycle counter, not the + timer. This allows for removing interrupt jitter and delay as the counter + always increments once per 80MHz clock. Changes to a waveform are + contiguous and only take effect on the next waveform transition, + allowing for smooth transitions. + + This replaces older tone(), analogWrite(), and the Servo classes. + + Everywhere in the code where "ccy" or "ccys" is used, it means ESP.getCycleCount() + clock cycle count, or an interval measured in CPU clock cycles, but not TIMER1 + cycles (which may be 2 CPU clock cycles @ 160MHz). + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#ifdef ESP8266 + +#include + +#ifndef __ESP8266_WAVEFORM_H +#define __ESP8266_WAVEFORM_H + +#ifdef __cplusplus +extern "C" { +#endif + +// Start or change a waveform of the specified high and low times on specific pin. +// If runtimeUS > 0 then automatically stop it after that many usecs, relative to the next +// full period. +// If waveform is not yet started on pin, and on pin == alignPhase a waveform is running, +// the new waveform is started at phaseOffsetUS phase offset, in microseconds, to that. +// Setting autoPwm to true allows the wave generator to maintain PWM duty to idle cycle ratio +// under load, for applications where frequency or duty cycle must not change, leave false. +// Returns true or false on success or failure. +int startWaveform(uint8_t pin, uint32_t timeHighUS, uint32_t timeLowUS, + uint32_t runTimeUS = 0, int8_t alignPhase = -1, uint32_t phaseOffsetUS = 0, bool autoPwm = false); +// Start or change a waveform of the specified high and low CPU clock cycles on specific pin. +// If runtimeCycles > 0 then automatically stop it after that many CPU clock cycles, relative to the next +// full period. +// If waveform is not yet started on pin, and on pin == alignPhase a waveform is running, +// the new waveform is started at phaseOffsetCcys phase offset, in CPU clock cycles, to that. +// Setting autoPwm to true allows the wave generator to maintain PWM duty to idle cycle ratio +// under load, for applications where frequency or duty cycle must not change, leave false. +// Returns true or false on success or failure. +int startWaveformClockCycles(uint8_t pin, uint32_t timeHighCcys, uint32_t timeLowCcys, + uint32_t runTimeCcys = 0, int8_t alignPhase = -1, uint32_t phaseOffsetCcys = 0, bool autoPwm = false); +// Stop a waveform, if any, on the specified pin. +// Returns true or false on success or failure. +int stopWaveform(uint8_t pin); + +// Add a callback function to be called on *EVERY* timer1 trigger. The +// callback returns the number of microseconds until the next desired call. +// However, since it is called every timer1 interrupt, it may be called +// again before this period. It should therefore use the ESP Cycle Counter +// to determine whether or not to perform an operation. +// Pass in NULL to disable the callback and, if no other waveforms being +// generated, stop the timer as well. +// Make sure the CB function has the ICACHE_RAM_ATTR decorator. +void setTimer1Callback(uint32_t (*fn)()); + +#ifdef __cplusplus +} +#endif + +#endif + +#endif // ESP8266 diff --git a/tasmota/core_esp8266_wiring_digital.cpp b/tasmota/core_esp8266_wiring_digital.cpp index 7fb3ccb00..982e1c6bf 100644 --- a/tasmota/core_esp8266_wiring_digital.cpp +++ b/tasmota/core_esp8266_wiring_digital.cpp @@ -33,12 +33,6 @@ extern "C" { -// Internal-only calls, not for applications -extern void _setPWMFreq(uint32_t freq); -extern bool _stopPWM(int pin); -extern bool _setPWM(int pin, uint32_t val, uint32_t range); -extern void resetPins(); - volatile uint32_t* const esp8266_gpioToFn[16] PROGMEM = { &GPF0, &GPF1, &GPF2, &GPF3, &GPF4, &GPF5, &GPF6, &GPF7, &GPF8, &GPF9, &GPF10, &GPF11, &GPF12, &GPF13, &GPF14, &GPF15 }; extern void __pinMode(uint8_t pin, uint8_t mode) { @@ -91,8 +85,7 @@ extern void __pinMode(uint8_t pin, uint8_t mode) { } extern void ICACHE_RAM_ATTR __digitalWrite(uint8_t pin, uint8_t val) { - stopWaveform(pin); // Disable any tone - _stopPWM(pin); // ...and any analogWrite + stopWaveform(pin); if(pin < 16){ if(val) GPOS = (1 << pin); else GPOC = (1 << pin); @@ -140,8 +133,10 @@ typedef struct { static interrupt_handler_t interrupt_handlers[16] = { {0, 0, 0, 0}, }; static uint32_t interrupt_reg = 0; -void ICACHE_RAM_ATTR interrupt_handler(void*) +void ICACHE_RAM_ATTR interrupt_handler(void *arg, void *frame) { + (void) arg; + (void) frame; uint32_t status = GPIE; GPIEC = status;//clear them interrupts uint32_t levels = GPI; @@ -153,8 +148,8 @@ void ICACHE_RAM_ATTR interrupt_handler(void*) while(!(changedbits & (1 << i))) i++; changedbits &= ~(1 << i); interrupt_handler_t *handler = &interrupt_handlers[i]; - if (handler->fn && - (handler->mode == CHANGE || + if (handler->fn && + (handler->mode == CHANGE || (handler->mode & 1) == !!(levels & (1 << i)))) { // to make ISR compatible to Arduino AVR model where interrupts are disabled // we disable them before we call the client ISR @@ -271,4 +266,4 @@ extern void detachInterrupt(uint8_t pin) __attribute__ ((weak, alias("__detachIn }; -#endif // ESP8266 \ No newline at end of file +#endif // ESP8266 diff --git a/tasmota/core_esp8266_wiring_pwm.cpp b/tasmota/core_esp8266_wiring_pwm.cpp index 5189fa8f5..f74334e43 100644 --- a/tasmota/core_esp8266_wiring_pwm.cpp +++ b/tasmota/core_esp8266_wiring_pwm.cpp @@ -28,12 +28,9 @@ extern "C" { -// Internal-only calls, not for applications -extern void _setPWMFreq(uint32_t freq); -extern bool _stopPWM(int pin); -extern bool _setPWM(int pin, uint32_t val, uint32_t range); - +static uint32_t analogMap = 0; static int32_t analogScale = PWMRANGE; +static uint16_t analogFreq = 1000; extern void __analogWriteRange(uint32_t range) { if (range > 0) { @@ -43,28 +40,38 @@ extern void __analogWriteRange(uint32_t range) { extern void __analogWriteFreq(uint32_t freq) { if (freq < 40) { - freq = 40; + analogFreq = 40; } else if (freq > 60000) { - freq = 60000; + analogFreq = 60000; } else { - freq = freq; + analogFreq = freq; } - _setPWMFreq(freq); } extern void __analogWrite(uint8_t pin, int val) { if (pin > 16) { return; } - + uint32_t analogPeriod = microsecondsToClockCycles(1000000UL) / analogFreq; if (val < 0) { val = 0; } else if (val > analogScale) { val = analogScale; } - pinMode(pin, OUTPUT); - _setPWM(pin, val, analogScale); + if (analogMap & 1UL << pin) { + analogMap &= ~(1 << pin); + } + else { + pinMode(pin, OUTPUT); + } + uint32_t high = (analogPeriod * val) / analogScale; + uint32_t low = analogPeriod - high; + // Find the first GPIO being generated by checking GCC's find-first-set (returns 1 + the bit of the first 1 in an int32_t) + int phaseReference = __builtin_ffs(analogMap) - 1; + if (startWaveformClockCycles(pin, high, low, 0, phaseReference, 0, true)) { + analogMap |= (1 << pin); + } } extern void analogWrite(uint8_t pin, int val) __attribute__((weak, alias("__analogWrite"))); @@ -73,4 +80,4 @@ extern void analogWriteRange(uint32_t range) __attribute__((weak, alias("__analo }; -#endif // ESP8266 \ No newline at end of file +#endif // ESP8266